diff --git a/.gitignore b/.gitignore index 4cd5ca18..ff19abfa 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,10 @@ +scripts/ibm_vllm_generate_protos/dist +scripts/ibm_vllm_generate_protos/dts +scripts/ibm_vllm_generate_protos/types + +infra/bee-code-interpreter/* +!infra/bee-code-interpreter/bee-code-interpreter.yaml + ### Node template # Logs logs diff --git a/eslint.config.js b/eslint.config.js index 628930f4..de19607a 100644 --- a/eslint.config.js +++ b/eslint.config.js @@ -8,11 +8,15 @@ import markdown from "@eslint/markdown"; export default tseslint.config( { - ignores: ["node_modules/**", "dist/**"], + ignores: ["node_modules/**", "dist/**", "scripts/ibm_vllm_generate_protos/**"], }, eslint.configs.recommended, ...tseslint.configs.strict, ...tseslint.configs.stylistic, + { + files: ["src/adapters/ibm-vllm/types.ts"], + rules: { "@typescript-eslint/unified-signatures": "off" }, + }, { files: ["**/*.md/**"], languageOptions: { diff --git a/examples/llms/providers/ibm-vllm.ts b/examples/llms/providers/ibm-vllm.ts new file mode 100644 index 00000000..a995a93a --- /dev/null +++ b/examples/llms/providers/ibm-vllm.ts @@ -0,0 +1,36 @@ +import "dotenv/config.js"; +import { IBMvLLM } from "bee-agent-framework/adapters/ibm-vllm/llm"; +import { IBMVllmChatLLM } from "bee-agent-framework/adapters/ibm-vllm/chat"; +import { BaseMessage } from "bee-agent-framework/llms/primitives/message"; +import { Client } from "bee-agent-framework/adapters/ibm-vllm/client"; + +const client = new Client(); +{ + console.info("===RAW==="); + const llm = new IBMvLLM({ + client, + modelId: "meta-llama/llama-3-1-70b-instruct", + }); + + console.info("Meta", await llm.meta()); + + const response = await llm.generate("Hello world!", { + stream: false, + }); + console.info(response.text); +} + +{ + console.info("===CHAT==="); + const llm = IBMVllmChatLLM.fromPreset("meta-llama/llama-3-1-70b-instruct", { client }); + + console.info("Meta", await llm.meta()); + + const response = await llm.generate([ + BaseMessage.of({ + role: "user", + text: "Hello world!", + }), + ]); + console.info(response.messages); +} diff --git a/infra/bee-code-interpreter/k8s/.gitignore b/infra/bee-code-interpreter/k8s/.gitignore deleted file mode 100644 index 97e18588..00000000 --- a/infra/bee-code-interpreter/k8s/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -* -!.gitignore -!bee-code-interpreter.yaml \ No newline at end of file diff --git a/package.json b/package.json index de5f70c0..678586aa 100644 --- a/package.json +++ b/package.json @@ -78,6 +78,7 @@ "prepare": "husky", "copyright": "./scripts/copyright.sh", "release": "release-it", + "ibm-vllm:generate-types": "./scripts/ibm_vllm_generate_protos/ibm_vllm_generate_protos.sh", "_ensure_env": "cp -n .env.template .env || true", "_docker": "yarn _ensure_env && bash -c 'source ./.env && docker_cmd=$(which docker >/dev/null 2>&1 && printf docker || printf podman) && $docker_cmd \"$@\"' sh" }, @@ -115,6 +116,8 @@ }, "peerDependencies": { "@googleapis/customsearch": "^3.2.0", + "@grpc/grpc-js": "^1.11.3", + "@grpc/proto-loader": "^0.7.13", "@ibm-generative-ai/node-sdk": "~3.2.3", "@langchain/community": "~0.2.28", "@langchain/core": "~0.2.27", @@ -131,6 +134,8 @@ "@eslint/js": "^9.9.0", "@eslint/markdown": "^6.0.0", "@googleapis/customsearch": "^3.2.0", + "@grpc/grpc-js": "^1.11.3", + "@grpc/proto-loader": "^0.7.13", "@ibm-generative-ai/node-sdk": "~3.2.3", "@langchain/community": "~0.2.28", "@langchain/core": "~0.2.27", @@ -172,7 +177,7 @@ "strip-ansi": "^7.1.0", "temp-dir": "^3.0.0", "tsc-files": 
"^1.1.4", - "tsup": "^8.2.4", + "tsup": "^8.3.0", "tsx": "^4.19.0", "typescript": "^5.5.4", "typescript-eslint": "^8.2.0", diff --git a/scripts/copyright.sh b/scripts/copyright.sh index 4188b6d9..2461b506 100755 --- a/scripts/copyright.sh +++ b/scripts/copyright.sh @@ -44,7 +44,7 @@ fi if command -v nwa &> /dev/null; then nwa add -l apache -c "$AUTHOR" src dist tests scripts elif command -v docker &> /dev/null; then - docker run -it -v "${PWD}:/src" ghcr.io/b1nary-gr0up/nwa:main add -l apache -c "$AUTHOR" src dist tests scripts + docker run --rm -v "${PWD}:/src" ghcr.io/b1nary-gr0up/nwa:main add -l apache -c "$AUTHOR" src dist tests scripts else echo "Error: 'nwa' is not available. Either install it manually or install go/docker." exit 1 diff --git a/scripts/ibm_vllm_generate_protos/ibm_vllm_generate_protos.sh b/scripts/ibm_vllm_generate_protos/ibm_vllm_generate_protos.sh new file mode 100755 index 00000000..1b2e7589 --- /dev/null +++ b/scripts/ibm_vllm_generate_protos/ibm_vllm_generate_protos.sh @@ -0,0 +1,53 @@ +#!/bin/bash +# Copyright 2024 IBM Corp. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -e + +GRPC_PROTO_PATH="./src/adapters/ibm-vllm/proto" +GRPC_TYPES_PATH="./src/adapters/ibm-vllm/types.ts" + +SCRIPT_DIR="$(dirname "$0")" +OUTPUT_RELATIVE_PATH="dist/generation.d.ts" +GRPC_TYPES_TMP_PATH=types + +rm -f "$GRPC_TYPES_PATH" + +rm -rf "${SCRIPT_DIR}"/{dist,dts,types} + + +yarn run proto-loader-gen-types \ + --defaults \ + --keepCase \ + --oneofs \ + --longs=Number \ + --enums=String \ + --grpcLib=@grpc/grpc-js \ + --"outDir=${SCRIPT_DIR}/${GRPC_TYPES_TMP_PATH}" \ + "${GRPC_PROTO_PATH}"/*.proto + + +cd "$SCRIPT_DIR" + tsup --dts-only + sed -i.bak '$ d' "$OUTPUT_RELATIVE_PATH" + sed -i.bak -E "s/^interface/export interface/" "$OUTPUT_RELATIVE_PATH" + sed -i.bak -E "s/^type/export type/" "$OUTPUT_RELATIVE_PATH" +cd - + +mv "$SCRIPT_DIR/$OUTPUT_RELATIVE_PATH" "$GRPC_TYPES_PATH" +rm -rf "${SCRIPT_DIR}"/{dist,dts,types} + +yarn run lint:fix "${GRPC_TYPES_PATH}" +yarn prettier --write "${GRPC_TYPES_PATH}" +yarn copyright diff --git a/scripts/ibm_vllm_generate_protos/package.json b/scripts/ibm_vllm_generate_protos/package.json new file mode 100644 index 00000000..d25b1a7b --- /dev/null +++ b/scripts/ibm_vllm_generate_protos/package.json @@ -0,0 +1,6 @@ +{ + "name": "ibm-vllm-proto-types", + "type": "module", + "version": "1.0.0", + "typings": "./types/generation.d.ts" +} diff --git a/scripts/ibm_vllm_generate_protos/tsconfig.proto.json b/scripts/ibm_vllm_generate_protos/tsconfig.proto.json new file mode 100644 index 00000000..3f3a9b39 --- /dev/null +++ b/scripts/ibm_vllm_generate_protos/tsconfig.proto.json @@ -0,0 +1,14 @@ +{ + "compilerOptions": { + "moduleResolution": "node", + "rootDir": ".", + "baseUrl": ".", + "target": "ESNext", + "module": "ES6", + "outDir": "dist", + "declaration": true, + "emitDeclarationOnly": true, + "skipLibCheck": true, + "sourceMap": false + } +} diff --git a/scripts/ibm_vllm_generate_protos/tsup.config.ts b/scripts/ibm_vllm_generate_protos/tsup.config.ts 
new file mode 100644 index 00000000..3cf25ebf --- /dev/null +++ b/scripts/ibm_vllm_generate_protos/tsup.config.ts @@ -0,0 +1,32 @@ +/** + * Copyright 2024 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { defineConfig } from "tsup"; + +export default defineConfig({ + entry: ["types/generation.ts"], + tsconfig: "./tsconfig.proto.json", + sourcemap: false, + dts: true, + format: ["esm"], + treeshake: false, + legacyOutput: false, + skipNodeModulesBundle: true, + bundle: true, + splitting: false, + silent: false, + clean: true, +}); diff --git a/src/adapters/ibm-vllm/chat.ts b/src/adapters/ibm-vllm/chat.ts new file mode 100644 index 00000000..57677dd5 --- /dev/null +++ b/src/adapters/ibm-vllm/chat.ts @@ -0,0 +1,187 @@ +/** + * Copyright 2024 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import { isFunction, isObjectType } from "remeda"; + +import { IBMvLLM, IBMvLLMGenerateOptions, IBMvLLMOutput, IBMvLLMParameters } from "./llm.js"; + +import { Cache } from "@/cache/decoratorCache.js"; +import { BaseMessage, Role } from "@/llms/primitives/message.js"; +import { Emitter } from "@/emitter/emitter.js"; +import { ChatLLM, ChatLLMOutput } from "@/llms/chat.js"; +import { + AsyncStream, + BaseLLMTokenizeOutput, + GenerateCallbacks, + LLMError, + LLMMeta, +} from "@/llms/base.js"; +import { transformAsyncIterable } from "@/internals/helpers/stream.js"; +import { shallowCopy } from "@/serializer/utils.js"; +import { IBMVllmChatLLMPreset, IBMVllmChatLLMPresetModel } from "@/adapters/ibm-vllm/chatPreset.js"; +import { Client } from "./client.js"; +import { GetRunContext } from "@/context.js"; + +export class GrpcChatLLMOutput extends ChatLLMOutput { + public readonly raw: IBMvLLMOutput; + + constructor(rawOutput: IBMvLLMOutput) { + super(); + this.raw = rawOutput; + } + + @Cache() + get messages(): BaseMessage[] { + const text = this.raw.getTextContent(); + return [ + BaseMessage.of({ + role: Role.ASSISTANT, + text, + meta: this.raw.meta, + }), + ]; + } + + merge(other: GrpcChatLLMOutput): void { + Cache.getInstance(this, "messages").clear(); + this.raw.merge(other.raw); + } + + getTextContent(): string { + const [message] = this.messages; + return message.text; + } + + toString(): string { + return this.getTextContent(); + } + + createSnapshot() { + return { + raw: shallowCopy(this.raw), + }; + } + + loadSnapshot(snapshot: ReturnType<typeof this.createSnapshot>) { + Object.assign(this, snapshot); + } +} + +export interface IBMVllmInputConfig { + messagesToPrompt: (messages: BaseMessage[]) => string; +} + +export interface GrpcChatLLMInput { + llm: IBMvLLM; + config: IBMVllmInputConfig; +} + +export class IBMVllmChatLLM extends ChatLLM<GrpcChatLLMOutput> { + public readonly emitter = new Emitter<GenerateCallbacks>({ + namespace: ["ibm_vllm", "chat_llm"], + creator: this, + }); + + public readonly llm: IBMvLLM; + protected readonly config: IBMVllmInputConfig; + + constructor({ llm, config }: GrpcChatLLMInput) { + super(llm.modelId, llm.executionOptions); + this.llm = llm; + this.config = config; + } + + static { + this.register(); + } + + async meta(): Promise<LLMMeta> { + return this.llm.meta(); + } + + createSnapshot() { + return { + ...super.createSnapshot(), + modelId: this.modelId, + executionOptions: this.executionOptions, + llm: this.llm, + config: shallowCopy(this.config), + }; + } + + async tokenize(messages: BaseMessage[]): Promise<BaseLLMTokenizeOutput> { + const prompt = this.messagesToPrompt(messages); + return this.llm.tokenize(prompt); + } + + protected async _generate( + messages: BaseMessage[], + options: IBMvLLMGenerateOptions | undefined, + run: GetRunContext<typeof this>, + ): Promise<GrpcChatLLMOutput> { + const prompt = this.messagesToPrompt(messages); + // @ts-expect-error protected property + const rawResponse = await this.llm._generate(prompt, options, run); + return new GrpcChatLLMOutput(rawResponse); + } + + protected async *_stream( + messages: BaseMessage[], + options?: IBMvLLMGenerateOptions, + ): AsyncStream<GrpcChatLLMOutput> { + const prompt = this.messagesToPrompt(messages); + // @ts-expect-error protected property + const response = this.llm._stream(prompt, options); + return yield* transformAsyncIterable(response, (output) => new GrpcChatLLMOutput(output)); + } + + messagesToPrompt(messages: BaseMessage[]) { + return this.config.messagesToPrompt(messages); + } + + static fromPreset( + modelId: IBMVllmChatLLMPresetModel, + overrides?: { + client?: Client; + parameters?: IBMvLLMParameters
| ((value: IBMvLLMParameters) => IBMvLLMParameters); + }, + ) { + const presetFactory = IBMVllmChatLLMPreset[modelId]; + if (!presetFactory) { + throw new LLMError(`Model "${modelId}" does not exist in preset.`); + } + + const preset = presetFactory(); + let parameters = preset.base.parameters ?? {}; + if (overrides) { + if (isFunction(overrides.parameters)) { + parameters = overrides.parameters(parameters); + } else if (isObjectType(overrides.parameters)) { + parameters = overrides.parameters; + } + } + + return new IBMVllmChatLLM({ + config: preset.chat, + llm: new IBMvLLM({ + ...preset.base, + ...overrides, + parameters, + modelId, + }), + }); + } +} diff --git a/src/adapters/ibm-vllm/chatPreset.ts b/src/adapters/ibm-vllm/chatPreset.ts new file mode 100644 index 00000000..03d874f8 --- /dev/null +++ b/src/adapters/ibm-vllm/chatPreset.ts @@ -0,0 +1,150 @@ +/** + * Copyright 2024 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { LLMChatTemplate, LLMChatTemplates } from "@/adapters/shared/llmChatTemplates.js"; +import { z } from "zod"; + +import { IBMVllmInputConfig } from "./chat.js"; +import { IBMvLLMInput } from "./llm.js"; + +import { PromptTemplate } from "@/template.js"; + +interface IBMVllmChatLLMPreset { + chat: IBMVllmInputConfig; + base: IBMvLLMInput; +} + +export const IBMVllmModel = { + LLAMA_3_1_405B_INSTRUCT_FP8: "meta-llama/llama-3-1-405b-instruct-fp8", + LLAMA_3_1_70B_INSTRUCT: "meta-llama/llama-3-1-70b-instruct", + QWEN2_72B_INSTRUCT: "qwen/qwen2-72b-instruct", + GRANITE_INSTRUCT: "ibm/granite-instruct", // Generic model ID is used for ease of development, ground it once stable +} as const; +export type IBMVllmModel = (typeof IBMVllmModel)[keyof typeof IBMVllmModel]; + +export const IBMVllmChatLLMPreset = { + [IBMVllmModel.LLAMA_3_1_405B_INSTRUCT_FP8]: (): IBMVllmChatLLMPreset => { + const { template, parameters, messagesToPrompt } = LLMChatTemplates.get("llama3.1"); + return { + base: { + modelId: IBMVllmModel.LLAMA_3_1_70B_INSTRUCT, + parameters: { + method: "GREEDY", + stopping: { + stop_sequences: [...parameters.stop_sequence], + include_stop_sequence: false, + max_new_tokens: 2048, + }, + decoding: { + repetition_penalty: 1, + }, + }, + }, + chat: { + messagesToPrompt: messagesToPrompt(template), + }, + }; + }, + [IBMVllmModel.LLAMA_3_1_70B_INSTRUCT]: (): IBMVllmChatLLMPreset => { + const { template, parameters, messagesToPrompt } = LLMChatTemplates.get("llama3.1"); + return { + base: { + modelId: IBMVllmModel.LLAMA_3_1_70B_INSTRUCT, + parameters: { + method: "GREEDY", + stopping: { + stop_sequences: [...parameters.stop_sequence], + include_stop_sequence: false, + max_new_tokens: 2048, + }, + decoding: { + repetition_penalty: 1, + }, + }, + }, + chat: { + messagesToPrompt: messagesToPrompt(template), + }, + }; + }, + [IBMVllmModel.QWEN2_72B_INSTRUCT]: (): IBMVllmChatLLMPreset => { + const { template, parameters, messagesToPrompt } = LLMChatTemplates.get("qwen2"); + return { + base: { + modelId: IBMVllmModel.QWEN2_72B_INSTRUCT, + 
parameters: { + method: "GREEDY", + stopping: { + stop_sequences: [...parameters.stop_sequence], + include_stop_sequence: false, + max_new_tokens: 1024, + }, + }, + }, + chat: { + messagesToPrompt: messagesToPrompt(template), + }, + }; + }, + [IBMVllmModel.GRANITE_INSTRUCT]: (): IBMVllmChatLLMPreset => { + const llama31config = LLMChatTemplates.get("llama3.1"); + const { template, parameters, messagesToPrompt } = { + template: new PromptTemplate({ + schema: z.object({ + messages: z.array( + z.object({ + system: z.array(z.string()), + user: z.array(z.string()), + assistant: z.array(z.string()), + ipython: z.array(z.string()), + }), + ), + }), + template: `{{#messages}}{{#system}}<|start_of_role|>system<|end_of_role|> + + {{system}}<|end_of_text|>{{/system}}{{#user}}<|start_of_role|>user<|end_of_role|> + + {{user}}<|end_of_text|>{{/user}}{{#assistant}}<|start_of_role|>assistant<|end_of_role|> + + {{assistant}}<|end_of_text|>{{/assistant}}{{#ipython}}<|start_of_role|>ipython<|end_of_role|> + + {{ipython}}<|end_of_text|>{{/ipython}}{{/messages}}<|start_of_role|>assistant<|end_of_role|> + `, + }), + messagesToPrompt: llama31config.messagesToPrompt, + parameters: { + stop_sequence: ["<|end_of_text|>"], + }, + } satisfies LLMChatTemplate; + return { + base: { + modelId: IBMVllmModel.GRANITE_INSTRUCT, + parameters: { + method: "GREEDY", + stopping: { + stop_sequences: [...parameters.stop_sequence], + include_stop_sequence: false, + }, + }, + }, + chat: { + messagesToPrompt: messagesToPrompt(template), + }, + }; + }, +} as const satisfies { [key in IBMVllmModel]: () => IBMVllmChatLLMPreset }; + +export type IBMVllmChatLLMPresetModel = keyof typeof IBMVllmChatLLMPreset; diff --git a/src/adapters/ibm-vllm/client.ts b/src/adapters/ibm-vllm/client.ts new file mode 100644 index 00000000..be4b8746 --- /dev/null +++ b/src/adapters/ibm-vllm/client.ts @@ -0,0 +1,216 @@ +/** + * Copyright 2024 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import grpc, { + CallOptions as GRPCCallOptions, + ClientOptions as GRPCClientOptions, + ClientReadableStream, + ClientUnaryCall, +} from "@grpc/grpc-js"; + +import * as R from "remeda"; +// eslint-disable-next-line no-restricted-imports +import { UnaryCallback } from "@grpc/grpc-js/build/src/client.js"; +import { FrameworkError, ValueError } from "@/errors.js"; +import protoLoader from "@grpc/proto-loader"; + +import { + BatchedGenerationRequest, + BatchedGenerationResponse__Output, + BatchedTokenizeRequest, + BatchedTokenizeResponse__Output, + GenerationRequest__Output, + ModelInfoRequest, + ModelInfoResponse__Output, + ProtoGrpcType as GenerationProtoGentypes, + SingleGenerationRequest, +} from "@/adapters/ibm-vllm/types.js"; +import { parseEnv } from "@/internals/env.js"; +import { z } from "zod"; +import { Cache } from "@/cache/decoratorCache.js"; +import { Serializable } from "@/internals/serializable.js"; + +const GENERATION_PROTO_PATH = new URL("./proto/generation.proto", import.meta.url); + +interface ClientOptions { + modelRouterSubdomain?: string; + url: string; + credentials: { + rootCert: string; + certChain: string; + privateKey: string; + }; + grpcClientOptions: GRPCClientOptions; + clientShutdownDelay: number; +} + +const defaultOptions = { + clientShutdownDelay: 5 * 60 * 1000, + grpcClientOptions: { + // This is needed, otherwise communication to DIPC cluster fails with "Dropped connection" error after +- 50 secs + "grpc.keepalive_time_ms": 25000, + "grpc.max_receive_message_length": 32 * 1024 * 1024, // 32MiB + }, +}; + +const generationPackageObject = grpc.loadPackageDefinition( + protoLoader.loadSync([GENERATION_PROTO_PATH.pathname], { + longs: Number, + enums: String, + arrays: true, + objects: true, + oneofs: true, + keepCase: true, + defaults: true, + }), +) as unknown as GenerationProtoGentypes; + +const GRPC_CLIENT_TTL = 15 * 60 * 1000; + +type CallOptions = GRPCCallOptions & { signal?: AbortSignal }; +type RequiredModel = T & { model_id: string }; + +export class Client extends Serializable { + public readonly options: ClientOptions; + private usedDefaultCredentials = false; + + @Cache({ ttl: GRPC_CLIENT_TTL }) + protected getClient(modelId: string) { + const modelSpecificUrl = this.options.url.replace(/{model_id}/, modelId.replaceAll("/", "--")); + const client = new generationPackageObject.fmaas.GenerationService( + modelSpecificUrl, + grpc.credentials.createSsl( + Buffer.from(this.options.credentials.rootCert), + Buffer.from(this.options.credentials.privateKey), + Buffer.from(this.options.credentials.certChain), + ), + this.options.grpcClientOptions, + ); + setTimeout(() => { + try { + client.close(); + } catch { + /* empty */ + } + }, GRPC_CLIENT_TTL + this.options.clientShutdownDelay).unref(); + return client; + } + + protected getDefaultCredentials() { + this.usedDefaultCredentials = true; + return { + rootCert: parseEnv("IBM_VLLM_ROOT_CERT", z.string()), + privateKey: parseEnv("IBM_VLLM_PRIVATE_KEY", z.string()), + certChain: parseEnv("IBM_VLLM_CERT_CHAIN", z.string()), + }; + } + + constructor(options?: Partial) { + super(); + this.options = { + ...defaultOptions, + ...options, + url: options?.url ?? parseEnv("IBM_VLLM_URL", z.string()), + credentials: options?.credentials ?? 
this.getDefaultCredentials(), + }; + } + + async modelInfo(request: RequiredModel, options?: CallOptions) { + const client = this.getClient(request.model_id); + return this.wrapGrpcCall( + client.modelInfo.bind(client), + )(request, options); + } + + async generate(request: RequiredModel, options?: CallOptions) { + const client = this.getClient(request.model_id); + return this.wrapGrpcCall( + client.generate.bind(client), + )(request, options); + } + + async generateStream(request: RequiredModel, options?: CallOptions) { + const client = this.getClient(request.model_id); + return this.wrapGrpcStream( + client.generateStream.bind(client), + )(request, options); + } + + async tokenize(request: RequiredModel, options?: CallOptions) { + const client = this.getClient(request.model_id); + return this.wrapGrpcCall( + client.tokenize.bind(client), + )(request, options); + } + + protected wrapGrpcCall( + fn: ( + request: TRequest, + options: CallOptions, + callback: UnaryCallback, + ) => ClientUnaryCall, + ) { + return (request: TRequest, { signal, ...options }: CallOptions = {}): Promise => { + return new Promise((resolve, reject) => { + const call = fn(request, options, (err, response) => { + signal?.removeEventListener("abort", abortHandler); + if (err) { + reject(err); + } else { + if (response === undefined) { + reject(new FrameworkError("Invalid response from GRPC server")); + } else { + resolve(response); + } + } + }); + const abortHandler = () => call.cancel(); + signal?.addEventListener("abort", abortHandler, { once: true }); + }); + }; + } + + protected wrapGrpcStream( + fn: (request: TRequest, options: CallOptions) => ClientReadableStream, + ) { + return async ( + request: TRequest, + { signal, ...options }: CallOptions = {}, + ): Promise> => { + const stream = fn(request, options); + const abortHandler = () => stream.cancel(); + signal?.addEventListener("abort", abortHandler, { once: true }); + stream.addListener("close", () => signal?.removeEventListener("abort", abortHandler)); + return stream; + }; + } + + createSnapshot() { + if (!this.usedDefaultCredentials) { + throw new ValueError( + "Cannot serialize a client with credentials passed directly. Use environment variables.", + ); + } + return { + options: R.omit(this.options, ["credentials"]), + }; + } + + loadSnapshot(snapshot: ReturnType) { + Object.assign(this, snapshot); + this.options.credentials = this.getDefaultCredentials(); + } +} diff --git a/src/adapters/ibm-vllm/llm.ts b/src/adapters/ibm-vllm/llm.ts new file mode 100644 index 00000000..9f3c5485 --- /dev/null +++ b/src/adapters/ibm-vllm/llm.ts @@ -0,0 +1,248 @@ +/** + * Copyright 2024 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import { + AsyncStream, + BaseLLMOutput, + BaseLLMTokenizeOutput, + ExecutionOptions, + GenerateCallbacks, + GenerateOptions, + LLMError, + LLMMeta, +} from "@/llms/base.js"; +import { isEmpty, isString } from "remeda"; +import type { SingleGenerationRequest } from "@/adapters/ibm-vllm/types.js"; +import { LLM, LLMInput } from "@/llms/llm.js"; +import { Emitter } from "@/emitter/emitter.js"; +import { GenerationResponse__Output } from "@/adapters/ibm-vllm/types.js"; +import { shallowCopy } from "@/serializer/utils.js"; +import { FrameworkError, NotImplementedError } from "@/errors.js"; +import { assign } from "@/internals/helpers/object.js"; +import { ServiceError } from "@grpc/grpc-js"; +import { Client } from "@/adapters/ibm-vllm/client.js"; +import { GetRunContext } from "@/context.js"; +import { BatchedGenerationRequest } from "./types.js"; + +function isGrpcServiceError(err: unknown): err is ServiceError { + return ( + err instanceof Error && + err.constructor.name === "Error" && + "code" in err && + typeof err.code === "number" + ); +} + +export class IBMvLLMOutput extends BaseLLMOutput { + constructor( + public text: string, + public readonly meta: Record, + ) { + super(); + } + + static { + this.register(); + } + + merge(other: IBMvLLMOutput): void { + this.text += other.text; + assign(this.meta, other.meta); + } + + getTextContent(): string { + return this.text; + } + + toString(): string { + return this.getTextContent(); + } + + createSnapshot() { + return { + text: this.text, + meta: shallowCopy(this.meta), + }; + } + + loadSnapshot(snapshot: ReturnType) { + Object.assign(this, snapshot); + } +} + +export interface IBMvLLMInput { + client?: Client; + modelId: string; + parameters?: IBMvLLMParameters; + executionOptions?: ExecutionOptions; +} + +export type IBMvLLMParameters = NonNullable< + BatchedGenerationRequest["params"] & SingleGenerationRequest["params"] +>; + +export interface IBMvLLMGenerateOptions extends GenerateOptions {} + +export class IBMvLLM extends LLM { + public readonly emitter = new Emitter({ + namespace: ["ibm_vllm", "llm"], + creator: this, + }); + + public readonly client: Client; + public readonly parameters: Partial; + + constructor({ client, modelId, parameters = {}, executionOptions }: IBMvLLMInput) { + super(modelId, executionOptions); + this.client = client ?? new Client(); + this.parameters = parameters ?? 
{}; + } + + static { + this.register(); + } + + async meta(): Promise<LLMMeta> { + const response = await this.client.modelInfo({ model_id: this.modelId }); + return { + tokenLimit: response.max_sequence_length, + }; + } + + async tokenize(input: LLMInput): Promise<BaseLLMTokenizeOutput> { + try { + const response = await this.client.tokenize({ + model_id: this.modelId, + requests: [{ text: input }], + }); + const output = response.responses.at(0); + if (!output) { + throw new LLMError("Missing output", [], { context: { response } }); + } + return { + tokens: output.tokens, + tokensCount: output.token_count, + }; + } catch (err) { + throw this._transformError(err); + } + } + + protected async _generate( + input: LLMInput, + options: IBMvLLMGenerateOptions | undefined, + run: GetRunContext<typeof this>, + ): Promise<IBMvLLMOutput> { + try { + const response = await this.client.generate( + { + model_id: this.modelId, + requests: [{ text: input }], + params: this._prepareParameters(options), + }, + { signal: run.signal }, + ); + const output = response.responses.at(0); + if (!output) { + throw new LLMError("Missing output", [], { context: { response } }); + } + + const { text, ...rest } = output; + return new IBMvLLMOutput(text, rest); + } catch (err) { + throw this._transformError(err); + } + } + + protected async *_stream( + input: string, + options: IBMvLLMGenerateOptions | undefined, + run: GetRunContext<typeof this>, + ): AsyncStream<IBMvLLMOutput> { + try { + const stream = await this.client.generateStream( + { + model_id: this.modelId, + request: { text: input }, + params: this._prepareParameters(options), + }, + { signal: run.signal }, + ); + for await (const chunk of stream) { + const typedChunk = chunk as GenerationResponse__Output; + const { text, ...rest } = typedChunk; + if (text.length > 0) { + yield new IBMvLLMOutput(text, rest); + } + } + } catch (err) { + throw this._transformError(err); + } + } + + createSnapshot() { + return { + ...super.createSnapshot(), + client: this.client, + modelId: this.modelId, + parameters: shallowCopy(this.parameters), + executionOptions: shallowCopy(this.executionOptions), + }; + } + + loadSnapshot(snapshot: ReturnType<typeof this.createSnapshot>) { + super.loadSnapshot(snapshot); + Object.assign(this, snapshot); + } + + protected _transformError(error: Error): Error { + if (error instanceof FrameworkError) { + throw error; + } + if (isGrpcServiceError(error)) { + throw new LLMError("The LLM has encountered an error!", [error], { + isRetryable: [8, 4, 14].includes(error.code), + }); + } + return new LLMError("The LLM has encountered an error!", [error]); + } + + protected _prepareParameters(overrides?: GenerateOptions): typeof this.parameters { + const guided = overrides?.guided ? {} : (this.parameters.decoding ?? {}); + const guidedOverride = overrides?.guided; + + if (guidedOverride?.choice) { + guided.choice = { ...guided.choice, choices: guidedOverride.choice }; + } else if (guidedOverride?.grammar) { + guided.grammar = guidedOverride.grammar; + } else if (guidedOverride?.json) { + guided.json_schema = isString(guidedOverride.json) + ? JSON.parse(guidedOverride.json) + : guidedOverride.json; + } else if (guidedOverride?.regex) { + guided.regex = guidedOverride.regex; + } else if (!isEmpty(guidedOverride ??
{})) { + throw new NotImplementedError( + `Following types ${Object.keys(overrides!.guided!).join(",")}" for the constraint decoding are not supported!`, + ); + } + + return { + ...this.parameters, + decoding: guided, + }; + } +} diff --git a/src/adapters/ibm-vllm/proto/generation.proto b/src/adapters/ibm-vllm/proto/generation.proto new file mode 100644 index 00000000..709bc751 --- /dev/null +++ b/src/adapters/ibm-vllm/proto/generation.proto @@ -0,0 +1,289 @@ +/** + * Copyright 2024 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Source: https://github.com/IBM/vllm/blob/main/proto/generation.proto + +syntax = "proto3"; +package fmaas; + + +service GenerationService { + // Generates text given a text prompt, for one or more inputs + rpc Generate (BatchedGenerationRequest) returns (BatchedGenerationResponse) {} + // Generates text given a single input prompt, streaming the response + rpc GenerateStream (SingleGenerationRequest) returns (stream GenerationResponse) {} + // Tokenize text + rpc Tokenize (BatchedTokenizeRequest) returns (BatchedTokenizeResponse) {} + // Model info + rpc ModelInfo (ModelInfoRequest) returns (ModelInfoResponse) {} +} + +// ============================================================================================================ +// Generation API + +enum DecodingMethod { + GREEDY = 0; + SAMPLE = 1; +} + +message BatchedGenerationRequest { + string model_id = 1; + // Deprecated in favor of adapter_id + optional string prefix_id = 2; + optional string adapter_id = 4; + repeated GenerationRequest requests = 3; + + Parameters params = 10; +} + +message SingleGenerationRequest { + string model_id = 1; + // Deprecated in favor of adapter_id + optional string prefix_id = 2; + optional string adapter_id = 4; + GenerationRequest request = 3; + + Parameters params = 10; +} + +message BatchedGenerationResponse { + repeated GenerationResponse responses = 1; +} + +message GenerationRequest { + string text = 2; +} + +message GenerationResponse { + uint32 input_token_count = 6; + uint32 generated_token_count = 2; + string text = 4; + StopReason stop_reason = 7; + // The stop sequence encountered, iff stop_reason == STOP_SEQUENCE + string stop_sequence = 11; + // Random seed used, not applicable for greedy requests + uint64 seed = 10; + + // Individual generated tokens and associated details, if requested + repeated TokenInfo tokens = 8; + + // Input tokens and associated details, if requested + repeated TokenInfo input_tokens = 9; +} + +message Parameters { + // The high level decoding approach + DecodingMethod method = 1; + // Parameters related to sampling, applicable only when method == SAMPLING + SamplingParameters sampling = 2; + // Parameters controlling when generation should stop + StoppingCriteria stopping = 3; + // Flags to control what is returned in the response + ResponseOptions response = 4; + // Parameters for conditionally penalizing/boosting + // candidate tokens during decoding + DecodingParameters decoding = 5; + // Truncate to 
this many input tokens. Can be used to avoid requests + // failing due to input being longer than configured limits. + // Zero means don't truncate. + uint32 truncate_input_tokens = 6; +} + +message DecodingParameters { + message LengthPenalty { + // Start the decay after this number of tokens have been generated + uint32 start_index = 1; + // Factor of exponential decay + float decay_factor = 2; + } + + // Default (0.0) means no penalty (equivalent to 1.0) + // 1.2 is a recommended value + float repetition_penalty = 1; + + // Exponentially increases the score of the EOS token + // once start_index tokens have been generated + optional LengthPenalty length_penalty = 2; + + enum ResponseFormat { + // Plain text, no constraints + TEXT = 0; + // Valid json + JSON = 1; + } + + message StringChoices { + repeated string choices = 1; + } + + // Mutually-exclusive guided decoding options + oneof guided { + // Output will be in the specified format + ResponseFormat format = 3; + // Output will follow the provided JSON schema + string json_schema = 4; + // Output will follow the provided regex pattern + string regex = 5; + // Output will be exactly one of the specified choices + StringChoices choice = 6; + // Output will follow the provided context free grammar + string grammar = 7; + } +} + + +message SamplingParameters { + // Default (0.0) means disabled (equivalent to 1.0) + float temperature = 1; + // Default (0) means disabled + uint32 top_k = 2; + // Default (0) means disabled (equivalent to 1.0) + float top_p = 3; + // Default (0) means disabled (equivalent to 1.0) + float typical_p = 4; + + optional uint64 seed = 5; +} + +message StoppingCriteria { + // Default (0) is currently 20 + uint32 max_new_tokens = 1; + // Default (0) means no minimum + uint32 min_new_tokens = 2; + // Default (0) means no time limit + uint32 time_limit_millis = 3; + repeated string stop_sequences = 4; + // If not specified, default behavior depends on server setting + optional bool include_stop_sequence = 5; + + //more to come +} + +message ResponseOptions { + // Include input text + bool input_text = 1; + // Include list of individual generated tokens + // "Extra" token information is included based on the other flags below + bool generated_tokens = 2; + // Include list of input tokens + // "Extra" token information is included based on the other flags here, + // but only for decoder-only models + bool input_tokens = 3; + // Include logprob for each returned token + // Applicable only if generated_tokens == true and/or input_tokens == true + bool token_logprobs = 4; + // Include rank of each returned token + // Applicable only if generated_tokens == true and/or input_tokens == true + bool token_ranks = 5; + // Include top n candidate tokens at the position of each returned token + // The maximum value permitted is 5, but more may be returned if there is a tie + // for nth place. 
+ // Applicable only if generated_tokens == true and/or input_tokens == true + uint32 top_n_tokens = 6; +} + +enum StopReason { + // Possibly more tokens to be streamed + NOT_FINISHED = 0; + // Maximum requested tokens reached + MAX_TOKENS = 1; + // End-of-sequence token encountered + EOS_TOKEN = 2; + // Request cancelled by client + CANCELLED = 3; + // Time limit reached + TIME_LIMIT = 4; + // Stop sequence encountered + STOP_SEQUENCE = 5; + // Total token limit reached + TOKEN_LIMIT = 6; + // Decoding error + ERROR = 7; +} + +message TokenInfo { + // uint32 id = 1; // TBD + string text = 2; + // The logprob (log of normalized probability), if requested + float logprob = 3; + // One-based rank relative to other tokens, if requested + uint32 rank = 4; + + message TopToken { + // uint32 id = 1; // TBD + string text = 2; + float logprob = 3; + } + + // Top N candidate tokens at this position, if requested + // May or may not include this token + repeated TopToken top_tokens = 5; +} + + +// ============================================================================================================ +// Tokenization API + +message BatchedTokenizeRequest { + string model_id = 1; + repeated TokenizeRequest requests = 2; + bool return_tokens = 3; + bool return_offsets = 4; + + // Zero means don't truncate. + uint32 truncate_input_tokens = 5; +} + +message BatchedTokenizeResponse { + repeated TokenizeResponse responses = 1; +} + +message TokenizeRequest { + string text = 1; +} + +message TokenizeResponse { + message Offset { + uint32 start = 1; + uint32 end = 2; + } + + uint32 token_count = 1; + + // if return_tokens = true + repeated string tokens = 2; + // if return_tokens = true + repeated Offset offsets = 3; +} + + +// ============================================================================================================ +// Model Info API + +message ModelInfoRequest { + string model_id = 1; +} + +message ModelInfoResponse { + enum ModelKind { + DECODER_ONLY = 0; + ENCODER_DECODER = 1; + } + + ModelKind model_kind = 1; + uint32 max_sequence_length = 2; + uint32 max_new_tokens = 3; +} diff --git a/src/adapters/ibm-vllm/types.ts b/src/adapters/ibm-vllm/types.ts new file mode 100644 index 00000000..55bcc902 --- /dev/null +++ b/src/adapters/ibm-vllm/types.ts @@ -0,0 +1,535 @@ +/** + * Copyright 2024 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import * as grpc from "@grpc/grpc-js"; +import { + Long, + MethodDefinition, + MessageTypeDefinition, + EnumTypeDefinition, +} from "@grpc/proto-loader"; + +export interface GenerationRequest { + text?: string; +} +export interface GenerationRequest__Output { + text: string; +} + +declare const DecodingMethod: { + readonly GREEDY: "GREEDY"; + readonly SAMPLE: "SAMPLE"; +}; +export type DecodingMethod = "GREEDY" | 0 | "SAMPLE" | 1; +export type DecodingMethod__Output = (typeof DecodingMethod)[keyof typeof DecodingMethod]; + +export interface SamplingParameters { + temperature?: number | string; + top_k?: number; + top_p?: number | string; + typical_p?: number | string; + seed?: number | string | Long; + _seed?: "seed"; +} +export interface SamplingParameters__Output { + temperature: number; + top_k: number; + top_p: number; + typical_p: number; + seed?: number; + _seed: "seed"; +} + +export interface StoppingCriteria { + max_new_tokens?: number; + min_new_tokens?: number; + time_limit_millis?: number; + stop_sequences?: string[]; + include_stop_sequence?: boolean; + _include_stop_sequence?: "include_stop_sequence"; +} +export interface StoppingCriteria__Output { + max_new_tokens: number; + min_new_tokens: number; + time_limit_millis: number; + stop_sequences: string[]; + include_stop_sequence?: boolean; + _include_stop_sequence: "include_stop_sequence"; +} + +export interface ResponseOptions { + input_text?: boolean; + generated_tokens?: boolean; + input_tokens?: boolean; + token_logprobs?: boolean; + token_ranks?: boolean; + top_n_tokens?: number; +} +export interface ResponseOptions__Output { + input_text: boolean; + generated_tokens: boolean; + input_tokens: boolean; + token_logprobs: boolean; + token_ranks: boolean; + top_n_tokens: number; +} + +export interface _fmaas_DecodingParameters_LengthPenalty { + start_index?: number; + decay_factor?: number | string; +} +export interface _fmaas_DecodingParameters_LengthPenalty__Output { + start_index: number; + decay_factor: number; +} +declare const _fmaas_DecodingParameters_ResponseFormat: { + readonly TEXT: "TEXT"; + readonly JSON: "JSON"; +}; +export type _fmaas_DecodingParameters_ResponseFormat = "TEXT" | 0 | "JSON" | 1; +export type _fmaas_DecodingParameters_ResponseFormat__Output = + (typeof _fmaas_DecodingParameters_ResponseFormat)[keyof typeof _fmaas_DecodingParameters_ResponseFormat]; +export interface _fmaas_DecodingParameters_StringChoices { + choices?: string[]; +} +export interface _fmaas_DecodingParameters_StringChoices__Output { + choices: string[]; +} +export interface DecodingParameters { + repetition_penalty?: number | string; + length_penalty?: _fmaas_DecodingParameters_LengthPenalty | null; + format?: _fmaas_DecodingParameters_ResponseFormat; + json_schema?: string; + regex?: string; + choice?: _fmaas_DecodingParameters_StringChoices | null; + grammar?: string; + _length_penalty?: "length_penalty"; + guided?: "format" | "json_schema" | "regex" | "choice" | "grammar"; +} +export interface DecodingParameters__Output { + repetition_penalty: number; + length_penalty?: _fmaas_DecodingParameters_LengthPenalty__Output | null; + format?: _fmaas_DecodingParameters_ResponseFormat__Output; + json_schema?: string; + regex?: string; + choice?: _fmaas_DecodingParameters_StringChoices__Output | null; + grammar?: string; + _length_penalty: "length_penalty"; + guided: "format" | "json_schema" | "regex" | "choice" | "grammar"; +} + +export interface Parameters { + method?: DecodingMethod; + sampling?: SamplingParameters | null; + 
stopping?: StoppingCriteria | null; + response?: ResponseOptions | null; + decoding?: DecodingParameters | null; + truncate_input_tokens?: number; +} +export interface Parameters__Output { + method: DecodingMethod__Output; + sampling: SamplingParameters__Output | null; + stopping: StoppingCriteria__Output | null; + response: ResponseOptions__Output | null; + decoding: DecodingParameters__Output | null; + truncate_input_tokens: number; +} + +export interface BatchedGenerationRequest { + model_id?: string; + prefix_id?: string; + requests?: GenerationRequest[]; + adapter_id?: string; + params?: Parameters | null; + _prefix_id?: "prefix_id"; + _adapter_id?: "adapter_id"; +} +export interface BatchedGenerationRequest__Output { + model_id: string; + prefix_id?: string; + requests: GenerationRequest__Output[]; + adapter_id?: string; + params: Parameters__Output | null; + _prefix_id: "prefix_id"; + _adapter_id: "adapter_id"; +} + +declare const StopReason: { + readonly NOT_FINISHED: "NOT_FINISHED"; + readonly MAX_TOKENS: "MAX_TOKENS"; + readonly EOS_TOKEN: "EOS_TOKEN"; + readonly CANCELLED: "CANCELLED"; + readonly TIME_LIMIT: "TIME_LIMIT"; + readonly STOP_SEQUENCE: "STOP_SEQUENCE"; + readonly TOKEN_LIMIT: "TOKEN_LIMIT"; + readonly ERROR: "ERROR"; +}; +export type StopReason = + | "NOT_FINISHED" + | 0 + | "MAX_TOKENS" + | 1 + | "EOS_TOKEN" + | 2 + | "CANCELLED" + | 3 + | "TIME_LIMIT" + | 4 + | "STOP_SEQUENCE" + | 5 + | "TOKEN_LIMIT" + | 6 + | "ERROR" + | 7; +export type StopReason__Output = (typeof StopReason)[keyof typeof StopReason]; + +export interface _fmaas_TokenInfo_TopToken { + text?: string; + logprob?: number | string; +} +export interface _fmaas_TokenInfo_TopToken__Output { + text: string; + logprob: number; +} +export interface TokenInfo { + text?: string; + logprob?: number | string; + rank?: number; + top_tokens?: _fmaas_TokenInfo_TopToken[]; +} +export interface TokenInfo__Output { + text: string; + logprob: number; + rank: number; + top_tokens: _fmaas_TokenInfo_TopToken__Output[]; +} + +export interface GenerationResponse { + generated_token_count?: number; + text?: string; + input_token_count?: number; + stop_reason?: StopReason; + tokens?: TokenInfo[]; + input_tokens?: TokenInfo[]; + seed?: number | string | Long; + stop_sequence?: string; +} +export interface GenerationResponse__Output { + generated_token_count: number; + text: string; + input_token_count: number; + stop_reason: StopReason__Output; + tokens: TokenInfo__Output[]; + input_tokens: TokenInfo__Output[]; + seed: number; + stop_sequence: string; +} + +export interface BatchedGenerationResponse { + responses?: GenerationResponse[]; +} +export interface BatchedGenerationResponse__Output { + responses: GenerationResponse__Output[]; +} + +export interface TokenizeRequest { + text?: string; +} +export interface TokenizeRequest__Output { + text: string; +} + +export interface BatchedTokenizeRequest { + model_id?: string; + requests?: TokenizeRequest[]; + return_tokens?: boolean; + return_offsets?: boolean; + truncate_input_tokens?: number; +} +export interface BatchedTokenizeRequest__Output { + model_id: string; + requests: TokenizeRequest__Output[]; + return_tokens: boolean; + return_offsets: boolean; + truncate_input_tokens: number; +} + +export interface _fmaas_TokenizeResponse_Offset { + start?: number; + end?: number; +} +export interface _fmaas_TokenizeResponse_Offset__Output { + start: number; + end: number; +} +export interface TokenizeResponse { + token_count?: number; + tokens?: string[]; + offsets?: 
_fmaas_TokenizeResponse_Offset[]; +} +export interface TokenizeResponse__Output { + token_count: number; + tokens: string[]; + offsets: _fmaas_TokenizeResponse_Offset__Output[]; +} + +export interface BatchedTokenizeResponse { + responses?: TokenizeResponse[]; +} +export interface BatchedTokenizeResponse__Output { + responses: TokenizeResponse__Output[]; +} + +export interface ModelInfoRequest { + model_id?: string; +} +export interface ModelInfoRequest__Output { + model_id: string; +} + +declare const _fmaas_ModelInfoResponse_ModelKind: { + readonly DECODER_ONLY: "DECODER_ONLY"; + readonly ENCODER_DECODER: "ENCODER_DECODER"; +}; +export type _fmaas_ModelInfoResponse_ModelKind = "DECODER_ONLY" | 0 | "ENCODER_DECODER" | 1; +export type _fmaas_ModelInfoResponse_ModelKind__Output = + (typeof _fmaas_ModelInfoResponse_ModelKind)[keyof typeof _fmaas_ModelInfoResponse_ModelKind]; +export interface ModelInfoResponse { + model_kind?: _fmaas_ModelInfoResponse_ModelKind; + max_sequence_length?: number; + max_new_tokens?: number; +} +export interface ModelInfoResponse__Output { + model_kind: _fmaas_ModelInfoResponse_ModelKind__Output; + max_sequence_length: number; + max_new_tokens: number; +} + +export interface SingleGenerationRequest { + model_id?: string; + prefix_id?: string; + request?: GenerationRequest | null; + adapter_id?: string; + params?: Parameters | null; + _prefix_id?: "prefix_id"; + _adapter_id?: "adapter_id"; +} +export interface SingleGenerationRequest__Output { + model_id: string; + prefix_id?: string; + request: GenerationRequest__Output | null; + adapter_id?: string; + params: Parameters__Output | null; + _prefix_id: "prefix_id"; + _adapter_id: "adapter_id"; +} + +export interface GenerationServiceClient extends grpc.Client { + Generate( + argument: BatchedGenerationRequest, + metadata: grpc.Metadata, + options: grpc.CallOptions, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + Generate( + argument: BatchedGenerationRequest, + metadata: grpc.Metadata, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + Generate( + argument: BatchedGenerationRequest, + options: grpc.CallOptions, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + Generate( + argument: BatchedGenerationRequest, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + generate( + argument: BatchedGenerationRequest, + metadata: grpc.Metadata, + options: grpc.CallOptions, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + generate( + argument: BatchedGenerationRequest, + metadata: grpc.Metadata, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + generate( + argument: BatchedGenerationRequest, + options: grpc.CallOptions, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + generate( + argument: BatchedGenerationRequest, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + GenerateStream( + argument: SingleGenerationRequest, + metadata: grpc.Metadata, + options?: grpc.CallOptions, + ): grpc.ClientReadableStream; + GenerateStream( + argument: SingleGenerationRequest, + options?: grpc.CallOptions, + ): grpc.ClientReadableStream; + generateStream( + argument: SingleGenerationRequest, + metadata: grpc.Metadata, + options?: grpc.CallOptions, + ): grpc.ClientReadableStream; + generateStream( + argument: SingleGenerationRequest, + options?: grpc.CallOptions, + ): grpc.ClientReadableStream; + ModelInfo( + argument: ModelInfoRequest, + metadata: grpc.Metadata, + options: grpc.CallOptions, + callback: grpc.requestCallback, + ): 
grpc.ClientUnaryCall; + ModelInfo( + argument: ModelInfoRequest, + metadata: grpc.Metadata, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + ModelInfo( + argument: ModelInfoRequest, + options: grpc.CallOptions, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + ModelInfo( + argument: ModelInfoRequest, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + modelInfo( + argument: ModelInfoRequest, + metadata: grpc.Metadata, + options: grpc.CallOptions, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + modelInfo( + argument: ModelInfoRequest, + metadata: grpc.Metadata, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + modelInfo( + argument: ModelInfoRequest, + options: grpc.CallOptions, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + modelInfo( + argument: ModelInfoRequest, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + Tokenize( + argument: BatchedTokenizeRequest, + metadata: grpc.Metadata, + options: grpc.CallOptions, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + Tokenize( + argument: BatchedTokenizeRequest, + metadata: grpc.Metadata, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + Tokenize( + argument: BatchedTokenizeRequest, + options: grpc.CallOptions, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + Tokenize( + argument: BatchedTokenizeRequest, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + tokenize( + argument: BatchedTokenizeRequest, + metadata: grpc.Metadata, + options: grpc.CallOptions, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + tokenize( + argument: BatchedTokenizeRequest, + metadata: grpc.Metadata, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + tokenize( + argument: BatchedTokenizeRequest, + options: grpc.CallOptions, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; + tokenize( + argument: BatchedTokenizeRequest, + callback: grpc.requestCallback, + ): grpc.ClientUnaryCall; +} +export interface GenerationServiceDefinition extends grpc.ServiceDefinition { + Generate: MethodDefinition< + BatchedGenerationRequest, + BatchedGenerationResponse, + BatchedGenerationRequest__Output, + BatchedGenerationResponse__Output + >; + GenerateStream: MethodDefinition< + SingleGenerationRequest, + GenerationResponse, + SingleGenerationRequest__Output, + GenerationResponse__Output + >; + ModelInfo: MethodDefinition< + ModelInfoRequest, + ModelInfoResponse, + ModelInfoRequest__Output, + ModelInfoResponse__Output + >; + Tokenize: MethodDefinition< + BatchedTokenizeRequest, + BatchedTokenizeResponse, + BatchedTokenizeRequest__Output, + BatchedTokenizeResponse__Output + >; +} + +export type SubtypeConstructor any, Subtype> = new ( + ...args: ConstructorParameters +) => Subtype; +export interface ProtoGrpcType { + fmaas: { + BatchedGenerationRequest: MessageTypeDefinition; + BatchedGenerationResponse: MessageTypeDefinition; + BatchedTokenizeRequest: MessageTypeDefinition; + BatchedTokenizeResponse: MessageTypeDefinition; + DecodingMethod: EnumTypeDefinition; + DecodingParameters: MessageTypeDefinition; + GenerationRequest: MessageTypeDefinition; + GenerationResponse: MessageTypeDefinition; + GenerationService: SubtypeConstructor & { + service: GenerationServiceDefinition; + }; + ModelInfoRequest: MessageTypeDefinition; + ModelInfoResponse: MessageTypeDefinition; + Parameters: MessageTypeDefinition; + ResponseOptions: MessageTypeDefinition; + SamplingParameters: MessageTypeDefinition; + 
SingleGenerationRequest: MessageTypeDefinition; + StopReason: EnumTypeDefinition; + StoppingCriteria: MessageTypeDefinition; + TokenInfo: MessageTypeDefinition; + TokenizeRequest: MessageTypeDefinition; + TokenizeResponse: MessageTypeDefinition; + }; +} diff --git a/tests/e2e/adapters/ibm-vllm/chat.test.ts b/tests/e2e/adapters/ibm-vllm/chat.test.ts new file mode 100644 index 00000000..165d0d3b --- /dev/null +++ b/tests/e2e/adapters/ibm-vllm/chat.test.ts @@ -0,0 +1,76 @@ +/** + * Copyright 2024 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { BaseMessage } from "@/llms/primitives/message.js"; +import { expect } from "vitest"; +import { verifyDeserialization } from "@tests/e2e/utils.js"; +import { IBMVllmChatLLM } from "@/adapters/ibm-vllm/chat.js"; + +describe.runIf( + [ + process.env.IBM_VLLM_URL, + process.env.IBM_VLLM_ROOT_CERT, + process.env.IBM_VLLM_PRIVATE_KEY, + process.env.IBM_VLLM_CERT_CHAIN, + ].every((env) => Boolean(env)), +)("IBM Chat vLLM", () => { + const createChatLLM = () => { + return IBMVllmChatLLM.fromPreset("meta-llama/llama-3-1-70b-instruct", { + parameters: { + method: "GREEDY", + stopping: { + min_new_tokens: 5, + max_new_tokens: 50, + }, + }, + }); + }; + + it("Generates", async () => { + const conversation = [ + BaseMessage.of({ + role: "system", + text: `You are a helpful and respectful and honest assistant. Your answer should be short and concise.`, + }), + ]; + const llm = createChatLLM(); + + for (const { question, answer } of [ + { question: `What is the coldest continent?`, answer: "arctica" }, + { question: "What is the most common typical animal that lives there?", answer: "penguin" }, + ]) { + conversation.push( + BaseMessage.of({ + role: "user", + text: question, + }), + ); + const response = await llm.generate(conversation); + + const newMessages = response.messages; + expect(newMessages).toHaveLength(1); + expect(newMessages[0].text.toLowerCase()).toContain(answer.toLowerCase()); + conversation.push(...newMessages); + } + }); + + it("Serializes", () => { + const llm = createChatLLM(); + const serialized = llm.serialize(); + const deserialized = IBMVllmChatLLM.fromSerialized(serialized); + verifyDeserialization(llm, deserialized); + }); +}); diff --git a/tests/e2e/adapters/ibm-vllm/llm.test.ts b/tests/e2e/adapters/ibm-vllm/llm.test.ts new file mode 100644 index 00000000..24496aee --- /dev/null +++ b/tests/e2e/adapters/ibm-vllm/llm.test.ts @@ -0,0 +1,59 @@ +/** + * Copyright 2024 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { BaseMessage } from "@/llms/primitives/message.js";
+import { expect } from "vitest";
+import { verifyDeserialization } from "@tests/e2e/utils.js";
+import { IBMVllmChatLLM } from "@/adapters/ibm-vllm/chat.js";
+
+describe.runIf(
+  [
+    process.env.IBM_VLLM_URL,
+    process.env.IBM_VLLM_ROOT_CERT,
+    process.env.IBM_VLLM_PRIVATE_KEY,
+    process.env.IBM_VLLM_CERT_CHAIN,
+  ].every((env) => Boolean(env)),
+)("IBM Chat vLLM", () => {
+  const createChatLLM = () => {
+    return IBMVllmChatLLM.fromPreset("meta-llama/llama-3-1-70b-instruct", {
+      parameters: {
+        method: "GREEDY",
+        stopping: {
+          min_new_tokens: 5,
+          max_new_tokens: 50,
+        },
+      },
+    });
+  };
+
+  it("Generates", async () => {
+    const conversation = [
+      BaseMessage.of({
+        role: "system",
+        text: `You are a helpful and respectful and honest assistant. Your answer should be short and concise.`,
+      }),
+    ];
+    const llm = createChatLLM();
+
+    for (const { question, answer } of [
+      { question: `What is the coldest continent?`, answer: "arctica" },
+      { question: "What is the most common typical animal that lives there?", answer: "penguin" },
+    ]) {
+      conversation.push(
+        BaseMessage.of({
+          role: "user",
+          text: question,
+        }),
+      );
+      const response = await llm.generate(conversation);
+
+      const newMessages = response.messages;
+      expect(newMessages).toHaveLength(1);
+      expect(newMessages[0].text.toLowerCase()).toContain(answer.toLowerCase());
+      conversation.push(...newMessages);
+    }
+  });
+
+  it("Serializes", () => {
+    const llm = createChatLLM();
+    const serialized = llm.serialize();
+    const deserialized = IBMVllmChatLLM.fromSerialized(serialized);
+    verifyDeserialization(llm, deserialized);
+  });
+});
diff --git a/tests/e2e/adapters/ibm-vllm/llm.test.ts b/tests/e2e/adapters/ibm-vllm/llm.test.ts
new file mode 100644
index 00000000..24496aee
--- /dev/null
+++ b/tests/e2e/adapters/ibm-vllm/llm.test.ts
@@ -0,0 +1,59 @@
+/**
+ * Copyright 2024 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { verifyDeserialization } from "@tests/e2e/utils.js";
+import { IBMvLLM, IBMvLLMOutput } from "@/adapters/ibm-vllm/llm.js";
+import { IBMVllmModel } from "@/adapters/ibm-vllm/chatPreset.js";
+
+describe.runIf(
+  [
+    process.env.IBM_VLLM_URL,
+    process.env.IBM_VLLM_ROOT_CERT,
+    process.env.IBM_VLLM_PRIVATE_KEY,
+    process.env.IBM_VLLM_CERT_CHAIN,
+  ].every((env) => Boolean(env)),
+)("IBM vLLM", () => {
+  const createLLM = () => {
+    return new IBMvLLM({ modelId: IBMVllmModel.LLAMA_3_1_70B_INSTRUCT });
+  };
+
+  it("Meta", async () => {
+    const llm = createLLM();
+    const response = await llm.meta();
+    expect(response.tokenLimit).toBeGreaterThan(0);
+  });
+
+  it("Generates", async () => {
+    const llm = createLLM();
+    const response = await llm.generate("Hello world!");
+    expect(response).toBeInstanceOf(IBMvLLMOutput);
+  });
+
+  it("Streams", async () => {
+    const llm = createLLM();
+    for await (const chunk of llm.stream("Hello world!")) {
+      expect(chunk).toBeInstanceOf(IBMvLLMOutput);
+      expect(chunk.text).toBeTruthy();
+    }
+  });
+
+  it("Serializes", () => {
+    const llm = createLLM();
+    const serialized = llm.serialize();
+    const deserialized = IBMvLLM.fromSerialized(serialized);
+    verifyDeserialization(llm, deserialized);
+  });
+});
diff --git a/tests/e2e/utils.ts b/tests/e2e/utils.ts
index 2293c4c7..192129ec 100644
--- a/tests/e2e/utils.ts
+++ b/tests/e2e/utils.ts
@@ -112,7 +112,6 @@ export function verifyDeserialization(
     if (target instanceof ZodType) {
       target = toJsonSchema(target);
     }
-    Serializer.findFactory(target);
     verifyDeserialization(value, target, parent, path.concat(key), ignoredPaths);
   }
diff --git a/yarn.lock b/yarn.lock
index 5fcbb358..b8e63753 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -718,6 +718,30 @@ __metadata:
   languageName: node
   linkType: hard
 
+"@grpc/grpc-js@npm:^1.11.3":
+  version: 1.11.3
+  resolution: "@grpc/grpc-js@npm:1.11.3"
+  dependencies:
+    "@grpc/proto-loader": "npm:^0.7.13"
+    "@js-sdsl/ordered-map": "npm:^4.4.2"
+  checksum: 10c0/2946a70c709688737603be573f6836beea26e4c132a50164591020860ae0e62375c1475c26017011fabfbaf6a9fa2bfdabfe9058aed11bab2f697e4242533afc
+  languageName: node
+  linkType: hard
+
+"@grpc/proto-loader@npm:^0.7.13":
+  version: 0.7.13
+  resolution: "@grpc/proto-loader@npm:0.7.13"
+  dependencies:
+    lodash.camelcase: "npm:^4.3.0"
+    long: "npm:^5.0.0"
+    protobufjs: "npm:^7.2.5"
+    yargs: "npm:^17.7.2"
+  bin:
+    proto-loader-gen-types: build/bin/proto-loader-gen-types.js
+  checksum: 10c0/dc8ed7aa1454c15e224707cc53d84a166b98d76f33606a9f334c7a6fb1aedd3e3614dcd2c2b02a6ffaf140587d19494f93b3a56346c6c2e26bc564f6deddbbf3
+  languageName: node
+  linkType: hard
+
 "@humanwhocodes/module-importer@npm:^1.0.1":
   version: 1.0.1
   resolution: "@humanwhocodes/module-importer@npm:1.0.1"
@@ -825,6 +849,13 @@ __metadata:
   languageName: node
   linkType: hard
 
+"@js-sdsl/ordered-map@npm:^4.4.2":
+  version: 4.4.2
+  resolution: "@js-sdsl/ordered-map@npm:4.4.2"
+  checksum: 10c0/cc7e15dc4acf6d9ef663757279600bab70533d847dcc1ab01332e9e680bd30b77cdf9ad885cc774276f51d98b05a013571c940e5b360985af5eb798dc1a2ee2b
+  languageName: node
+  linkType: hard
+
 "@jsdevtools/ono@npm:^7.1.3":
   version: 7.1.3
   resolution: "@jsdevtools/ono@npm:7.1.3"
@@ -1506,6 +1537,79 @@ __metadata:
   languageName: node
   linkType: hard
 
+"@protobufjs/aspromise@npm:^1.1.1, @protobufjs/aspromise@npm:^1.1.2":
+  version: 1.1.2
+  resolution: "@protobufjs/aspromise@npm:1.1.2"
+  checksum: 10c0/a83343a468ff5b5ec6bff36fd788a64c839e48a07ff9f4f813564f58caf44d011cd6504ed2147bf34835bd7a7dd2107052af755961c6b098fd8902b4f6500d0f
+  languageName: node
+  linkType: hard
+
+"@protobufjs/base64@npm:^1.1.2":
+  version: 1.1.2
+  resolution: "@protobufjs/base64@npm:1.1.2"
+  checksum: 10c0/eec925e681081af190b8ee231f9bad3101e189abbc182ff279da6b531e7dbd2a56f1f306f37a80b1be9e00aa2d271690d08dcc5f326f71c9eed8546675c8caf6
+  languageName: node
+  linkType: hard
+
+"@protobufjs/codegen@npm:^2.0.4":
+  version: 2.0.4
+  resolution: "@protobufjs/codegen@npm:2.0.4"
+  checksum: 10c0/26ae337c5659e41f091606d16465bbcc1df1f37cc1ed462438b1f67be0c1e28dfb2ca9f294f39100c52161aef82edf758c95d6d75650a1ddf31f7ddee1440b43
+  languageName: node
+  linkType: hard
+
+"@protobufjs/eventemitter@npm:^1.1.0":
+  version: 1.1.0
+  resolution: "@protobufjs/eventemitter@npm:1.1.0"
+  checksum: 10c0/1eb0a75180e5206d1033e4138212a8c7089a3d418c6dfa5a6ce42e593a4ae2e5892c4ef7421f38092badba4040ea6a45f0928869989411001d8c1018ea9a6e70
+  languageName: node
+  linkType: hard
+
+"@protobufjs/fetch@npm:^1.1.0":
+  version: 1.1.0
+  resolution: "@protobufjs/fetch@npm:1.1.0"
+  dependencies:
+    "@protobufjs/aspromise": "npm:^1.1.1"
+    "@protobufjs/inquire": "npm:^1.1.0"
+  checksum: 10c0/cda6a3dc2d50a182c5865b160f72077aac197046600091dbb005dd0a66db9cce3c5eaed6d470ac8ed49d7bcbeef6ee5f0bc288db5ff9a70cbd003e5909065233
+  languageName: node
+  linkType: hard
+
+"@protobufjs/float@npm:^1.0.2":
+  version: 1.0.2
+  resolution: "@protobufjs/float@npm:1.0.2"
+  checksum: 10c0/18f2bdede76ffcf0170708af15c9c9db6259b771e6b84c51b06df34a9c339dbbeec267d14ce0bddd20acc142b1d980d983d31434398df7f98eb0c94a0eb79069
+  languageName: node
+  linkType: hard
+
+"@protobufjs/inquire@npm:^1.1.0":
+  version: 1.1.0
+  resolution: "@protobufjs/inquire@npm:1.1.0"
+  checksum: 10c0/64372482efcba1fb4d166a2664a6395fa978b557803857c9c03500e0ac1013eb4b1aacc9ed851dd5fc22f81583670b4f4431bae186f3373fedcfde863ef5921a
+  languageName: node
+  linkType: hard
+
+"@protobufjs/path@npm:^1.1.2":
+  version: 1.1.2
+  resolution: "@protobufjs/path@npm:1.1.2"
+  checksum: 10c0/cece0a938e7f5dfd2fa03f8c14f2f1cf8b0d6e13ac7326ff4c96ea311effd5fb7ae0bba754fbf505312af2e38500250c90e68506b97c02360a43793d88a0d8b4
+  languageName: node
+  linkType: hard
+
+"@protobufjs/pool@npm:^1.1.0":
+  version: 1.1.0
+  resolution: "@protobufjs/pool@npm:1.1.0"
+  checksum: 10c0/eda2718b7f222ac6e6ad36f758a92ef90d26526026a19f4f17f668f45e0306a5bd734def3f48f51f8134ae0978b6262a5c517c08b115a551756d1a3aadfcf038
+  languageName: node
+  linkType: hard
+
+"@protobufjs/utf8@npm:^1.1.0":
+  version: 1.1.0
+  resolution: "@protobufjs/utf8@npm:1.1.0"
+  checksum: 10c0/a3fe31fe3fa29aa3349e2e04ee13dc170cc6af7c23d92ad49e3eeaf79b9766264544d3da824dba93b7855bd6a2982fb40032ef40693da98a136d835752beb487
+  languageName: node
+  linkType: hard
+
 "@redocly/ajv@npm:^8.11.0":
   version: 8.11.0
   resolution: "@redocly/ajv@npm:8.11.0"
@@ -2027,6 +2131,15 @@ __metadata:
   languageName: node
   linkType: hard
 
+"@types/node@npm:>=13.7.0":
+  version: 22.7.4
+  resolution: "@types/node@npm:22.7.4"
+  dependencies:
+    undici-types: "npm:~6.19.2"
+  checksum: 10c0/c22bf54515c78ff3170142c1e718b90e2a0003419dc2d55f79c9c9362edd590a6ab1450deb09ff6e1b32d1b4698da407930b16285e8be3a009ea6cd2695cac01
+  languageName: node
+  linkType: hard
+
 "@types/node@npm:^18.11.18":
   version: 18.19.45
   resolution: "@types/node@npm:18.19.45"
@@ -2633,6 +2746,8 @@ __metadata:
     "@eslint/js": "npm:^9.9.0"
     "@eslint/markdown": "npm:^6.0.0"
     "@googleapis/customsearch": "npm:^3.2.0"
+    "@grpc/grpc-js": "npm:^1.11.3"
"npm:^0.7.13" "@ibm-generative-ai/node-sdk": "npm:~3.2.3" "@langchain/community": "npm:~0.2.28" "@langchain/core": "npm:~0.2.27" @@ -2697,7 +2812,7 @@ __metadata: strip-ansi: "npm:^7.1.0" temp-dir: "npm:^3.0.0" tsc-files: "npm:^1.1.4" - tsup: "npm:^8.2.4" + tsup: "npm:^8.3.0" tsx: "npm:^4.19.0" turndown: "npm:^7.2.0" typescript: "npm:^5.5.4" @@ -2710,6 +2825,8 @@ __metadata: zod-to-json-schema: "npm:^3.23.3" peerDependencies: "@googleapis/customsearch": ^3.2.0 + "@grpc/grpc-js": ^1.11.3 + "@grpc/proto-loader": ^0.7.13 "@ibm-generative-ai/node-sdk": ~3.2.3 "@langchain/community": ~0.2.28 "@langchain/core": ~0.2.27 @@ -4552,6 +4669,18 @@ __metadata: languageName: node linkType: hard +"fdir@npm:^6.3.0": + version: 6.4.0 + resolution: "fdir@npm:6.4.0" + peerDependencies: + picomatch: ^3 || ^4 + peerDependenciesMeta: + picomatch: + optional: true + checksum: 10c0/9a03efa1335d78ea386b701799b08ad9e7e8da85d88567dc162cd28dd8e9486e8c269b3e95bfeb21dd6a5b14ebf69d230eb6e18f49d33fbda3cd97432f648c48 + languageName: node + linkType: hard + "fetch-blob@npm:^3.1.2, fetch-blob@npm:^3.1.4": version: 3.2.0 resolution: "fetch-blob@npm:3.2.0" @@ -6553,6 +6682,13 @@ __metadata: languageName: node linkType: hard +"long@npm:^5.0.0": + version: 5.2.3 + resolution: "long@npm:5.2.3" + checksum: 10c0/6a0da658f5ef683b90330b1af76f06790c623e148222da9d75b60e266bbf88f803232dd21464575681638894a84091616e7f89557aa087fd14116c0f4e0e43d9 + languageName: node + linkType: hard + "longest-streak@npm:^3.0.0": version: 3.1.0 resolution: "longest-streak@npm:3.1.0" @@ -8315,6 +8451,13 @@ __metadata: languageName: node linkType: hard +"picomatch@npm:^4.0.2": + version: 4.0.2 + resolution: "picomatch@npm:4.0.2" + checksum: 10c0/7c51f3ad2bb42c776f49ebf964c644958158be30d0a510efd5a395e8d49cb5acfed5b82c0c5b365523ce18e6ab85013c9ebe574f60305892ec3fa8eee8304ccc + languageName: node + linkType: hard + "pidtree@npm:~0.6.0": version: 0.6.0 resolution: "pidtree@npm:0.6.0" @@ -8533,6 +8676,26 @@ __metadata: languageName: node linkType: hard +"protobufjs@npm:^7.2.5": + version: 7.4.0 + resolution: "protobufjs@npm:7.4.0" + dependencies: + "@protobufjs/aspromise": "npm:^1.1.2" + "@protobufjs/base64": "npm:^1.1.2" + "@protobufjs/codegen": "npm:^2.0.4" + "@protobufjs/eventemitter": "npm:^1.1.0" + "@protobufjs/fetch": "npm:^1.1.0" + "@protobufjs/float": "npm:^1.0.2" + "@protobufjs/inquire": "npm:^1.1.0" + "@protobufjs/path": "npm:^1.1.2" + "@protobufjs/pool": "npm:^1.1.0" + "@protobufjs/utf8": "npm:^1.1.0" + "@types/node": "npm:>=13.7.0" + long: "npm:^5.0.0" + checksum: 10c0/a5460a63fe596523b9a067cbce39a6b310d1a71750fda261f076535662aada97c24450e18c5bc98a27784f70500615904ff1227e1742183509f0db4fdede669b + languageName: node + linkType: hard + "protocols@npm:^2.0.0, protocols@npm:^2.0.1": version: 2.0.1 resolution: "protocols@npm:2.0.1" @@ -9836,6 +9999,16 @@ __metadata: languageName: node linkType: hard +"tinyglobby@npm:^0.2.1": + version: 0.2.6 + resolution: "tinyglobby@npm:0.2.6" + dependencies: + fdir: "npm:^6.3.0" + picomatch: "npm:^4.0.2" + checksum: 10c0/d7b5eb4c5b9c341f961c1d3c30624f9a1e22b27b48a79a65b48120245a77c143827f75f5854628fef1a4bd4bc3cfaf06ce76497f3a574e3f933229c5e556e5fe + languageName: node + linkType: hard + "tinypool@npm:^1.0.0": version: 1.0.1 resolution: "tinypool@npm:1.0.1" @@ -9955,9 +10128,9 @@ __metadata: languageName: node linkType: hard -"tsup@npm:^8.2.4": - version: 8.2.4 - resolution: "tsup@npm:8.2.4" +"tsup@npm:^8.3.0": + version: 8.3.0 + resolution: "tsup@npm:8.3.0" dependencies: bundle-require: "npm:^5.0.0" cac: "npm:^6.7.14" 
@@ -9966,7 +10139,6 @@ __metadata:
     debug: "npm:^4.3.5"
     esbuild: "npm:^0.23.0"
     execa: "npm:^5.1.1"
-    globby: "npm:^11.1.0"
     joycon: "npm:^3.1.1"
     picocolors: "npm:^1.0.1"
     postcss-load-config: "npm:^6.0.1"
@@ -9974,6 +10146,7 @@ __metadata:
     rollup: "npm:^4.19.0"
     source-map: "npm:0.8.0-beta.0"
     sucrase: "npm:^3.35.0"
+    tinyglobby: "npm:^0.2.1"
     tree-kill: "npm:^1.2.2"
   peerDependencies:
     "@microsoft/api-extractor": ^7.36.0
@@ -9992,7 +10165,7 @@ __metadata:
   bin:
     tsup: dist/cli-default.js
     tsup-node: dist/cli-node.js
-  checksum: 10c0/0fa967ae0feb483528ae52fd9988d7931a092b3645a456e23b62ac03a0556da6f569fb2a77b3baf50ff1d12b9afac38aa9d29f15eb82c8a2b4faeda8362858da
+  checksum: 10c0/7f7132e48fca2284fd721077c6462c440dabdc95bcacf2e9837f81d2ca9771f804ff4f8b81a743e8fc6c3def856cf3ae99421c0568a7f030196abdc9e12e97e8
   languageName: node
   linkType: hard
 
@@ -10802,7 +10975,7 @@ __metadata:
   languageName: node
   linkType: hard
 
-"yargs@npm:^17.0.0":
+"yargs@npm:^17.0.0, yargs@npm:^17.7.2":
   version: 17.7.2
   resolution: "yargs@npm:17.7.2"
   dependencies: