firebase · pavelgj · Jul 23, 2024 · Jul 22, 2024 · Jul 23, 2024 · Jul 23, 2024
diff --git a/docs/plugins/vertex-ai.md b/docs/plugins/vertex-ai.md
@@ -148,7 +148,9 @@ export default configureGenkit({
   plugins: [
     vertexAI({
       location: 'us-central1',
-      modelGardenModels: [claude3Haiku, claude3Sonnet, claude3Opus],
+      modelGarden: {
+        models: [claude3Haiku, claude3Sonnet, claude3Opus],
+      },
     }),
   ],
 });
@@ -163,6 +165,36 @@ const llmResponse = await generate({
 });
 ```
 
+#### Llama 3.1 405b on Vertex AI Model Garden
+
+If you have access to Llama 3.1 405b in Vertex AI Model Garden, you can use it with Genkit.
+
+Here's a sample configuration for enabling Llama 3.1 405b in the Vertex AI plugin:
+
+```js
+import { vertexAI, llama3 } from '@genkit-ai/vertexai';
+
+export default configureGenkit({
+  plugins: [
+    vertexAI({
+      location: 'us-central1',
+      modelGarden: {
+        models: [llama3],
+      },
+    }),
+  ],
+});
+```
+
+Then use it like any other model:
+
+```js
+const llmResponse = await generate({
+  model: llama3,
+  prompt: 'Write a function that adds two numbers together',
+});
+```
+
 ### Evaluators
 
 To use the evaluators from Vertex AI Rapid Evaluation, add an `evaluation` block to your `vertexAI` plugin configuration.

diff --git a/js/plugins/vertexai/package.json b/js/plugins/vertexai/package.json
@@ -37,12 +37,15 @@
   "dependencies": {
     "@anthropic-ai/sdk": "^0.24.3",
     "@anthropic-ai/vertex-sdk": "^0.4.0",
-    "@genkit-ai/ai": "workspace:*",
-    "@genkit-ai/core": "workspace:*",
-    "@genkit-ai/flow": "workspace:*",
     "@google-cloud/vertexai": "^1.1.0",
     "google-auth-library": "^9.6.3",
     "node-fetch": "^3.3.2",
+    "openai": "^4.52.7"
+  },
+  "peerDependencies": {
+    "@genkit-ai/ai": "workspace:*",
+    "@genkit-ai/core": "workspace:*",
+    "@genkit-ai/flow": "workspace:*",
     "zod": "^3.22.4"
   },
   "devDependencies": {

diff --git a/js/plugins/vertexai/src/index.ts b/js/plugins/vertexai/src/index.ts
@@ -53,6 +53,11 @@ import {
   SUPPORTED_GEMINI_MODELS,
 } from './gemini.js';
 import { imagen2, imagen2Model } from './imagen.js';
+import {
+  llama3,
+  modelGardenOpenaiCompatibleModel,
+  SUPPORTED_OPENAI_FORMAT_MODELS,
+} from './model_garden.js';
 
 export {
   claude35Sonnet,
@@ -66,6 +71,7 @@ export {
   geminiPro,
   geminiProVision,
   imagen2,
+  llama3,
   textEmbedding004,
   textEmbeddingGecko,
   textEmbeddingGecko001,
@@ -87,7 +93,14 @@ export interface PluginOptions {
   evaluation?: {
     metrics: VertexAIEvaluationMetric[];
   };
+  /**
+   * @deprecated use `modelGarden.models`
+   */
   modelGardenModels?: ModelReference<any>[];
+  modelGarden?: {
+    models: ModelReference<any>[];
+    openAiBaseUrlTemplate?: string;
+  };
 }
 
 const CLOUD_PLATFROM_OAUTH_SCOPE =
@@ -134,15 +147,33 @@ export const vertexAI: Plugin<[PluginOptions] | []> = genkitPlugin(
       ),
     ];
 
-    if (options?.modelGardenModels) {
-      options?.modelGardenModels.forEach((m) => {
-        const entry = Object.entries(SUPPORTED_ANTHROPIC_MODELS).find(
+    if (options?.modelGardenModels || options?.modelGarden?.models) {
+      const mgModels =
+        options?.modelGardenModels || options?.modelGarden?.models;
+      mgModels!.forEach((m) => {
+        const anthropicEntry = Object.entries(SUPPORTED_ANTHROPIC_MODELS).find(
+          ([_, value]) => value.name === m.name
+        );
+        if (anthropicEntry) {
+          models.push(anthropicModel(anthropicEntry[0], projectId, location));
+          return;
+        }
+        const openaiModel = Object.entries(SUPPORTED_OPENAI_FORMAT_MODELS).find(
           ([_, value]) => value.name === m.name
         );
-        if (!entry) {
-          throw new Error(`Unsupported model garden model: ${m.name}`);
+        if (openaiModel) {
+          models.push(
+            modelGardenOpenaiCompatibleModel(
+              openaiModel[0],
+              projectId,
+              location,
+              authClient,
+              options.modelGarden?.openAiBaseUrlTemplate
+            )
+          );
+          return;
         }
-        models.push(anthropicModel(entry[0], projectId, location));
+        throw new Error(`Unsupported model garden model: ${m.name}`);
       });
     }
 

diff --git a/js/plugins/vertexai/src/model_garden.ts b/js/plugins/vertexai/src/model_garden.ts
@@ -0,0 +1,89 @@
+/**
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { ModelAction, modelRef } from '@genkit-ai/ai/model';
+import { GENKIT_CLIENT_HEADER } from '@genkit-ai/core';
+import { GoogleAuth } from 'google-auth-library';
+import OpenAI from 'openai';
+
+import {
+  openaiCompatibleModel,
+  OpenAIConfigSchema,
+} from './openai_compatibility.js';
+
+/**
+ * Maximum age, in milliseconds, of a fetched access token before the client
+ * factory below requests a new one (50 minutes).
+ */
+const ACCESS_TOKEN_TTL = 50 * 60 * 1000; // cache access token for 50 minutes
+
+// Single source of truth for the model version string used by `llama3`.
+const LLAMA3_405B_VERSION = 'meta/llama3-405b-instruct-maas';
+
+/**
+ * Model reference for Llama 3.1 405b, registered under the name
+ * `vertexai/llama3-405b` and configured with `OpenAIConfigSchema`.
+ *
+ * Declared capabilities: multi-turn chat, tools and a system role, with
+ * text-only output and no media input.
+ */
+export const llama3 = modelRef({
+  name: 'vertexai/llama3-405b',
+  info: {
+    label: 'Llama 3.1 405b',
+    supports: {
+      multiturn: true,
+      tools: true,
+      media: false,
+      systemRole: true,
+      output: ['text'],
+    },
+    versions: [LLAMA3_405B_VERSION],
+  },
+  configSchema: OpenAIConfigSchema,
+  version: LLAMA3_405B_VERSION,
+});
+
+/**
+ * Model references that are wrapped with `openaiCompatibleModel`, keyed by the
+ * short name that `modelGardenOpenaiCompatibleModel` receives as `name`.
+ */
+export const SUPPORTED_OPENAI_FORMAT_MODELS = {
+  'llama3-405b': llama3,
+};
+
+/**
+ * Creates a model action for a model listed in
+ * `SUPPORTED_OPENAI_FORMAT_MODELS`, backed by an `OpenAI` client that
+ * authenticates with an access token obtained from `googleAuth`.
+ *
+ * The client is built lazily on first use and rebuilt with a fresh token once
+ * the previous token is older than `ACCESS_TOKEN_TTL`.
+ *
+ * @param name key of the model in `SUPPORTED_OPENAI_FORMAT_MODELS`
+ * @param projectId value substituted for every `{projectId}` in the template
+ * @param location value substituted for every `{location}` in the template
+ * @param googleAuth source of access tokens for the client
+ * @param baseUrlTemplate endpoint URL template; when omitted or empty, the
+ *     built-in default template below is used
+ * @returns the action produced by `openaiCompatibleModel` for the model
+ * @throws Error if `name` is not a supported model; the client factory also
+ *     throws if no access token could be obtained
+ */
+export function modelGardenOpenaiCompatibleModel(
+  name: string,
+  projectId: string,
+  location: string,
+  googleAuth: GoogleAuth,
+  baseUrlTemplate: string | undefined
+): ModelAction<typeof OpenAIConfigSchema> {
+  // Match own keys only, so inherited names such as "toString" or
+  // "constructor" are rejected instead of resolving to prototype members.
+  const model = Object.entries(SUPPORTED_OPENAI_FORMAT_MODELS).find(
+    ([key]) => key === name
+  )?.[1];
+  if (!model) throw new Error(`Unsupported model: ${name}`);
+
+  // `||` rather than `??` on purpose: an empty template must fall back to the
+  // default as well, exactly like an omitted one.
+  const baseURL = (
+    baseUrlTemplate ||
+    'https://{location}-aiplatform.googleapis.com/v1beta1/projects/{projectId}/locations/{location}/endpoints/openapi'
+  )
+    .replace(/{location}/g, location)
+    .replace(/{projectId}/g, projectId);
+
+  let accessTokenFetchTime = 0;
+  let clientCache: OpenAI | undefined;
+  const clientFactory = async () => {
+    if (
+      !clientCache ||
+      accessTokenFetchTime + ACCESS_TOKEN_TTL < Date.now()
+    ) {
+      const accessToken = await googleAuth.getAccessToken();
+      if (!accessToken) {
+        // Fail here rather than building a client that would send an
+        // unusable credential; the cache is left untouched so the next call
+        // retries the token fetch.
+        throw new Error(
+          'Unable to obtain an access token for Vertex AI Model Garden.'
+        );
+      }
+      accessTokenFetchTime = Date.now();
+      clientCache = new OpenAI({
+        baseURL,
+        apiKey: accessToken,
+        defaultHeaders: {
+          'X-Goog-Api-Client': GENKIT_CLIENT_HEADER,
+        },
+      });
+    }
+
+    return clientCache;
+  };
+  return openaiCompatibleModel(model, clientFactory);
+}