From 32cd78f2213dbdd70d483d68371b9e96893d23f6 Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Wed, 10 Apr 2024 06:07:16 +0000 Subject: [PATCH] =?UTF-8?q?=E2=9C=85=20test:=20fix=20OpenRouter=20models?= =?UTF-8?q?=20test?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../__snapshots__/index.test.ts.snap | 82 +++++++++++++++++++ .../openrouter/fixtures/models.json | 62 ++++++++++++++ .../agent-runtime/openrouter/index.test.ts | 13 +++ src/libs/agent-runtime/openrouter/index.ts | 2 +- 4 files changed, 158 insertions(+), 1 deletion(-) create mode 100644 src/libs/agent-runtime/openrouter/__snapshots__/index.test.ts.snap create mode 100644 src/libs/agent-runtime/openrouter/fixtures/models.json diff --git a/src/libs/agent-runtime/openrouter/__snapshots__/index.test.ts.snap b/src/libs/agent-runtime/openrouter/__snapshots__/index.test.ts.snap new file mode 100644 index 000000000000..b8a5781a0f86 --- /dev/null +++ b/src/libs/agent-runtime/openrouter/__snapshots__/index.test.ts.snap @@ -0,0 +1,82 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`LobeOpenRouterAI > models > should get models 1`] = ` +[ + { + "description": "LLaVA is a large multimodal model that combines a vision encoder and Vicuna for general-purpose visual and language understanding, achieving impressive chat capabilities mimicking [GPT-4](/models/openai/gpt-4-vision-preview) and setting a new state-of-the-art accuracy on Science QA + +#multimodal", + "displayName": "Llava 13B", + "enabled": false, + "functionCall": false, + "id": "haotian-liu/llava-13b", + "maxTokens": undefined, + "tokens": 2048, + "vision": true, + }, + { + "description": "This vision-language model builds on innovations from the popular [OpenHermes-2.5](/models/teknium/openhermes-2.5-mistral-7b) model, by Teknium. It adds vision support, and is trained on a custom dataset enriched with function calling + +This project is led by [qnguyen3](https://twitter.com/stablequan) and [teknium](https://twitter.com/Teknium1). + +#multimodal", + "displayName": "Nous: Hermes 2 Vision 7B (alpha)", + "enabled": false, + "functionCall": true, + "id": "nousresearch/nous-hermes-2-vision-7b", + "maxTokens": undefined, + "tokens": 4096, + "vision": true, + }, + { + "description": "GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks. + +Updated by OpenAI to point to the [latest version of GPT-3.5](/models?q=openai/gpt-3.5). Training data up to Sep 2021.", + "displayName": "OpenAI: GPT-3.5 Turbo", + "enabled": true, + "functionCall": false, + "id": "openai/gpt-3.5-turbo", + "maxTokens": 4096, + "tokens": 16385, + "vision": false, + }, + { + "description": "Ability to understand images, in addition to all other [GPT-4 Turbo capabilties](/models/openai/gpt-4-turbo). Training data: up to Apr 2023. + +**Note:** heavily rate limited by OpenAI while in preview. + +#multimodal", + "displayName": "OpenAI: GPT-4 Vision", + "enabled": true, + "functionCall": false, + "id": "openai/gpt-4-vision-preview", + "maxTokens": 4096, + "tokens": 128000, + "vision": true, + }, + { + "description": "Gemma by Google is an advanced, open-source language model family, leveraging the latest in decoder-only, text-to-text technology. It offers English language capabilities across text generation tasks like question answering, summarization, and reasoning. The Gemma 7B variant is comparable in performance to leading open source models. 
+ +Usage of Gemma is subject to Google's [Gemma Terms of Use](https://ai.google.dev/gemma/terms).", + "displayName": "Google: Gemma 7B", + "enabled": true, + "functionCall": false, + "id": "google/gemma-7b-it", + "maxTokens": undefined, + "tokens": 8192, + "vision": false, + }, + { + "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge + +Note: this is a higher-throughput version of [this model](/models/gryphe/mythomax-l2-13b), and may have higher prices and slightly different outputs.", + "displayName": "MythoMax 13B (nitro)", + "enabled": false, + "functionCall": false, + "id": "gryphe/mythomax-l2-13b:nitro", + "maxTokens": undefined, + "tokens": 4096, + "vision": false, + }, +] +`; diff --git a/src/libs/agent-runtime/openrouter/fixtures/models.json b/src/libs/agent-runtime/openrouter/fixtures/models.json new file mode 100644 index 000000000000..1fe6a23fef1a --- /dev/null +++ b/src/libs/agent-runtime/openrouter/fixtures/models.json @@ -0,0 +1,62 @@ +[ + { + "id": "haotian-liu/llava-13b", + "name": "Llava 13B", + "description": "LLaVA is a large multimodal model that combines a vision encoder and Vicuna for general-purpose visual and language understanding, achieving impressive chat capabilities mimicking [GPT-4](/models/openai/gpt-4-vision-preview) and setting a new state-of-the-art accuracy on Science QA\n\n#multimodal", + "pricing": { "prompt": "0.000005", "completion": "0.000005", "image": "0", "request": "0" }, + "context_length": 2048, + "architecture": { "modality": "multimodal", "tokenizer": "Llama2", "instruct_type": null }, + "top_provider": { "max_completion_tokens": null, "is_moderated": false }, + "per_request_limits": { "prompt_tokens": "891204", "completion_tokens": "891204" } + }, + { + "id": "nousresearch/nous-hermes-2-vision-7b", + "name": "Nous: Hermes 2 Vision 7B (alpha)", + "description": "This vision-language model builds on innovations from the popular [OpenHermes-2.5](/models/teknium/openhermes-2.5-mistral-7b) model, by Teknium. It adds vision support, and is trained on a custom dataset enriched with function calling\n\nThis project is led by [qnguyen3](https://twitter.com/stablequan) and [teknium](https://twitter.com/Teknium1).\n\n#multimodal", + "pricing": { "prompt": "0.000005", "completion": "0.000005", "image": "0", "request": "0" }, + "context_length": 4096, + "architecture": { "modality": "multimodal", "tokenizer": "Mistral", "instruct_type": null }, + "top_provider": { "max_completion_tokens": null, "is_moderated": false }, + "per_request_limits": { "prompt_tokens": "891204", "completion_tokens": "891204" } + }, + { + "id": "openai/gpt-3.5-turbo", + "name": "OpenAI: GPT-3.5 Turbo", + "description": "GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.\n\nUpdated by OpenAI to point to the [latest version of GPT-3.5](/models?q=openai/gpt-3.5). 
Training data up to Sep 2021.", + "pricing": { "prompt": "0.0000005", "completion": "0.0000015", "image": "0", "request": "0" }, + "context_length": 16385, + "architecture": { "modality": "text", "tokenizer": "GPT", "instruct_type": null }, + "top_provider": { "max_completion_tokens": 4096, "is_moderated": true }, + "per_request_limits": { "prompt_tokens": "8912044", "completion_tokens": "2970681" } + }, + { + "id": "openai/gpt-4-vision-preview", + "name": "OpenAI: GPT-4 Vision", + "description": "Ability to understand images, in addition to all other [GPT-4 Turbo capabilties](/models/openai/gpt-4-turbo). Training data: up to Apr 2023.\n\n**Note:** heavily rate limited by OpenAI while in preview.\n\n#multimodal", + "pricing": { "prompt": "0.00001", "completion": "0.00003", "image": "0.01445", "request": "0" }, + "context_length": 128000, + "architecture": { "modality": "multimodal", "tokenizer": "GPT", "instruct_type": null }, + "top_provider": { "max_completion_tokens": 4096, "is_moderated": true }, + "per_request_limits": { "prompt_tokens": "445602", "completion_tokens": "148534" } + }, + { + "id": "google/gemma-7b-it", + "name": "Google: Gemma 7B", + "description": "Gemma by Google is an advanced, open-source language model family, leveraging the latest in decoder-only, text-to-text technology. It offers English language capabilities across text generation tasks like question answering, summarization, and reasoning. The Gemma 7B variant is comparable in performance to leading open source models.\n\nUsage of Gemma is subject to Google's [Gemma Terms of Use](https://ai.google.dev/gemma/terms).", + "pricing": { "prompt": "0.00000013", "completion": "0.00000013", "image": "0", "request": "0" }, + "context_length": 8192, + "architecture": { "modality": "text", "tokenizer": "Llama2", "instruct_type": "gemma" }, + "top_provider": { "max_completion_tokens": null, "is_moderated": false }, + "per_request_limits": { "prompt_tokens": "34277093", "completion_tokens": "34277093" } + }, + { + "id": "gryphe/mythomax-l2-13b:nitro", + "name": "MythoMax 13B (nitro)", + "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. 
#merge\n\nNote: this is a higher-throughput version of [this model](/models/gryphe/mythomax-l2-13b), and may have higher prices and slightly different outputs.", + "pricing": { "prompt": "0.0000002", "completion": "0.0000002", "image": "0", "request": "0" }, + "context_length": 4096, + "architecture": { "modality": "text", "tokenizer": "Llama2", "instruct_type": "alpaca" }, + "top_provider": { "max_completion_tokens": null, "is_moderated": false }, + "per_request_limits": { "prompt_tokens": "22280110", "completion_tokens": "22280110" } + } +] diff --git a/src/libs/agent-runtime/openrouter/index.test.ts b/src/libs/agent-runtime/openrouter/index.test.ts index b4cc9bd718e8..306c9126af4c 100644 --- a/src/libs/agent-runtime/openrouter/index.test.ts +++ b/src/libs/agent-runtime/openrouter/index.test.ts @@ -5,6 +5,7 @@ import { Mock, afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { ChatStreamCallbacks, LobeOpenAICompatibleRuntime } from '@/libs/agent-runtime'; import * as debugStreamModule from '../utils/debugStream'; +import models from './fixtures/models.json'; import { LobeOpenRouterAI } from './index'; const provider = 'openrouter'; @@ -24,6 +25,7 @@ beforeEach(() => { vi.spyOn(instance['client'].chat.completions, 'create').mockResolvedValue( new ReadableStream() as any, ); + vi.spyOn(instance['client'].models, 'list').mockResolvedValue({ data: [] } as any); }); afterEach(() => { @@ -347,4 +349,15 @@ describe('LobeOpenRouterAI', () => { }); }); }); + + describe('models', () => { + it('should get models', async () => { + // mock the models.list method + (instance['client'].models.list as Mock).mockResolvedValue({ data: models }); + + const list = await instance.models(); + + expect(list).toMatchSnapshot(); + }); + }); }); diff --git a/src/libs/agent-runtime/openrouter/index.ts b/src/libs/agent-runtime/openrouter/index.ts index 5f268979587f..bc5b29ca1e73 100644 --- a/src/libs/agent-runtime/openrouter/index.ts +++ b/src/libs/agent-runtime/openrouter/index.ts @@ -36,7 +36,7 @@ export const LobeOpenRouterAI = LobeOpenAICompatibleFactory({ ? model.top_provider.max_completion_tokens : undefined, tokens: model.context_length, - vision: model.description.includes('vision'), + vision: model.description.includes('vision') || model.id.includes('vision'), }; }, },
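The behavioural change locked in by the new fixture and snapshot is the final hunk: the `vision` flag now falls back to the model id, so a card such as `openai/gpt-4-vision-preview`, whose description never contains the word "vision", is still detected. Below is a minimal sketch of that part of the transform; `OpenRouterModelCard` and the standalone `transformModel` helper are names assumed here purely for illustration, since in the repository the logic lives inline in the `models` option passed to `LobeOpenAICompatibleFactory` and also derives fields such as `displayName`, `enabled`, and `functionCall` that are omitted from the sketch.

```ts
// Sketch only: in the repository this logic sits inline in the `models` option
// of LobeOpenAICompatibleFactory (src/libs/agent-runtime/openrouter/index.ts).
// `OpenRouterModelCard` and `transformModel` are assumed names for illustration;
// the real transform also fills `displayName`, `enabled`, and `functionCall`,
// which this patch does not touch.

interface OpenRouterModelCard {
  context_length: number;
  description: string;
  id: string;
  name: string;
  top_provider: { is_moderated: boolean; max_completion_tokens: number | null };
}

const transformModel = (model: OpenRouterModelCard) => ({
  id: model.id,
  maxTokens:
    typeof model.top_provider.max_completion_tokens === 'number'
      ? model.top_provider.max_completion_tokens
      : undefined,
  tokens: model.context_length,
  // The fix under test: also check the model id, so a card like
  // `openai/gpt-4-vision-preview` is flagged as vision-capable even though
  // its description never contains the literal word "vision".
  vision: model.description.includes('vision') || model.id.includes('vision'),
});

// Against the gpt-4-vision-preview entry from fixtures/models.json,
// transformModel(card).vision is true because the id contains "vision",
// matching the `"vision": true` value recorded in the new snapshot.
```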