Skip to content

Commit

Permalink
add support for NVIDIA llm (#1645)
Browse files Browse the repository at this point in the history
### What problem does this PR solve?

add support for NVIDIA llm
### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: Zhedong Cen <[email protected]>
  • Loading branch information
hangters and aopstudio authored Jul 23, 2024
1 parent 95821f6 commit b4a281e
Show file tree
Hide file tree
Showing 8 changed files with 508 additions and 7 deletions.
284 changes: 284 additions & 0 deletions conf/llm_factories.json
Original file line number Diff line number Diff line change
Expand Up @@ -1918,6 +1918,290 @@
"model_type": "chat"
}
]
},
{
"name": "NVIDIA",
"logo": "",
"tags": "LLM,TEXT EMBEDDING, TEXT RE-RANK",
"status": "1",
"llm": [
{
"llm_name": "nvidia/nemotron-4-340b-reward",
"tags": "LLM,CHAT,4K",
"max_tokens": 4096,
"model_type": "chat"
},
{
"llm_name": "aisingapore/sea-lion-7b-instruct",
"tags": "LLM,CHAT,4K",
"max_tokens": 4096,
"model_type": "chat"
},
{
"llm_name": "databricks/dbrx-instruct",
"tags": "LLM,CHAT,16K",
"max_tokens": 16384,
"model_type": "chat"
},
{
"llm_name": "google/gemma-7b",
"tags": "LLM,CHAT,32K",
"max_tokens": 32768,
"model_type": "chat"
},
{
"llm_name": "google/gemma-2b",
"tags": "LLM,CHAT,16K",
"max_tokens": 16384,
"model_type": "chat"
},
{
"llm_name": "google/gemma-2-9b-it",
"tags": "LLM,CHAT,8K",
"max_tokens": 8192,
"model_type": "chat"
},
{
"llm_name": "google/gemma-2-27b-it",
"tags": "LLM,CHAT,8K",
"max_tokens": 8192,
"model_type": "chat"
},
{
"llm_name": "google/recurrentgemma-2b",
"tags": "LLM,CHAT,4K",
"max_tokens": 4096,
"model_type": "chat"
},
{
"llm_name": "mediatek/breeze-7b-instruct",
"tags": "LLM,CHAT,8K",
"max_tokens": 8192,
"model_type": "chat"
},
{
"llm_name": "meta/llama2-70b",
"tags": "LLM,CHAT,4K",
"max_tokens": 4096,
"model_type": "chat"
},
{
"llm_name": "meta/llama3-8b",
"tags": "LLM,CHAT,8K",
"max_tokens": 8192,
"model_type": "chat"
},
{
"llm_name": "meta/llama3-70b",
"tags": "LLM,CHAT,8K",
"max_tokens": 8192,
"model_type": "chat"
},
{
"llm_name": "microsoft/phi-3-medium-128k-instruct",
"tags": "LLM,CHAT,128K",
"max_tokens": 131072,
"model_type": "chat"
},
{
"llm_name": "microsoft/phi-3-medium-4k-instruct",
"tags": "LLM,CHAT,4K",
"max_tokens": 4096,
"model_type": "chat"
},
{
"llm_name": "microsoftphi-3-mini-128k-instruct",
"tags": "LLM,CHAT,128K",
"max_tokens": 131072,
"model_type": "chat"
},
{
"llm_name": "microsoft/phi-3-mini-4k-instruct",
"tags": "LLM,CHAT,4K",
"max_tokens": 4096,
"model_type": "chat"
},
{
"llm_name": "microsoft/phi-3-small-128k-instruct",
"tags": "LLM,CHAT,128K",
"max_tokens": 131072,
"model_type": "chat"
},
{
"llm_name": "microsoft/phi-3-small-8k-instruct",
"tags": "LLM,CHAT,8K",
"max_tokens": 8192,
"model_type": "chat"
},
{
"llm_name": "mistralai/mistral-7b-instruct",
"tags": "LLM,CHAT,4K",
"max_tokens": 4096,
"model_type": "chat"
},
{
"llm_name": "mistralai/mistral-7b-instruct-v0.3",
"tags": "LLM,CHAT,4K",
"max_tokens": 4096,
"model_type": "chat"
},
{
"llm_name": "mistralai/mixtral-8x7b-instruct",
"tags": "LLM,CHAT,32K",
"max_tokens": 32768,
"model_type": "chat"
},
{
"llm_name": "mistralai/mixtral-8x22b-instruct",
"tags": "LLM,CHAT,64K",
"max_tokens": 65536,
"model_type": "chat"
},
{
"llm_name": "mistralai/mistral-large",
"tags": "LLM,CHAT,32K",
"max_tokens": 32768,
"model_type": "chat"
},
{
"llm_name": "nv-mistralai/mistral-nemo-12b-instruct",
"tags": "LLM,CHAT,128K",
"max_tokens": 131072,
"model_type": "chat"
},
{
"llm_name": "nvidia/llama3-chatqa-1.5-70b",
"tags": "LLM,CHAT,4K",
"max_tokens": 4096,
"model_type": "chat"
},
{
"llm_name": "nvidia/llama3-chatqa-1.5-8b",
"tags": "LLM,CHAT,4K",
"max_tokens": 4096,
"model_type": "chat"
},
{
"llm_name": "nvidia/nemotron-4-340b-instruct",
"tags": "LLM,CHAT,4K",
"max_tokens": 4096,
"model_type": "chat"
},
{
"llm_name": "seallms/seallm-7b-v2.5",
"tags": "LLM,CHAT,4K",
"max_tokens": 4096,
"model_type": "chat"
},
{
"llm_name": "snowflake/arctic",
"tags": "LLM,CHAT,4K",
"max_tokens": 4096,
"model_type": "chat"
},
{
"llm_name": "upstage/solar-10.7b-instruct",
"tags": "LLM,CHAT,4K",
"max_tokens": 4096,
"model_type": "chat"
},
{
"llm_name": "baai/bge-m3",
"tags": "TEXT EMBEDDING,8K",
"max_tokens": 8192,
"model_type": "embedding"
},
{
"llm_name": "nvidia/embed-qa-4",
"tags": "TEXT EMBEDDING,512",
"max_tokens": 512,
"model_type": "embedding"
},
{
"llm_name": "nvidia/nv-embed-v1",
"tags": "TEXT EMBEDDING,32K",
"max_tokens": 32768,
"model_type": "embedding"
},
{
"llm_name": "nvidia/nv-embedqa-e5-v5",
"tags": "TEXT EMBEDDING,512",
"max_tokens": 512,
"model_type": "embedding"
},
{
"llm_name": "nvidia/nv-embedqa-mistral-7b-v2",
"tags": "TEXT EMBEDDING,512",
"max_tokens": 512,
"model_type": "embedding"
},
{
"llm_name": "nvidia/nv-rerankqa-mistral-4b-v3",
"tags": "RE-RANK,512",
"max_tokens": 512,
"model_type": "rerank"
},
{
"llm_name": "nvidia/rerank-qa-mistral-4b",
"tags": "RE-RANK,512",
"max_tokens": 512,
"model_type": "rerank"
},
{
"llm_name": "snowflake/arctic-embed-l",
"tags": "TEXT EMBEDDING,512",
"max_tokens": 512,
"model_type": "embedding"
},
{
"llm_name": "adept/fuyu-8b",
"tags": "LLM,IMAGE2TEXT,4K",
"max_tokens": 4096,
"model_type": "image2text"
},
{
"llm_name": "google/deplot",
"tags": "LLM,IMAGE2TEXT,4K",
"max_tokens": 4096,
"model_type": "image2text"
},
{
"llm_name": "google/paligemma",
"tags": "LLM,IMAGE2TEXT,4K",
"max_tokens": 4096,
"model_type": "image2text"
},
{
"llm_name": "Iiuhaotian/Ilava-v1.6-34b",
"tags": "LLM,IMAGE2TEXT,4K",
"max_tokens": 4096,
"model_type": "image2text"
},
{
"llm_name": "Iiuhaotian/Ilava-v1.6-mistral-7b",
"tags": "LLM,IMAGE2TEXT,4K",
"max_tokens": 4096,
"model_type": "image2text"
},
{
"llm_name": "microsoft/kosmos-2",
"tags": "LLM,IMAGE2TEXT,4K",
"max_tokens": 4096,
"model_type": "image2text"
},
{
"llm_name": "microsoft/phi-3-vision-128k-instruct",
"tags": "LLM,IMAGE2TEXT,128K",
"max_tokens": 131072,
"model_type": "image2text"
},
{
"llm_name": "nvidia/neva-22b",
"tags": "LLM,IMAGE2TEXT,4K",
"max_tokens": 4096,
"model_type": "image2text"
}
]
}
]
}
12 changes: 8 additions & 4 deletions rag/llm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@
"BAAI": DefaultEmbedding,
"Mistral": MistralEmbed,
"Bedrock": BedrockEmbed,
"Gemini":GeminiEmbed
"Gemini":GeminiEmbed,
"NVIDIA":NvidiaEmbed
}


Expand All @@ -48,7 +49,8 @@
"Moonshot": LocalCV,
'Gemini':GeminiCV,
'OpenRouter':OpenRouterCV,
"LocalAI":LocalAICV
"LocalAI":LocalAICV,
"NVIDIA":NvidiaCV
}


Expand All @@ -71,15 +73,17 @@
"Bedrock": BedrockChat,
"Groq": GroqChat,
'OpenRouter':OpenRouterChat,
"StepFun":StepFunChat
"StepFun":StepFunChat,
"NVIDIA":NvidiaChat
}


RerankModel = {
"BAAI": DefaultRerank,
"Jina": JinaRerank,
"Youdao": YoudaoRerank,
"Xinference": XInferenceRerank
"Xinference": XInferenceRerank,
"NVIDIA":NvidiaRerank
}


Expand Down
Loading

0 comments on commit b4a281e

Please sign in to comment.