add nvidia-nim-api plugin to plugins/ #683

Merged 6 commits on Nov 21, 2024
1 change: 1 addition & 0 deletions plugins/nvidia/README.md
@@ -0,0 +1 @@
Plugin for interacting with the NVIDIA NIM API.
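
Once installed, the SUTs this plugin registers can be instantiated through modelgauge's registry. A minimal sketch, assuming modelgauge's standard load_secrets_from_config helper and SUTS.make_instance factory (neither appears in this diff) and an api_key entry under the nvidia-nim-api scope in config/secrets.toml:

from modelgauge.config import load_secrets_from_config
from modelgauge.sut_registry import SUTS

# Reads config/secrets.toml; an [nvidia-nim-api] api_key entry must be present.
secrets = load_secrets_from_config()
sut = SUTS.make_instance("nvidia-nemotron-mini-4b-instruct", secrets=secrets)
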
162 changes: 162 additions & 0 deletions plugins/nvidia/modelgauge/suts/nvidia_nim_api_client.py
@@ -0,0 +1,162 @@
from typing import Any, Dict, List, Optional, Union

from openai import OpenAI
from openai.types.chat import ChatCompletion
from pydantic import BaseModel

from modelgauge.prompt import ChatPrompt, ChatRole, SUTOptions, TextPrompt
from modelgauge.secret_values import (
InjectSecret,
RequiredSecret,
SecretDescription,
)
from modelgauge.sut import (
PromptResponseSUT,
SUTCompletion,
SUTResponse,
)
from modelgauge.sut_capabilities import (
AcceptsChatPrompt,
AcceptsTextPrompt,
)
from modelgauge.sut_decorator import modelgauge_sut
from modelgauge.sut_registry import SUTS

_SYSTEM_ROLE = "system"
_USER_ROLE = "user"
_ASSISTANT_ROLE = "assistant"
_TOOL_ROLE = "tool"

_ROLE_MAP = {
ChatRole.user: _USER_ROLE,
ChatRole.sut: _ASSISTANT_ROLE,
ChatRole.system: _SYSTEM_ROLE,
}


class NvidiaNIMApiKey(RequiredSecret):
@classmethod
def description(cls) -> SecretDescription:
return SecretDescription(
scope="nvidia-nim-api",
key="api_key",
instructions="See https://build.nvidia.com/",
)


class OpenAIChatMessage(BaseModel):
content: str
role: str
name: Optional[str] = None
tool_calls: Optional[List[Dict]] = None
tool_call_id: Optional[str] = None


class OpenAIChatRequest(BaseModel):
messages: List[OpenAIChatMessage]
model: str
frequency_penalty: Optional[float] = None
    logit_bias: Optional[Dict] = None
max_tokens: Optional[int] = 256
n: Optional[int] = 1
presence_penalty: Optional[float] = None
response_format: Optional[Dict] = None
seed: Optional[int] = None
stop: Optional[Union[str, List[str]]] = None
stream: Optional[bool] = None
temperature: Optional[float] = 1.0
top_p: Optional[float] = None
tools: Optional[List] = None
tool_choice: Optional[Union[str, Dict]] = None
user: Optional[str] = None


@modelgauge_sut(
capabilities=[
AcceptsTextPrompt,
AcceptsChatPrompt,
]
)
class NvidiaNIMApiClient(PromptResponseSUT[OpenAIChatRequest, ChatCompletion]):
"""
    Documented at https://docs.api.nvidia.com/
"""

def __init__(self, uid: str, model: str, api_key: NvidiaNIMApiKey):
super().__init__(uid)
self.model = model
self.client: Optional[OpenAI] = None
self.api_key = api_key.value

def _load_client(self) -> OpenAI:
return OpenAI(api_key=self.api_key, base_url="https://integrate.api.nvidia.com/v1")

def translate_text_prompt(self, prompt: TextPrompt) -> OpenAIChatRequest:
messages = [OpenAIChatMessage(content=prompt.text, role=_USER_ROLE)]
return self._translate_request(messages, prompt.options)

def translate_chat_prompt(self, prompt: ChatPrompt) -> OpenAIChatRequest:
messages = []
for message in prompt.messages:
messages.append(OpenAIChatMessage(content=message.text, role=_ROLE_MAP[message.role]))
return self._translate_request(messages, prompt.options)

    def _translate_request(self, messages: List[OpenAIChatMessage], options: SUTOptions) -> OpenAIChatRequest:
optional_kwargs: Dict[str, Any] = {}
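        # Currently empty; a hook for request parameters that should only be sent when set.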
return OpenAIChatRequest(
messages=messages,
model=self.model,
frequency_penalty=options.frequency_penalty,
max_tokens=options.max_tokens,
n=options.num_completions,
presence_penalty=options.presence_penalty,
stop=options.stop_sequences,
top_p=options.top_p,
**optional_kwargs,
)

def evaluate(self, request: OpenAIChatRequest) -> ChatCompletion:
if self.client is None:
# Handle lazy init.
self.client = self._load_client()
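        # exclude_none=True drops unset optional fields so the API applies its own defaults.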
request_dict = request.model_dump(exclude_none=True)
return self.client.chat.completions.create(**request_dict)

def translate_response(self, request: OpenAIChatRequest, response: ChatCompletion) -> SUTResponse:
completions = []
for choice in response.choices:
text = choice.message.content
if text is None:
text = ""
completions.append(SUTCompletion(text=text))
return SUTResponse(completions=completions)


SUTS.register(
NvidiaNIMApiClient,
"nvidia-llama-3.1-nemotron-70b-instruct",
"nvidia/llama-3.1-nemotron-70b-instruct",
InjectSecret(NvidiaNIMApiKey),
)


SUTS.register(
NvidiaNIMApiClient,
"nvidia-nemotron-4-340b-instruct",
"nvidia/nemotron-4-340b-instruct",
InjectSecret(NvidiaNIMApiKey),
)

SUTS.register(
NvidiaNIMApiClient,
"nvidia-mistral-nemo-minitron-8b-8k-instruct",
"nvidia/mistral-nemo-minitron-8b-8k-instruct",
InjectSecret(NvidiaNIMApiKey),
)

SUTS.register(
NvidiaNIMApiClient,
"nvidia-nemotron-mini-4b-instruct",
"nvidia/nemotron-mini-4b-instruct",
InjectSecret(NvidiaNIMApiKey),
)
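
For reference, a full prompt round trip through this client looks like the sketch below. It uses only names defined in this file plus TextPrompt, and it only runs against the live API with a valid key (the key string here is a placeholder):

from modelgauge.prompt import TextPrompt
from modelgauge.suts.nvidia_nim_api_client import NvidiaNIMApiClient, NvidiaNIMApiKey

sut = NvidiaNIMApiClient(
    uid="nvidia-nemotron-mini-4b-instruct",
    model="nvidia/nemotron-mini-4b-instruct",
    api_key=NvidiaNIMApiKey("YOUR_API_KEY"),  # placeholder, not a real key
)
request = sut.translate_text_prompt(TextPrompt(text="Hello!"))
response = sut.evaluate(request)  # first call lazily creates the OpenAI client
print(sut.translate_response(request, response).completions[0].text)
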
16 changes: 16 additions & 0 deletions plugins/nvidia/pyproject.toml
@@ -0,0 +1,16 @@
[tool.poetry]
name = "modelgauge-nvidia"
version = "0.6.3"
description = "Plugin for interacting with the NVIDIA NIM API."
authors = ["MLCommons AI Safety <[email protected]>"]
readme = "README.md"
packages = [{include = "modelgauge"}]

[tool.poetry.dependencies]
python = "^3.10"
openai = "^1.8.0"


[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
64 changes: 64 additions & 0 deletions plugins/nvidia/tests/test_nvidia_nim_api_client.py
@@ -0,0 +1,64 @@
from modelgauge.suts.nvidia_nim_api_client import (
NvidiaNIMApiKey,
NvidiaNIMApiClient,
OpenAIChatMessage,
OpenAIChatRequest,
)
from openai.types.chat import ChatCompletion

from modelgauge.prompt import TextPrompt
from modelgauge.sut import SUTCompletion, SUTResponse


def _make_client():
return NvidiaNIMApiClient(uid="test-model", model="some-model", api_key=NvidiaNIMApiKey("some-value"))


def test_openai_chat_translate_request():
client = _make_client()
prompt = TextPrompt(text="some-text")
request = client.translate_text_prompt(prompt)
assert request == OpenAIChatRequest(
model="some-model",
messages=[OpenAIChatMessage(content="some-text", role="user")],
max_tokens=100,
n=1,
)


def test_openai_chat_translate_response():
client = _make_client()
request = OpenAIChatRequest(
model="some-model",
messages=[],
)
    # Response is based on the OpenAI chat completion example at https://platform.openai.com/docs/api-reference/chat/create
response = ChatCompletion.model_validate_json(
"""\
{
"id": "chatcmpl-123",
"object": "chat.completion",
"created": 1677652288,
"model": "nvidia/nemotron-mini-4b-instruct",
"system_fingerprint": "fp_44709d6fcb",
"choices": [{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello there, how may I assist you today?"
},
"logprobs": null,
"finish_reason": "stop"
}],
"usage": {
"prompt_tokens": 9,
"completion_tokens": 12,
"total_tokens": 21
}
}
"""
)
result = client.translate_response(request, response)
assert result == SUTResponse(
completions=[SUTCompletion(text="Hello there, how may I assist you today?", top_logprobs=None)]
)
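
A companion test for the chat-prompt path could look like the sketch below (not part of this PR); it assumes modelgauge.prompt exposes a ChatMessage model with text and role fields, which is what translate_chat_prompt reads:

from modelgauge.prompt import ChatMessage, ChatPrompt, ChatRole


def test_openai_chat_translate_chat_request():
    client = _make_client()
    prompt = ChatPrompt(
        messages=[
            ChatMessage(text="some-text", role=ChatRole.user),
            ChatMessage(text="some-response", role=ChatRole.sut),
        ]
    )
    request = client.translate_chat_prompt(prompt)
    # Roles are mapped through _ROLE_MAP; defaults come from SUTOptions.
    assert request == OpenAIChatRequest(
        model="some-model",
        messages=[
            OpenAIChatMessage(content="some-text", role="user"),
            OpenAIChatMessage(content="some-response", role="assistant"),
        ],
        max_tokens=100,
        n=1,
    )
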
21 changes: 19 additions & 2 deletions poetry.lock

Some generated files are not rendered by default.

5 changes: 4 additions & 1 deletion pyproject.toml
@@ -77,6 +77,7 @@ together = "^1.2.3"
modelgauge_anthropic = {version = "*", optional = true}
modelgauge_azure = {version = "*", optional = true}
modelgauge_demo_plugin = {version = "*", optional = false}
modelgauge_nvidia = {version = "*", optional = true}
modelgauge_standard_tests = {version = "*", optional = true}
modelgauge_openai = {version = "*", optional = true}
modelgauge_huggingface = {version = "*", optional = true}
@@ -89,6 +90,7 @@ modelgauge_mistral = {version = "*", optional = true}
modelgauge_anthropic = {path = "plugins/anthropic", develop = true, optional=true}
modelgauge_azure = {path = "plugins/azure", develop = true, optional=true}
modelgauge_demo_plugin = {path = "demo_plugin", develop = true, optional=false}
modelgauge_nvidia = {path = "plugins/nvidia", develop = true, optional=true}
modelgauge_standard_tests = {path = "plugins/standard_tests", develop = true, optional=true}
modelgauge_openai = {path = "plugins/openai", develop = true, optional=true}
modelgauge_huggingface = {path = "plugins/huggingface", develop = true, optional=true}
@@ -112,14 +114,15 @@ httpx = "^0.27.2"
anthropic = ["modelgauge_anthropic"]
azure = ["modelgauge_azure"]
demo = ["modelgauge_demo_plugin"]
nvidia = ["modelgauge_nvidia"]
standard_tests = ["modelgauge_standard_tests"]
openai = ["modelgauge_openai"]
huggingface = ["modelgauge_huggingface"]
perspective_api = ["modelgauge_perspective_api"]
google = ["modelgauge_google"]
vertexai = ["modelgauge_vertexai"]
mistral = ["modelgauge_mistral"]
all_plugins = ["modelgauge_anthropic", "modelgauge_azure", "modelgauge_demo_plugin", "modelgauge_openai", "modelgauge_standard_tests", "modelgauge_perspective_api", "modelgauge_huggingface", "modelgauge_google", "modelgauge_vertexai", "modelgauge_mistral"]
all_plugins = ["modelgauge_anthropic", "modelgauge_azure", "modelgauge_demo_plugin", "modelgauge_nvidia", "modelgauge_openai", "modelgauge_standard_tests", "modelgauge_perspective_api", "modelgauge_huggingface", "modelgauge_google", "modelgauge_vertexai", "modelgauge_mistral"]

[tool.poetry.scripts]
modelbench = "modelbench.run:cli"