Skip to content

Commit

Permalink
add nvidia-nim-api plugin to plugins/
Browse files Browse the repository at this point in the history
  • Loading branch information
zijiachen95 committed Nov 13, 2024
1 parent ce0f033 commit edba0ed
Show file tree
Hide file tree
Showing 4 changed files with 257 additions and 0 deletions.
1 change: 1 addition & 0 deletions plugins/nvidia/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Plugin for interacting with the NVIDIA NIM API.
173 changes: 173 additions & 0 deletions plugins/nvidia/modelgauge/suts/nvidia_nim_api_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
from modelgauge.prompt import ChatPrompt, ChatRole, SUTOptions, TextPrompt
from modelgauge.secret_values import (
InjectSecret,
OptionalSecret,
RequiredSecret,
SecretDescription,
)
from modelgauge.sut import (
PromptResponseSUT,
SUTCompletion,
SUTResponse,
TopTokens,
)
from modelgauge.sut_capabilities import (
AcceptsChatPrompt,
AcceptsTextPrompt,
)
from modelgauge.sut_decorator import modelgauge_sut
from modelgauge.sut_registry import SUTS
from openai import OpenAI
from openai.types.chat import ChatCompletion
from pydantic import BaseModel
from typing import Any, Dict, List, Optional, Union


# Role strings placed in the `role` field of outgoing chat messages.
_SYSTEM_ROLE = "system"
_USER_ROLE = "user"
_ASSISTANT_ROLE = "assistant"
# NOTE(review): the value here is "tool_call_id", which is also the name of a
# field on OpenAIChatMessage; the constant is not referenced in the visible
# code — confirm the intended role string before using it.
_TOOL_ROLE = "tool_call_id"

# Translates modelgauge ChatRole members into the role strings defined above.
_ROLE_MAP = {
    ChatRole.user: _USER_ROLE,
    ChatRole.sut: _ASSISTANT_ROLE,
    ChatRole.system: _SYSTEM_ROLE,
}


class NvidiaNIMApiKey(RequiredSecret):
    """Required secret holding the API key used by the NVIDIA NIM client."""

    @classmethod
    def description(cls) -> SecretDescription:
        """Return the scope, key name, and instructions that identify this secret."""
        nim_secret = SecretDescription(
            scope="nvidia-nim-api",
            key="api_key",
            instructions="See https://build.nvidia.com/",
        )
        return nim_secret


class OpenAIChatMessage(BaseModel):
    """One entry in the `messages` list of an OpenAIChatRequest."""

    # Required fields: the message text and the role string of its author.
    content: str
    role: str

    # Optional fields; all default to None.
    name: Optional[str] = None
    tool_calls: Optional[List[Dict]] = None
    tool_call_id: Optional[str] = None


class OpenAIChatRequest(BaseModel):
    """Keyword arguments for a chat-completion call, held as a pydantic model.

    NvidiaNIMApiClient.evaluate dumps this model with ``exclude_none=True`` and
    passes the result as keyword arguments, so any field left at None is not
    sent at all.
    """

    # Required: the conversation and the model to run it against.
    messages: List[OpenAIChatMessage]
    model: str

    # Optional parameters. Only max_tokens, n, and temperature have non-None
    # defaults.
    frequency_penalty: Optional[float] = None
    # NOTE(review): typed as a bool here; confirm against the API reference
    # whether a different type (e.g. a token-to-bias mapping) is expected.
    logit_bias: Optional[bool] = None
    max_tokens: Optional[int] = 256
    n: Optional[int] = 1
    presence_penalty: Optional[float] = None
    response_format: Optional[Dict] = None
    seed: Optional[int] = None
    stop: Optional[Union[str, List[str]]] = None
    stream: Optional[bool] = None
    temperature: Optional[float] = 1.0
    top_p: Optional[float] = None
    tools: Optional[List] = None
    tool_choice: Optional[Union[str, Dict]] = None
    user: Optional[str] = None


@modelgauge_sut(
    capabilities=[
        AcceptsTextPrompt,
        AcceptsChatPrompt,
    ]
)
class NvidiaNIMApiClient(PromptResponseSUT[OpenAIChatRequest, ChatCompletion]):
    """SUT that sends chat-completion requests to the NVIDIA NIM API.

    Requests go through the `openai` client pointed at the NVIDIA endpoint.

    Documented at https://docs.api.nvidia.com/
    """

    def __init__(self, uid: str, model: str, api_key: NvidiaNIMApiKey):
        """Store the model name and API key; the HTTP client is created lazily.

        Args:
            uid: Identifier forwarded to the base SUT class.
            model: Model name placed in every request.
            api_key: Secret whose ``value`` is used as the API key.
        """
        super().__init__(uid)
        self.model = model
        # Stays None until the first call to evaluate().
        self.client: Optional[OpenAI] = None
        self.api_key = api_key.value

    def _load_client(self) -> OpenAI:
        """Create an OpenAI client bound to the NVIDIA endpoint and stored key."""
        return OpenAI(
            api_key=self.api_key,
            base_url="https://integrate.api.nvidia.com/v1",
        )

    def translate_text_prompt(self, prompt: TextPrompt) -> OpenAIChatRequest:
        """Wrap the prompt text in a single user message and build the request."""
        messages = [OpenAIChatMessage(content=prompt.text, role=_USER_ROLE)]
        return self._translate_request(messages, prompt.options)

    def translate_chat_prompt(self, prompt: ChatPrompt) -> OpenAIChatRequest:
        """Convert each chat message, mapping its role through ``_ROLE_MAP``.

        Raises:
            KeyError: If a message role has no entry in ``_ROLE_MAP``.
        """
        messages = [
            OpenAIChatMessage(content=message.text, role=_ROLE_MAP[message.role])
            for message in prompt.messages
        ]
        return self._translate_request(messages, prompt.options)

    def _translate_request(
        self, messages: List[OpenAIChatMessage], options: SUTOptions
    ) -> OpenAIChatRequest:
        """Combine the messages with this SUT's model and the prompt options.

        NOTE(review): no temperature is taken from ``options``, so every
        request carries OpenAIChatRequest's default of 1.0 — confirm that
        ignoring a caller-supplied temperature is intended.
        """
        return OpenAIChatRequest(
            messages=messages,
            model=self.model,
            frequency_penalty=options.frequency_penalty,
            max_tokens=options.max_tokens,
            n=options.num_completions,
            presence_penalty=options.presence_penalty,
            stop=options.stop_sequences,
            top_p=options.top_p,
        )

    def evaluate(self, request: OpenAIChatRequest) -> ChatCompletion:
        """Send the request and return the raw completion object."""
        if self.client is None:
            # Handle lazy init.
            self.client = self._load_client()
        # Fields left at None are dropped so they are not sent as arguments.
        request_dict = request.model_dump(exclude_none=True)
        return self.client.chat.completions.create(**request_dict)

    def translate_response(
        self, request: OpenAIChatRequest, response: ChatCompletion
    ) -> SUTResponse:
        """Turn every returned choice into a SUTCompletion.

        A choice whose message content is None yields an empty-string
        completion. ``request`` is not used.
        """
        completions = [
            SUTCompletion(
                text="" if choice.message.content is None else choice.message.content
            )
            for choice in response.choices
        ]
        return SUTResponse(completions=completions)


# Register one SUT per NVIDIA-hosted model. The same string is passed twice
# to each call (presumably once as the registry uid and once as the model
# name given to NvidiaNIMApiClient), and every registration injects the API
# key secret.
for _nim_model in (
    "nvidia/llama-3.1-nemotron-70b-instruct",
    "nvidia/nemotron-4-340b-instruct",
    "nvidia/mistral-nemo-minitron-8b-8k-instruct",
    "nvidia/nemotron-mini-4b-instruct",
):
    SUTS.register(
        NvidiaNIMApiClient,
        _nim_model,
        _nim_model,
        InjectSecret(NvidiaNIMApiKey),
    )
16 changes: 16 additions & 0 deletions plugins/nvidia/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Poetry packaging metadata for the NVIDIA NIM plugin.
[tool.poetry]
name = "modelgauge-nvidia"
version = "0.6.3"
description = ""
# NOTE(review): the address below looks like an email-obfuscation placeholder
# rather than a real address — confirm against the repository copy.
authors = ["MLCommons AI Safety <[email protected]>"]
readme = "README.md"
# The package contents are taken from the "modelgauge" directory.
packages = [{include = "modelgauge"}]

[tool.poetry.dependencies]
python = "^3.10"
# The SUT client module imports OpenAI and ChatCompletion from this package.
openai = "^1.8.0"


# Build with poetry-core.
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
67 changes: 67 additions & 0 deletions plugins/nvidia/tests/test_nvidia_nim_api_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from modelgauge.prompt import SUTOptions, TextPrompt
from modelgauge.sut import SUTCompletion, SUTResponse, TokenProbability, TopTokens
from modelgauge.suts.nvidia_nim_api_client import (
NvidiaNIMApiKey,
NvidiaNIMApiClient,
OpenAIChatMessage,
OpenAIChatRequest
)
from openai.types.chat import ChatCompletion


def _make_client():
    """Build a client with placeholder uid, model, and key values for tests."""
    placeholder_key = NvidiaNIMApiKey("some-value")
    return NvidiaNIMApiClient(
        uid="test-model",
        model="some-model",
        api_key=placeholder_key,
    )


def test_openai_chat_translate_request():
    """A text prompt translates to one user message with max_tokens=100, n=1."""
    sut = _make_client()
    translated = sut.translate_text_prompt(TextPrompt(text="some-text"))
    expected = OpenAIChatRequest(
        model="some-model",
        messages=[OpenAIChatMessage(content="some-text", role="user")],
        max_tokens=100,
        n=1,
    )
    assert translated == expected

def test_openai_chat_translate_response():
    """A one-choice completion translates to a single SUTCompletion with its text."""
    sut = _make_client()
    empty_request = OpenAIChatRequest(
        model="some-model",
        messages=[],
    )
    # The payload follows the response shape shown in the OpenAI API reference:
    # https://platform.openai.com/docs/api-reference/chat/create
    payload = """\
{
"id": "chatcmpl-123",
"object": "chat.completion",
"created": 1677652288,
"model": "nvidia/nemotron-mini-4b-instruct",
"system_fingerprint": "fp_44709d6fcb",
"choices": [{
"index": 0,
"message": {
"role": "assistant",
"content": "Hello there, how may I assist you today?"
},
"logprobs": null,
"finish_reason": "stop"
}],
"usage": {
"prompt_tokens": 9,
"completion_tokens": 12,
"total_tokens": 21
}
}
"""
    completion = ChatCompletion.model_validate_json(payload)
    translated = sut.translate_response(empty_request, completion)
    expected = SUTResponse(
        completions=[
            SUTCompletion(
                text="Hello there, how may I assist you today?",
                top_logprobs=None,
            )
        ]
    )
    assert translated == expected

0 comments on commit edba0ed

Please sign in to comment.