-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
172 changed files
with
15,734 additions
and
677 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,3 +3,5 @@ run/ | |
__pycache__/ | ||
web/ | ||
secrets/ | ||
.vscode/ | ||
config/secrets.toml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Demo Plugin | ||
|
||
This directory contains worked examples of how to define your own Tests and SUTs in ModelGauge. For a more detailed tutorial, see the docs. To see this code in action, run the following from the top-level git directory: | ||
|
||
``` | ||
poetry install --extras demo | ||
poetry run modelgauge run-test --test demo_01 --sut demo_yes_no | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
from modelgauge.annotator import CompletionAnnotator | ||
from modelgauge.annotator_registry import ANNOTATORS | ||
from modelgauge.single_turn_prompt_response import PromptWithContext | ||
from modelgauge.sut import SUTCompletion | ||
from pydantic import BaseModel | ||
|
||
|
||
class DemoYBadAnnotation(BaseModel):
    """Annotation emitted by the demo annotator: one badness score."""

    # Score copied from the annotator's raw response.
    badness: float
|
||
|
||
class DemoYBadRequest(BaseModel):
    # The text that will be scored by the annotator.
    text: str
|
||
|
||
class DemoYBadResponse(BaseModel):
    # Raw score computed for the request text.
    score: float
|
||
|
||
class DemoYBadAnnotator(CompletionAnnotator[DemoYBadAnnotation]):
    """Toy annotator that penalizes every occurrence of the letter Y.

    Real annotators are expected to do expensive work on the text, such as
    calling another model or gathering human ratings. This demo only counts
    characters, which keeps it cheap and deterministic.
    """

    def translate_request(self, prompt: PromptWithContext, completion: SUTCompletion):
        # Only the completion text is scored; the prompt is not used.
        completion_text = completion.text
        return DemoYBadRequest(text=completion_text)

    def annotate(self, annotation_request: DemoYBadRequest) -> DemoYBadResponse:
        # One point for each upper- or lower-case Y in the text.
        text = annotation_request.text
        y_total = text.count("Y") + text.count("y")
        return DemoYBadResponse(score=y_total)

    def translate_response(self, request, response: DemoYBadResponse) -> DemoYBadAnnotation:
        # The raw score is reported unchanged as the badness.
        badness = response.score
        return DemoYBadAnnotation(badness=badness)
|
||
|
||
# Register the demo annotator with the ANNOTATORS registry as "demo_annotator".
ANNOTATORS.register(
    DemoYBadAnnotator,
    "demo_annotator",
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
from modelgauge.prompt import ChatPrompt, TextPrompt | ||
from modelgauge.prompt_formatting import format_chat | ||
from modelgauge.sut import PromptResponseSUT, SUTCompletion, SUTResponse | ||
from modelgauge.sut_capabilities import AcceptsChatPrompt, AcceptsTextPrompt | ||
from modelgauge.sut_decorator import modelgauge_sut | ||
from modelgauge.sut_registry import SUTS | ||
from pydantic import BaseModel | ||
|
||
|
||
class DemoYesNoRequest(BaseModel):
    """Request for the yes/no SUT; the prompt text is its only input."""

    # Prompt text (a chat prompt is flattened to text before it gets here).
    text: str
|
||
|
||
class DemoYesNoResponse(BaseModel):
    """Raw output of the yes/no SUT: the word count and the answer text."""

    # Number of whitespace-separated words found in the request text.
    number_of_words: int
    # "Yes" or "No", depending on whether the word count is even.
    text: str
|
||
|
||
@modelgauge_sut(capabilities=[AcceptsTextPrompt, AcceptsChatPrompt])
class DemoYesNoSUT(PromptResponseSUT[DemoYesNoRequest, DemoYesNoResponse]):
    """Minimal SUT example: the response is derived purely from the input prompt."""

    def translate_text_prompt(self, prompt: TextPrompt) -> DemoYesNoRequest:
        prompt_text = prompt.text
        return DemoYesNoRequest(text=prompt_text)

    def translate_chat_prompt(self, prompt: ChatPrompt) -> DemoYesNoRequest:
        # Flatten the chat into a single string before building the request.
        flattened = format_chat(prompt)
        return DemoYesNoRequest(text=flattened)

    def evaluate(self, request: DemoYesNoRequest) -> DemoYesNoResponse:
        # Answer "Yes" when the input has an even number of words, else "No".
        word_total = len(request.text.split())
        if word_total % 2 == 0:
            reply = "Yes"
        else:
            reply = "No"
        return DemoYesNoResponse(number_of_words=word_total, text=reply)

    def translate_response(self, request: DemoYesNoRequest, response: DemoYesNoResponse) -> SUTResponse:
        # This SUT always yields exactly one completion.
        only_completion = SUTCompletion(text=response.text)
        return SUTResponse(completions=[only_completion])
|
||
|
||
# Register the yes/no SUT with the SUTS registry as "demo_yes_no".
SUTS.register(
    DemoYesNoSUT,
    "demo_yes_no",
)
162 changes: 162 additions & 0 deletions
162
demo_plugin/modelgauge/suts/demo_02_secrets_and_options_sut.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
import random | ||
from modelgauge.prompt import ChatPrompt, SUTOptions, TextPrompt | ||
from modelgauge.secret_values import InjectSecret, RequiredSecret, SecretDescription | ||
from modelgauge.sut import PromptResponseSUT, SUTCompletion, SUTResponse | ||
from modelgauge.sut_capabilities import AcceptsChatPrompt, AcceptsTextPrompt | ||
from modelgauge.sut_decorator import modelgauge_sut | ||
from modelgauge.sut_registry import SUTS | ||
from pydantic import BaseModel | ||
from typing import Optional, Sequence | ||
|
||
|
||
class DemoRandomWordsRequest(BaseModel):
    """Arguments for one RandomWordsClient call.

    The field names mirror the keyword arguments of
    RandomWordsClient.make_call, so a dumped request can be passed straight
    through.
    """

    # Text whose words may be reused in the generated output.
    source_text: str
    # Number of words to generate per completion.
    num_words_desired: int
    # Number of completions to generate.
    num_completions: int
|
||
|
||
class DemoRandomWordsResponse(BaseModel):
    """Result of one RandomWordsClient call."""

    # One generated string per requested completion.
    completions: Sequence[str]
|
||
|
||
class DemoApiKey(RequiredSecret):
    """Secret declaration for the API key that this demo SUT requires."""

    @classmethod
    def description(cls) -> SecretDescription:
        # Identifies the secret by scope/key and tells the user how to obtain it.
        return SecretDescription(
            scope="demo",
            key="api_key",
            instructions="The password is 12345",
        )
|
||
|
||
@modelgauge_sut(capabilities=[AcceptsTextPrompt, AcceptsChatPrompt])
class DemoRandomWords(PromptResponseSUT[DemoRandomWordsRequest, DemoRandomWordsResponse]):
    """SUT whose completions are random words drawn using the input prompt."""

    def __init__(self, uid: str, api_key: DemoApiKey):
        """Store the injected secret; secrets arrive through the constructor."""
        super().__init__(uid)
        self.api_key = api_key.value
        # The client is created lazily so no work is done until it's
        # actually needed.
        self.client: Optional[RandomWordsClient] = None

    def _load_client(self) -> "RandomWordsClient":
        return RandomWordsClient(api_key=self.api_key)

    def translate_text_prompt(self, prompt: TextPrompt) -> DemoRandomWordsRequest:
        return self._translate(prompt.text, prompt.options)

    def translate_chat_prompt(self, prompt: ChatPrompt) -> DemoRandomWordsRequest:
        # Only the words in the chat history matter, not who said them.
        chat_words = _words_in_chat(prompt)
        return self._translate(chat_words, prompt.options)

    def _translate(self, text, options: SUTOptions) -> DemoRandomWordsRequest:
        # Carry the caller's requested options over into the client request.
        words_wanted = options.max_tokens
        completions_wanted = options.num_completions
        return DemoRandomWordsRequest(
            source_text=text,
            num_words_desired=words_wanted,
            num_completions=completions_wanted,
        )

    def evaluate(self, request: DemoRandomWordsRequest) -> DemoRandomWordsResponse:
        # Create the client on first use.
        if self.client is None:
            self.client = self._load_client()
        # The request fields match the client's keyword arguments, so the
        # dumped model can be splatted directly into the call.
        generated = self.client.make_call(**request.model_dump())
        return DemoRandomWordsResponse(completions=generated)

    def translate_response(self, request: DemoRandomWordsRequest, response: DemoRandomWordsResponse) -> SUTResponse:
        # Wrap each raw string in a SUTCompletion, preserving order.
        wrapped = [SUTCompletion(text=raw_text) for raw_text in response.completions]
        return SUTResponse(completions=wrapped)
|
||
|
||
def _words_in_chat(prompt: ChatPrompt) -> str:
    """Join every word from every message of a ChatPrompt into one string.

    Speaker information is discarded; words are separated by single spaces
    and kept in message order.
    """
    return " ".join(
        token
        for message in prompt.messages
        for token in message.text.split()
    )
|
||
|
||
# Register the random-words SUT as "demo_random_words"; its API key is
# supplied through secret injection.
SUTS.register(
    DemoRandomWords,
    "demo_random_words",
    InjectSecret(DemoApiKey),
)
|
||
# Everything below here is the implementation of the RandomWordsClient, | ||
# which you obviously wouldn't need to do for an existing SUT. | ||
|
||
# Give us some words to sample.
_STOCK_WORDS = [
    # Words intended to occasionally set off some Annotators
    "kill",
    "punch",
    "hate",
    "steal",
    "cocaine",
    "die",
    "poison",
    "virus",
    # https://en.wikipedia.org/wiki/Most_common_words_in_English
    "the",
    "be",
    "to",
    "of",
    "and",
    "a",
    "in",
    "that",
    "have",
    "I",
    "it",
    "for",
    "not",
    "on",
    "with",
    "he",
    "as",
    "you",
    "do",
    "at",
]
# Add a period every so often.
_SENTENCE_LENGTH = 15


class RandomWordsClient:
    """Act like an API for running the RandomWords SUT."""

    def __init__(self, api_key: str):
        """Validate the API key.

        Args:
            api_key: Must equal "12345".

        Raises:
            AssertionError: If the key is wrong.
        """
        # Raise explicitly instead of using `assert`, which is stripped
        # under `python -O` and would then let any key through. The
        # exception type and message are unchanged.
        if api_key != "12345":
            raise AssertionError("Invalid API key for this totally real service.")

    def make_call(self, *, source_text: str, num_words_desired: int, num_completions: int) -> Sequence[str]:
        """Generate `num_completions` completions.

        Each completion uses its index as the random seed, so repeated calls
        with the same arguments return the same strings.

        Args:
            source_text: Text whose words may appear in the output.
            num_words_desired: Number of words in each completion.
            num_completions: Number of completions to produce.

        Returns:
            A list with one generated string per completion.
        """
        return [
            self.make_completion(
                source_text=source_text,
                num_words_desired=num_words_desired,
                seed=index,
            )
            for index in range(num_completions)
        ]

    def make_completion(self, *, source_text: str, num_words_desired: int, seed: int) -> str:
        """Generate one space-separated completion of random words.

        Args:
            source_text: Text whose words are added to the sampling pool.
            num_words_desired: Number of words to emit.
            seed: Seed for the random generator, making the output repeatable.

        Returns:
            The chosen words joined by single spaces, with a period appended
            to every `_SENTENCE_LENGTH`-th word. Empty when
            `num_words_desired` is zero or negative.
        """
        # Seed to make the output repeatable.
        rng = random.Random(seed)
        # Can use both the incoming text and _STOCK_WORDS for output.
        word_options = source_text.split() + _STOCK_WORDS
        selected = []
        # Positions are 1-based so the period lands on word 15, 30, ...
        for position in range(1, num_words_desired + 1):
            word = rng.choice(word_options)
            # Add a period every _SENTENCE_LENGTH words.
            if position % _SENTENCE_LENGTH == 0:
                word += "."
            selected.append(word)
        return " ".join(selected)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
from modelgauge.prompt import ChatPrompt, TextPrompt | ||
from modelgauge.sut import PromptResponseSUT, SUTCompletion, SUTResponse | ||
from modelgauge.sut_capabilities import AcceptsChatPrompt, AcceptsTextPrompt | ||
from modelgauge.sut_decorator import modelgauge_sut | ||
from modelgauge.sut_registry import SUTS | ||
from pydantic import BaseModel | ||
|
||
|
||
class DemoConstantRequest(BaseModel):
    """Request for the constant SUT; it carries only the configured reply."""

    # The text the SUT was configured to return.
    configured_response: str
|
||
|
||
class DemoConstantResponse(BaseModel):
    """Response of the constant SUT; it holds only the configured reply."""

    # The configured text, echoed back from the request.
    configured_response: str
|
||
|
||
@modelgauge_sut(capabilities=[AcceptsTextPrompt, AcceptsChatPrompt])
class DemoConstantSUT(PromptResponseSUT[DemoConstantRequest, DemoConstantResponse]):
    """SUT that always answers with the text it was configured with."""

    def __init__(self, uid: str, response_text: str):
        super().__init__(uid)
        # The fixed reply returned for every prompt.
        self.response_text = response_text

    def translate_text_prompt(self, prompt: TextPrompt) -> DemoConstantRequest:
        # The prompt content is ignored; only the configured text is sent.
        canned_reply = self.response_text
        return DemoConstantRequest(configured_response=canned_reply)

    def translate_chat_prompt(self, prompt: ChatPrompt) -> DemoConstantRequest:
        # The prompt content is ignored; only the configured text is sent.
        canned_reply = self.response_text
        return DemoConstantRequest(configured_response=canned_reply)

    def evaluate(self, request: DemoConstantRequest) -> DemoConstantResponse:
        # Sanity check: the request must carry this SUT's configured text.
        requested_reply = request.configured_response
        assert self.response_text == requested_reply
        return DemoConstantResponse(configured_response=requested_reply)

    def translate_response(self, request: DemoConstantRequest, response: DemoConstantResponse) -> SUTResponse:
        # Exactly one completion, containing the configured text.
        only_completion = SUTCompletion(text=response.configured_response)
        return SUTResponse(completions=[only_completion])
|
||
|
||
# Arguments that follow the class are passed on to the class; here the
# response text is given positionally.
SUTS.register(
    DemoConstantSUT,
    "demo_always_angry",
    "I hate you!",
)
# Keyword arguments work as well.
SUTS.register(
    DemoConstantSUT,
    "demo_always_sorry",
    response_text="Sorry, I can't help with that.",
)
Oops, something went wrong.