Skip to content

Commit

Permalink
feat: Expanded benchmarking to 100 samples and added further analysis…
Browse files Browse the repository at this point in the history
… of the resulting metrics.
  • Loading branch information
kurisu committed Oct 24, 2024
1 parent af5828d commit b561eb0
Show file tree
Hide file tree
Showing 7 changed files with 639 additions and 543 deletions.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,11 @@ TBD

TBD

## Future Work
## Related Research

* [RETRO: Improving language models by retrieving from trillions of tokens](https://arxiv.org/abs/2112.04426)
* [RETRO-pytorch](https://github.com/lucidrains/RETRO-pytorch)
* [Why isn't Retro mainstream? State-of-the-art within reach](https://www.reddit.com/r/MachineLearning/comments/1cffgkt/d_why_isnt_retro_mainstream_stateoftheart_within/)

TBD

Expand Down
28 changes: 18 additions & 10 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@
)
from tools.text_to_image import TextToImageTool
from transformers import load_tool
from prompts import DEFAULT_SQUAD_REACT_CODE_SYSTEM_PROMPT, FOCUSED_SQUAD_REACT_CODE_SYSTEM_PROMPT
from prompts import (
DEFAULT_SQUAD_REACT_CODE_SYSTEM_PROMPT,
FOCUSED_SQUAD_REACT_CODE_SYSTEM_PROMPT,
)
from pygments.formatters import HtmlFormatter


Expand Down Expand Up @@ -65,7 +68,7 @@
model_name=model_name,
toolbox=TASK_SOLVING_TOOLBOX,
system_prompt=system_prompt,
use_openai=True, # Use OpenAI instead of a local or HF model as the base LLM engine
use_openai=True, # Use OpenAI instead of a local or HF model as the base LLM engine
)

app = None
Expand Down Expand Up @@ -130,24 +133,28 @@ def update_session(value, request: Request):

return component


from gradio.components import (
Component as GradioComponent,
)
from gradio.components.chatbot import Chatbot, FileDataDict, FileData, ComponentMessage, FileMessage
from gradio.components.chatbot import (
Chatbot,
FileDataDict,
FileData,
ComponentMessage,
FileMessage,
)


class CleanChatBot(Chatbot):
def __init__(self, **kwargs):
super().__init__(**kwargs)

def _postprocess_content(
self,
chat_message: str
| tuple
| list
| FileDataDict
| FileData
| GradioComponent
| None,
chat_message: (
str | tuple | list | FileDataDict | FileData | GradioComponent | None
),
) -> str | FileMessage | ComponentMessage | None:
response = super()._postprocess_content(chat_message)
print(f"Post processing content: {response}")
Expand All @@ -156,6 +163,7 @@ def _postprocess_content(
response.props["open"] = False
return response


with gr.Blocks(
fill_height=True,
css=".gradio-container .message .content {text-align: left;}"
Expand Down
1,148 changes: 616 additions & 532 deletions benchmarking.ipynb

Large diffs are not rendered by default.

Binary file modified benchmarks/baseline.pkl
Binary file not shown.
Binary file modified benchmarks/focused.pkl
Binary file not shown.
Binary file modified benchmarks/succinct.pkl
Binary file not shown.
Binary file modified samples/samples.pkl
Binary file not shown.

0 comments on commit b561eb0

Please sign in to comment.