Skip to content

Commit

Permalink
Merge pull request #11 from simular-ai/dev
Browse files Browse the repository at this point in the history
Setup Instructions, Deprecated Code Cleanup
  • Loading branch information
eric-simu authored Oct 20, 2024
2 parents 4630fb8 + 62d0124 commit 00d8388
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 341 deletions.
Binary file removed .DS_Store
Binary file not shown.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -160,4 +160,5 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
logs/
logs/
.DS_Store
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,26 @@ Install the agent_s package and dependencies
pip install -e .
```

Set your LLM API keys and other environment variables. You can do this by adding the following lines to your .bashrc (Linux) or .zshrc (macOS) file. We support OpenAI, Azure OpenAI, Anthropic, and vLLM models.

1. OpenAI
```
export OPENAI_API_KEY=<YOUR_API_KEY>
```
2. Anthropic
```
export ANTHROPIC_API_KEY=<YOUR_API_KEY>
```
3. OpenAI on Azure
```
export AZURE_OPENAI_API_BASE=<YOUR_ENDPOINT_URL>
export AZURE_OPENAI_API_KEY=<YOUR_API_KEY>
```
4. vLLM for Local Models
```
export vLLM_ENDPOINT_URL=<YOUR_DEPLOYMENT_URL>
```

### Setup Retrieval from Web using Perplexica

1. Ensure Docker is installed and running on your system.
Expand Down
164 changes: 11 additions & 153 deletions agent_s/MultimodalAgent.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,12 @@
from agent_s.MultimodalEngine import (
LMMEngineOpenAI,
LMMEngineAzureOpenAI,
LMMEngineLlava,
LMMEngineCogVLM,
LMMEnginevLLM,
LMMEngineAnthropic,
LMMEngineQwen,
)
import base64
import re

# TODO: Import only if module exists, else ignore
# from llava.constants import (
# IMAGE_TOKEN_INDEX,
# DEFAULT_IMAGE_TOKEN,
# DEFAULT_IM_START_TOKEN,
# DEFAULT_IM_END_TOKEN,
# IMAGE_PLACEHOLDER,
# )

data_type_map = {
"openai": {"image_url": "image_url"},
"anthropic": {"image_url": "image"},
Expand All @@ -42,12 +30,6 @@ def __init__(self, engine_params=None, system_prompt=None, engine=None):
self.engine = LMMEngineAzureOpenAI(**engine_params)
elif engine_type == "vllm":
self.engine = LMMEnginevLLM(**engine_params)
elif engine_type == "qwen":
self.engine = LMMEngineQwen(**engine_params)
elif engine_type == "llava":
self.engine = LMMEngineLlava(**engine_params)
elif engine_type == "cogvlm":
self.engine = LMMEngineCogVLM(**engine_params)
else:
raise ValueError("engine_type must be either 'openai' or 'azure'")
else:
Expand All @@ -73,15 +55,13 @@ def encode_image(self, image_content):
def reset(
self,
):
if isinstance(self.engine, (LMMEngineCogVLM, LMMEngineLlava)):
self.messages = []
else:
self.messages = [
{
"role": "system",
"content": [{"type": "text", "text": self.system_prompt}],
}
]

self.messages = [
{
"role": "system",
"content": [{"type": "text", "text": self.system_prompt}],
}
]

def add_system_prompt(self, system_prompt):
self.system_prompt = system_prompt
Expand All @@ -98,12 +78,6 @@ def add_system_prompt(self, system_prompt):
}
)

# Don't add the system prompt if we are using llava or other hf models
if isinstance(self.engine, LMMEngineLlava) or isinstance(
self.engine, LMMEngineCogVLM
):
self.messages = []

def remove_message_at(self, index):
"""Remove a message at a given index"""
if index < len(self.messages):
Expand Down Expand Up @@ -135,80 +109,8 @@ def add_message(
):
"""Add a new message to the list of messages"""

# For inference from locally hosted llava based on https://github.com/haotian-liu/LLaVA/
if isinstance(self.engine, LMMEngineLlava):

# No system prompt so first message will be from user
if len(self.messages) == 0:
role = "user"
else:
# infer role from previous message
if self.messages[-1]["role"] == "user":
role = "assistant"
elif self.messages[-1]["role"] == "assistant":
role = "user"

image_token_se = (
DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN
)

qs = text_content
if role == "user":
if len(self.messages) == 0:
# If this is the very first user message, add the system prompt to it to dictate behavior
qs = self.system_prompt + "\n" + qs
# TODO: Add comment explaining what this next part does
if IMAGE_PLACEHOLDER in qs:
if self.engine.model.config.mm_use_im_start_end:
qs = re.sub(IMAGE_PLACEHOLDER, image_token_se, qs)
else:
qs = re.sub(IMAGE_PLACEHOLDER, DEFAULT_IMAGE_TOKEN, qs)
else:
if self.engine.model.config.mm_use_im_start_end:
qs = image_token_se + "\n" + qs
else:
qs = DEFAULT_IMAGE_TOKEN + "\n" + qs

message = {"role": role, "content": qs}
else:
message = {"role": role, "content": text_content}

# Capable of handling only one image right now. TODO: make capable of handling more images
if image_content:
if self.engine.args.image_file == None:
self.engine.args.image_file = image_content

self.messages.append(message)

elif isinstance(self.engine, LMMEngineCogVLM):
# No system prompt so first message will be from user
if len(self.messages) == 0:
role = "user"
else:
# infer role from previous message
if self.messages[-1]["role"] == "user":
role = "assistant"
elif self.messages[-1]["role"] == "assistant":
role = "user"

# Add message content as a new message, if this is the first message prepend with system prompt
if len(self.messages) == 0:
self.messages.append(
{
"role": role,
"content": {
"type": "text",
"text": self.system_prompt + "\n\n" + text_content,
},
}
)
else:
self.messages.append(
{"role": role, "content": {"type": "text", "text": text_content}}
)

# For API-style inference from OpenAI and AzureOpenAI
elif isinstance(self.engine, (LMMEngineOpenAI, LMMEngineAzureOpenAI)):
# API-style inference from OpenAI and AzureOpenAI
if isinstance(self.engine, (LMMEngineOpenAI, LMMEngineAzureOpenAI)):
# infer role from previous message
if role != "user":
if self.messages[-1]["role"] == "system":
Expand Down Expand Up @@ -299,8 +201,8 @@ def add_message(
)
self.messages.append(message)

# Custom Qwen Model inference
elif isinstance(self.engine, LMMEngineQwen):
# Locally hosted vLLM model inference
elif isinstance(self.engine, LMMEnginevLLM):
# infer role from previous message
if role != "user":
if self.messages[-1]["role"] == "system":
Expand Down Expand Up @@ -338,50 +240,6 @@ def add_message(
)
self.messages.append(message)

# Custom Llama3.2 Model inference
elif isinstance(self.engine, LMMEngineTogether):
# infer role from previous message
if role != "user":
if self.messages[-1]["role"] == "system":
role = "user"
elif self.messages[-1]["role"] == "user":
role = "assistant"
elif self.messages[-1]["role"] == "assistant":
role = "user"

message = {
"role": role,
"content": [{"type": "text", "text": text_content}],
}

if image_content:
# Check if image_content is a list or a single image
if isinstance(image_content, list):
# If image_content is a list of images, loop through each image
for image in image_content:
base64_image = self.encode_image(image)
message["content"].append(
{
"type": "image_url",
"image_url": {
"url": f"data:image/png;base64,{base64_image}",
},
}
)
else:
# If image_content is a single image, handle it directly
base64_image = self.encode_image(image_content)
message["content"].append(
{
"type": "image_url",
"image_url": {
"url": f"data:image/png;base64,{base64_image}",
},
}
)

self.messages.append(message)

def get_response(
self,
user_message=None,
Expand Down
Loading

0 comments on commit 00d8388

Please sign in to comment.