Add LiteLLM as an agent for model connections #53

Merged: 20 commits, Nov 4, 2024
9 changes: 9 additions & 0 deletions .github/workflows/workflows/pr-agent.yaml
@@ -0,0 +1,9 @@
name: AI PR Agent

on:
pull_request:
types: [opened, reopened, ready_for_review]

jobs:
pr_agent_job:
uses: SolaceDev/ai-build-actions/.github/workflows/ai_pr.yaml@use_sonnet_3_5
134 changes: 134 additions & 0 deletions .pr_agent.toml
@@ -0,0 +1,134 @@
[config]
model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0"
model_turbo="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0"
fallback_models="[bedrock/anthropic.claude-3-sonnet-20240229-v1:0]"
git_provider="github"
publish_output=true
publish_output_progress=false
verbosity_level=2 # 0,1,2
use_extra_bad_extensions=false
use_wiki_settings_file=true
use_repo_settings_file=true
use_global_settings_file=true
ai_timeout=120 # 2 minutes
max_description_tokens = 800
max_commits_tokens = 500
max_model_tokens = 64000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities.
patch_extra_lines = 200
secret_provider="google_cloud_storage"
cli_mode=false
ai_disclaimer_title="" # Pro feature, title for a collapsible disclaimer to AI outputs
ai_disclaimer="" # Pro feature, full text for the AI disclaimer

[pr_reviewer] # /review #
# enable/disable features
require_score_review=false
require_tests_review=true
require_estimate_effort_to_review=true
require_can_be_split_review=false
# soc2
require_soc2_ticket=false
soc2_ticket_prompt="Does the PR description include a link to ticket in a project management system (e.g., Jira, Asana, Trello, etc.) ?"
# general options
num_code_suggestions=4
inline_code_comments = true
ask_and_reflect=false
#automatic_review=true
persistent_comment=true
extra_instructions = ""
final_update_message = true
# review labels
enable_review_labels_security=true
enable_review_labels_effort=true
# specific configurations for incremental review (/review -i)
require_all_thresholds_for_incremental_review=false
minimal_commits_for_incremental_review=0
minimal_minutes_for_incremental_review=0
enable_help_text=true # Determines whether to include help text in the PR review. Enabled by default.
# auto approval
enable_auto_approval=false
maximal_review_effort=5

[pr_description] # /describe #
publish_labels=true
add_original_user_description=true
keep_original_user_title=true
generate_ai_title=false
use_bullet_points=true
extra_instructions = ""
enable_pr_type=true
final_update_message = true
enable_help_text=false
enable_help_comment=false
# describe as comment
publish_description_as_comment=false
publish_description_as_comment_persistent=true
## changes walkthrough section
enable_semantic_files_types=true
collapsible_file_list='adaptive' # true, false, 'adaptive'
inline_file_summary=false # false, true, 'table'
# markers
use_description_markers=false
include_generated_by_header=true

[pr_code_suggestions] # /improve #
max_context_tokens=8000
num_code_suggestions=4
commitable_code_suggestions = false
extra_instructions = ""
rank_suggestions = false
enable_help_text=true
persistent_comment=false
# params for '/improve --extended' mode
auto_extended_mode=true
num_code_suggestions_per_chunk=5
max_number_of_calls = 3
parallel_calls = true
rank_extended_suggestions = false
final_clip_factor = 0.8

[pr_add_docs] # /add_docs #
extra_instructions = ""
docs_style = "Sphinx Style" # "Google Style with Args, Returns, Attributes...etc", "Numpy Style", "Sphinx Style", "PEP257", "reStructuredText"

[pr_update_changelog] # /update_changelog #
push_changelog_changes=false
extra_instructions = ""

[pr_analyze] # /analyze #

[pr_test] # /test #
extra_instructions = ""
testing_framework = "" # specify the testing framework you want to use
num_tests=3 # number of tests to generate. max 5.
avoid_mocks=true # if true, the generated tests will prefer to use real objects instead of mocks
file = "" # in case there are several components with the same name, you can specify the relevant file
class_name = "" # in case there are several methods with the same name in the same file, you can specify the relevant class name
enable_help_text=true

[pr_improve_component] # /improve_component #
num_code_suggestions=4
extra_instructions = ""
file = "" # in case there are several components with the same name, you can specify the relevant file
class_name = ""

[checks] # /checks (pro feature) #
enable_auto_checks_feedback=true
excluded_checks_list=["lint"] # list of checks to exclude, for example: ["check1", "check2"]
persistent_comment=true
enable_help_text=true

[pr_help] # /help #

[pr_config] # /config #

[github]
# The type of deployment to create. Valid values are 'app' or 'user'.
deployment_type = "user"
ratelimit_retries = 5
base_url = "https://api.github.com"
publish_inline_comments_fallback_with_verification = true
try_fix_invalid_inline_comments = true

[litellm]
drop_params = true
4 changes: 4 additions & 0 deletions docs/components/index.md
@@ -17,6 +17,8 @@
| [langchain_vector_store_delete](langchain_vector_store_delete.md) | This component allows for entries in a LangChain Vector Store to be deleted. This is needed for the continued maintenance of the vector store. Due to the nature of langchain vector stores, you need to specify an embedding component even though it is not used in this component. |
| [langchain_vector_store_embedding_index](langchain_vector_store_embedding_index.md) | Use LangChain Vector Stores to index text for later semantic searches. This will take text, run it through an embedding model and then store it in a vector database. |
| [langchain_vector_store_embedding_search](langchain_vector_store_embedding_search.md) | Use LangChain Vector Stores to search a vector store with a semantic search. This will take text, run it through an embedding model with a query embedding and then find the closest matches in the store. |
| [litellm_chat_model](litellm_chat_model.md) | LiteLLM chat model component |
| [litellm_chat_model_with_history](litellm_chat_model_with_history.md) | LiteLLM model handler component with conversation history |
| [message_filter](message_filter.md) | A filtering component. This will apply a user configurable expression. If the expression evaluates to True, the message will be passed on. If the expression evaluates to False, the message will be discarded. If the message is discarded, any previous components that require an acknowledgement will be acknowledged. |
| [openai_chat_model](openai_chat_model.md) | OpenAI chat model component |
| [openai_chat_model_with_history](openai_chat_model_with_history.md) | OpenAI chat model component with conversation history |
@@ -30,3 +32,5 @@
| [websearch_bing](websearch_bing.md) | Perform a search query on Bing. |
| [websearch_duckduckgo](websearch_duckduckgo.md) | Perform a search query on DuckDuckGo. |
| [websearch_google](websearch_google.md) | Perform a search query on Google. |
| [websocket_input](websocket_input.md) | Listen for incoming messages on a websocket connection. |
| [websocket_output](websocket_output.md) | Send messages to a websocket connection. |
84 changes: 84 additions & 0 deletions docs/components/litellm_chat_model.md
@@ -0,0 +1,84 @@
# LiteLLMChatModel

LiteLLM chat model component

## Configuration Parameters

```yaml
component_name: <user-supplied-name>
component_module: litellm_chat_model
component_config:
action: <string>
load_balancer: <string>
embedding_params: <string>
temperature: <string>
stream_to_flow: <string>
stream_to_next_component: <string>
llm_mode: <string>
stream_batch_size: <string>
set_response_uuid_in_user_properties: <boolean>
history_max_turns: <string>
history_max_time: <string>
```

| Parameter | Required | Default | Description |
| --- | --- | --- | --- |
| action | True | inference | The action to perform (e.g., 'inference', 'embedding') |
| load_balancer | False | | Add a list of models to load balancer. |
| embedding_params | False | | LiteLLM model parameters. The model, api_key and base_url are mandatory. See https://docs.litellm.ai/docs/providers for available models and https://docs.litellm.ai/docs/completion/input for additional parameters. |
| temperature | False | 0.7 | Sampling temperature to use |
| stream_to_flow | False | | Name the flow to stream the output to - this must be configured for llm_mode='stream'. This is mutually exclusive with stream_to_next_component. |
| stream_to_next_component | False | False | Whether to stream the output to the next component in the flow. This is mutually exclusive with stream_to_flow. |
| llm_mode | False | none | The mode for streaming results: 'sync' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. |
| stream_batch_size | False | 15 | The minimum number of words in a single streaming result. Default: 15. |
| set_response_uuid_in_user_properties | False | False | Whether to set the response_uuid in the user_properties of the input_message. This will allow other components to correlate streaming chunks with the full response. |
| history_max_turns | False | 10 | Maximum number of conversation turns to keep in history |
| history_max_time | False | 3600 | Maximum time to keep conversation history (in seconds) |
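
Below is a minimal configuration sketch assembled from the parameters above. The entry shape inside `load_balancer` follows LiteLLM's router convention and is an assumption, as are the model alias and the placeholder API key; adjust them to your deployment.

```yaml
component_name: litellm_chat              # hypothetical component name
component_module: litellm_chat_model
component_config:
  load_balancer:
    # Assumed entry format (LiteLLM router style); model, api_key and base_url are required.
    - model_name: "primary-model"               # illustrative alias
      litellm_params:
        model: "openai/gpt-4o"
        api_key: "${OPENAI_API_KEY}"            # placeholder secret
        base_url: "https://api.openai.com/v1"
  temperature: 0.7
  llm_mode: none           # wait for the full response; use 'stream' together with stream_to_flow
  stream_batch_size: 15
```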


## Component Input Schema

```
{
messages: [
{
role: <string>,
content: <string>
},
...
],
clear_history_but_keep_depth: <integer>
}
```
| Field | Required | Description |
| --- | --- | --- |
| messages | True | |
| messages[].role | True | |
| messages[].content | True | |
| clear_history_but_keep_depth | False | Clear history but keep the last N messages. If 0, clear all history. If not set, do not clear history. |
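
For reference, a request carrying a single user turn might look like the sketch below; the content string is purely illustrative.

```
{
  "messages": [
    { "role": "user", "content": "What does the litellm_chat_model component do?" }
  ]
}
```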


## Component Output Schema

```
{
content: <string>,
chunk: <string>,
response_uuid: <string>,
first_chunk: <boolean>,
last_chunk: <boolean>,
streaming: <boolean>
}
```
| Field | Required | Description |
| --- | --- | --- |
| content | True | The generated response from the model |
| chunk | False | The current chunk of the response |
| response_uuid | False | The UUID of the response |
| first_chunk | False | Whether this is the first chunk of the response |
| last_chunk | False | Whether this is the last chunk of the response |
| streaming | False | Whether this is a streaming response |
84 changes: 84 additions & 0 deletions docs/components/litellm_chat_model_with_history.md
@@ -0,0 +1,84 @@
# LiteLLMChatModelWithHistory

LiteLLM model handler component with conversation history

## Configuration Parameters

```yaml
component_name: <user-supplied-name>
component_module: litellm_chat_model_with_history
component_config:
action: <string>
load_balancer: <string>
embedding_params: <string>
temperature: <string>
stream_to_flow: <string>
stream_to_next_component: <string>
llm_mode: <string>
stream_batch_size: <string>
set_response_uuid_in_user_properties: <boolean>
history_max_turns: <string>
history_max_time: <string>
```

| Parameter | Required | Default | Description |
| --- | --- | --- | --- |
| action | True | inference | The action to perform (e.g., 'inference', 'embedding') |
| load_balancer | False | | Add a list of models to load balancer. |
| embedding_params | False | | LiteLLM model parameters. The model, api_key and base_url are mandatory. See https://docs.litellm.ai/docs/providers for available models and https://docs.litellm.ai/docs/completion/input for additional parameters. |
| temperature | False | 0.7 | Sampling temperature to use |
| stream_to_flow | False | | Name the flow to stream the output to - this must be configured for llm_mode='stream'. This is mutually exclusive with stream_to_next_component. |
| stream_to_next_component | False | False | Whether to stream the output to the next component in the flow. This is mutually exclusive with stream_to_flow. |
| llm_mode | False | none | The mode for streaming results: 'sync' or 'stream'. 'stream' will just stream the results to the named flow. 'none' will wait for the full response. |
| stream_batch_size | False | 15 | The minimum number of words in a single streaming result. Default: 15. |
| set_response_uuid_in_user_properties | False | False | Whether to set the response_uuid in the user_properties of the input_message. This will allow other components to correlate streaming chunks with the full response. |
| history_max_turns | False | 10 | Maximum number of conversation turns to keep in history |
| history_max_time | False | 3600 | Maximum time to keep conversation history (in seconds) |


## Component Input Schema

```
{
messages: [
{
role: <string>,
content: <string>
},
...
],
clear_history_but_keep_depth: <integer>
}
```
| Field | Required | Description |
| --- | --- | --- |
| messages | True | |
| messages[].role | True | |
| messages[].content | True | |
| clear_history_but_keep_depth | False | Clear history but keep the last N messages. If 0, clear all history. If not set, do not clear history. |
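
As an illustrative sketch, the request below trims the stored history to the last four messages before this turn is processed; the content string is made up.

```
{
  "messages": [
    { "role": "user", "content": "Given what we discussed earlier, what changed in the last release?" }
  ],
  "clear_history_but_keep_depth": 4
}
```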


## Component Output Schema

```
{
content: <string>,
chunk: <string>,
response_uuid: <string>,
first_chunk: <boolean>,
last_chunk: <boolean>,
streaming: <boolean>
}
```
| Field | Required | Description |
| --- | --- | --- |
| content | True | The generated response from the model |
| chunk | False | The current chunk of the response |
| response_uuid | False | The UUID of the response |
| first_chunk | False | Whether this is the first chunk of the response |
| last_chunk | False | Whether this is the last chunk of the response |
| streaming | False | Whether this is a streaming response |
12 changes: 11 additions & 1 deletion docs/components/openai_chat_model.md
@@ -56,9 +56,19 @@ component_config:

```
{
content: <string>
content: <string>,
chunk: <string>,
response_uuid: <string>,
first_chunk: <boolean>,
last_chunk: <boolean>,
streaming: <boolean>
}
```
| Field | Required | Description |
| --- | --- | --- |
| content | True | The generated response from the model |
| chunk | False | The current chunk of the response |
| response_uuid | False | The UUID of the response |
| first_chunk | False | Whether this is the first chunk of the response |
| last_chunk | False | Whether this is the last chunk of the response |
| streaming | False | Whether this is a streaming response |
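
To make the streaming fields concrete, a final streaming message might resemble the sketch below, assuming content accumulates the full response while chunk carries only the latest fragment; the exact chunking behaviour is not specified in this PR, and all values are invented.

```
{
  "content": "Streaming lets downstream components render partial results as they arrive.",
  "chunk": "as they arrive.",
  "response_uuid": "5c1d6f2e-example-uuid",
  "first_chunk": false,
  "last_chunk": true,
  "streaming": true
}
```
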
12 changes: 11 additions & 1 deletion docs/components/openai_chat_model_with_history.md
@@ -62,9 +62,19 @@ component_config:

```
{
content: <string>
content: <string>,
chunk: <string>,
response_uuid: <string>,
first_chunk: <boolean>,
last_chunk: <boolean>,
streaming: <boolean>
}
```
| Field | Required | Description |
| --- | --- | --- |
| content | True | The generated response from the model |
| chunk | False | The current chunk of the response |
| response_uuid | False | The UUID of the response |
| first_chunk | False | Whether this is the first chunk of the response |
| last_chunk | False | Whether this is the last chunk of the response |
| streaming | False | Whether this is a streaming response |
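
Conversely, when streaming is disabled only content is meaningful; a non-streaming response might look like the invented example below, assuming the optional chunk-related fields are simply omitted in that case.

```
{
  "content": "The capital of France is Paris.",
  "streaming": false
}
```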