Patch local LLMs with context_window (#416)
* patch

* patch ollama

* patch lmstudio

* patch kobold
cpacker authored Nov 10, 2023
1 parent 873c044 commit 20c08b0
Showing 8 changed files with 8 additions and 4 deletions.
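
The same one-line pattern repeats across every backend touched below: the caller-supplied context_window is written into the request payload at call time, and the matching hardcoded LLM_MAX_TOKENS entry in each static settings dict is commented out, so the per-call value becomes the single source of truth. A minimal sketch of that pattern in Python (the key names come from the hunks below; the helper itself, the deepcopy, and the tuple handling for Ollama's nested key are illustrative assumptions, not code from this commit):

# Hypothetical helper illustrating the pattern applied across the patched backends.
import copy

# Context-window key per backend, as set in the api.py hunks of this commit.
CONTEXT_KEY = {
    "koboldcpp": "max_context_length",
    "lmstudio": "max_tokens",
    "ollama": ("options", "num_ctx"),  # Ollama nests the value under "options"
    "webui": "truncation_length",
}

def apply_context_window(backend: str, settings: dict, context_window: int) -> dict:
    """Return a copy of `settings` with the backend-specific context-window key set."""
    request = copy.deepcopy(settings)  # the real wrappers mutate `settings` in place
    key = CONTEXT_KEY[backend]
    if isinstance(key, tuple):
        outer, inner = key
        request.setdefault(outer, {})[inner] = context_window
    else:
        request[key] = context_window
    return request

# e.g. apply_context_window("ollama", {"options": {}}, 8192) -> {"options": {"num_ctx": 8192}}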
1 change: 1 addition & 0 deletions memgpt/local_llm/koboldcpp/api.py
@@ -21,6 +21,7 @@ def get_koboldcpp_completion(prompt, context_window, grammar=None, settings=SIMP
     # Settings for the generation, includes the prompt + stop tokens, max length, etc
     request = settings
     request["prompt"] = prompt
+    request["max_context_length"] = context_window

     # Set grammar
     if grammar is not None:
2 changes: 1 addition & 1 deletion memgpt/local_llm/koboldcpp/settings.py
@@ -20,6 +20,6 @@
         # '\n#',
         # '\n\n\n',
     ],
-    "max_context_length": LLM_MAX_TOKENS,
+    # "max_context_length": LLM_MAX_TOKENS,
     "max_length": 512,
 }
1 change: 1 addition & 0 deletions memgpt/local_llm/lmstudio/api.py
@@ -20,6 +20,7 @@ def get_lmstudio_completion(prompt, context_window, settings=SIMPLE, api="chat")

     # Settings for the generation, includes the prompt + stop tokens, max length, etc
     request = settings
+    request["max_tokens"] = context_window

     if api == "chat":
         # Uses the ChatCompletions API style
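
With this hunk, an LM Studio request built from the default settings carries the context window in its top-level "max_tokens" field (which the settings file, below, documents as capped at the model context length). A rough sketch of the resulting chat-style payload; the stop sequence, message shape, and local endpoint are assumptions for illustration, not taken from this diff:

import json

context_window = 8192  # example value supplied by the caller
request = {
    "stop": ["\nUSER:"],           # assumed stop sequence from the settings dict
    "max_tokens": context_window,  # set by the patched get_lmstudio_completion
    "messages": [{"role": "user", "content": "..."}],  # chat-style body (assumed shape)
}
print(json.dumps(request, indent=2))
# Typically POSTed to LM Studio's local server, e.g. http://localhost:1234/v1/chat/completions (assumption).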
2 changes: 1 addition & 1 deletion memgpt/local_llm/lmstudio/settings.py
@@ -22,7 +22,7 @@
     # This controls the maximum number of tokens that the model can generate
     # Cap this at the model context length (assuming 8k for Mistral 7B)
     # "max_tokens": 8000,
-    "max_tokens": LLM_MAX_TOKENS,
+    # "max_tokens": LLM_MAX_TOKENS,
     # This controls how LM studio handles context overflow
     # In MemGPT we handle this ourselves, so this should be commented out
     # "lmstudio": {"context_overflow_policy": 2},
1 change: 1 addition & 0 deletions memgpt/local_llm/ollama/api.py
@@ -26,6 +26,7 @@ def get_ollama_completion(prompt, context_window, settings=SIMPLE, grammar=None)
     request = settings
     request["prompt"] = prompt
     request["model"] = MODEL_NAME
+    request["options"]["num_ctx"] = context_window

     # Set grammar
     if grammar is not None:
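
Ollama is the one backend where the value is nested: num_ctx goes inside the request's "options" dict rather than at the top level, so the settings dict must already define an "options" key for this line to work (it does, per the settings hunk below). A small sketch of the resulting payload; the model name and stop list are placeholders, not values from this diff:

context_window = 8192  # example value supplied by the caller
request = {
    "model": "mistral",               # placeholder; the real value comes from MODEL_NAME
    "prompt": "...",
    "options": {
        "stop": ["\nUSER:"],          # assumed contents of the settings "options" dict
        "num_ctx": context_window,    # set by the patched get_ollama_completion
    },
    "stream": False,
}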
2 changes: 1 addition & 1 deletion memgpt/local_llm/ollama/settings.py
@@ -22,7 +22,7 @@
             # '\n#',
             # '\n\n\n',
         ],
-        "num_ctx": LLM_MAX_TOKENS,
+        # "num_ctx": LLM_MAX_TOKENS,
     },
     "stream": False,
     # turn off Ollama's own prompt formatting
1 change: 1 addition & 0 deletions memgpt/local_llm/webui/api.py
@@ -20,6 +20,7 @@ def get_webui_completion(prompt, context_window, settings=SIMPLE, grammar=None):
     # Settings for the generation, includes the prompt + stop tokens, max length, etc
     request = settings
     request["prompt"] = prompt
+    request["truncation_length"] = context_window  # assuming mistral 7b

     # Set grammar
     if grammar is not None:
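
For the text-generation-webui backend the context window maps to "truncation_length", while the generation cap ("max_new_tokens") stays in settings.py. A quick sketch of a payload after this patch; the prompt is a placeholder, and 3072 is the cap shown in the settings hunk below:

context_window = 8192  # example value supplied by the caller
request = {
    "prompt": "...",
    "max_new_tokens": 3072,               # generation cap, still configured in settings.py
    "truncation_length": context_window,  # set by the patched get_webui_completion
}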
2 changes: 1 addition & 1 deletion memgpt/local_llm/webui/settings.py
@@ -21,5 +21,5 @@
     ],
     "max_new_tokens": 3072,
     # "truncation_length": 4096, # assuming llama2 models
-    "truncation_length": LLM_MAX_TOKENS, # assuming mistral 7b
+    # "truncation_length": LLM_MAX_TOKENS, # assuming mistral 7b
 }
