Add litellm #450

Closed · wants to merge 4 commits
README.md (14 changes: 9 additions & 5 deletions)

@@ -58,21 +58,25 @@ cd mentat
 pip install -e .
 ```
 
-## Add your OpenAI API Key
+## Selecting which LLM Model to use
 
-You'll need to have API access to GPT-4 to run Mentat. There are a few options to provide Mentat with your OpenAI API key:
+We highly recommend using the default model, `gpt-4-1106-preview`, as it performs vastly better than any other model benchmarked so far. However, if you wish to use a different model, jump [here](#alternative-models).
+
+### Add your OpenAI API Key
+
+There are a few options to provide Mentat with your OpenAI API key:
 
 1. Create a `.env` file with the line `OPENAI_API_KEY=<your-api-key>` in the directory you plan to run Mentat in, or in `~/.mentat/.env`
 2. Run `export OPENAI_API_KEY=<your key here>` prior to running Mentat
 3. Place the previous command in your `.bashrc` or `.zshrc` to export your key on every terminal startup
 
 ### Azure OpenAI
 
-Mentat also works with the Azure OpenAI API. To use the Azure API, provide the `AZURE_OPENAI_ENDPOINT` (`https://<your-instance-name>.openai.azure.com/`) and `AZURE_OPENAI_KEY` environment variables instead of `OPENAI_API_KEY`.
+Mentat also works with the Azure OpenAI API. To use the Azure API, provide the `AZURE_API_BASE` (`https://<your-instance-name>.openai.azure.com/`), `AZURE_API_KEY`, and `AZURE_API_VERSION` environment variables instead of `OPENAI_API_KEY`. Then set the model to your Azure model, as described in [configuration.md](docs/configuration.md).
 
-In addition, Mentat uses the `gpt-4-1106-preview` by default. On Azure, this model is available under a different name: `gpt-4-1106-Preview` (with a capital P). To use it, override the default model as described in [configuration.md](docs/configuration.md).
+### Alternative Models
 
-> **_Important:_** Due to changes in the OpenAI Python SDK, you can no longer use `OPENAI_API_BASE` to access the Azure API with Mentat.
+Mentat uses [litellm](https://github.com/BerriAI/litellm) to retrieve chat completions from models. To use a non-OpenAI model, simply set the model (and possibly the `llm_provider`) as described in [configuration.md](docs/configuration.md). Additionally, check the litellm documentation for the provider your model falls under and supply any required environment variables.
 
 ## Configuration
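
For quick reference, a minimal shell setup matching the variable names in this diff might look like the following sketch; all values are placeholders, not working credentials.

```bash
# Standard OpenAI access
export OPENAI_API_KEY=<your-api-key>

# Azure OpenAI access (replaces OPENAI_API_KEY under this PR)
export AZURE_API_BASE="https://<your-instance-name>.openai.azure.com/"
export AZURE_API_KEY=<your-azure-key>
export AZURE_API_VERSION=<api-version>
```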
docs/configuration.md (21 changes: 8 additions & 13 deletions)

@@ -41,30 +41,25 @@ A list of key-value pairs defining a custom [Pygment Style](https://pygments.org
 }
 ```
 
-### Maximum Context
+## 🦙 Alternative Models
 
-If you're using a model other than gpt-3.5 or gpt-4 we won't be able to infer the model's context size so you need to manually set the maximum context like so.
+Mentat uses [litellm](https://github.com/BerriAI/litellm), so you can direct it to use any local or hosted model. See their documentation for help setting up any required environment variables, and set the model (and possibly `llm_provider`, if litellm doesn't automatically recognize the model) in `~/.mentat/.mentat_config.json`:
 ```json
 {
-    "maximum-context": 16000
+    "model": "<model_name>",
+    "llm-provider": "<litellm_llm_provider>"
 }
 ```
-This can also be used to save costs for instance if you want to use a maximum of 16k tokens when using gpt-4-32k.
 
-## 🦙 Alternative Models
+### Maximum Context
 
-Mentat is powered with openai's sdk so you can direct it to use a local model, or any hosted model which conforms to OpenAi's API spec. For example if you host a Llama instance following the directions [here](https://github.com/abetlen/llama-cpp-python#web-server) then you use that model with Mentat by exporting its path e.g.
-```bash
-export OPENAI_API_BASE="http://localhost:8000/v1
-```
-and then setting your model in `~/.mentat/.mentat_config.json`:
+If you use a model unknown to litellm, you can manually set the model's maximum context like so:
 ```json
 {
-    "model": "/absolute/path/to/7B/llama-model.gguf"
-    "maximum-context": 2048
+    "maximum-context": 16000
 }
 ```
-For models other than gpt-3.5 and gpt-4 we may not be able to infer a maximum context size so you'll also have to set the maximum-context.
+This can also be used to save costs by setting a more conservative limit on models with larger context sizes.
 
 ### Alternative Formats
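
Taken together, a `~/.mentat/.mentat_config.json` for a non-OpenAI model would combine the keys shown above. The model and provider names below are illustrative placeholders; substitute the identifiers litellm expects for your provider.

```json
{
    "model": "claude-2",
    "llm-provider": "anthropic",
    "maximum-context": 16000
}
```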
mentat/code_context.py (4 changes: 2 additions & 2 deletions)

@@ -11,7 +11,7 @@
     split_file_into_intervals,
 )
 from mentat.diff_context import DiffContext
-from mentat.errors import ContextSizeInsufficient, PathValidationError
+from mentat.errors import PathValidationError, ReturnToUser
 from mentat.feature_filters.default_filter import DefaultFilter
 from mentat.feature_filters.embedding_similarity_filter import EmbeddingSimilarityFilter
 from mentat.git_handler import get_paths_with_git_diffs

@@ -152,7 +152,7 @@ async def get_code_message(
             prompt_tokens + meta_tokens + include_files_tokens + config.token_buffer
         )
         if not is_context_sufficient(tokens_used):
-            raise ContextSizeInsufficient()
+            raise ReturnToUser()
         auto_tokens = min(get_max_tokens() - tokens_used, config.auto_context_tokens)
 
         # Get auto included features
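
For intuition, the token bookkeeping in this hunk reduces to the self-contained sketch below; the function and argument names are simplified stand-ins for Mentat's internals, not the actual implementation.

```python
def auto_context_budget(
    prompt_tokens: int,
    meta_tokens: int,
    include_files_tokens: int,
    token_buffer: int,
    max_tokens: int,
    auto_context_tokens: int,
) -> int:
    """Return the token budget left over for auto-included features."""
    # Fixed costs: prompt, metadata, explicitly included files, and a
    # safety buffer all consume context before auto-inclusion runs.
    tokens_used = prompt_tokens + meta_tokens + include_files_tokens + token_buffer
    if tokens_used > max_tokens:
        # Mentat raises ReturnToUser here, handing control back to the user.
        raise RuntimeError("context size insufficient for this request")
    # Whatever remains is capped by the configured auto_context_tokens.
    return min(max_tokens - tokens_used, auto_context_tokens)
```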
mentat/config.py (23 changes: 15 additions & 8 deletions)

@@ -6,10 +6,11 @@
 from pathlib import Path
 
 import attr
+import litellm
 from attr import converters, validators
 
 from mentat.git_handler import get_git_root_for_path
-from mentat.llm_api_handler import known_models
+from mentat.llm_api_handler import available_embedding_models, available_models
 from mentat.parsers.parser import Parser
 from mentat.parsers.parser_map import parser_map
 from mentat.session_context import SESSION_CONTEXT

@@ -35,19 +36,25 @@ class Config:
     # Model specific settings
     model: str = attr.field(
         default="gpt-4-1106-preview",
-        metadata={"auto_completions": list(known_models.keys())},
+        metadata={"auto_completions": available_models()},
     )
+    llm_provider: str | None = attr.field(
+        default=None,
+        metadata={
+            "description": (
+                "The llm provider to use. See https://github.com/BerriAI/litellm for a"
+                " list of all providers and supported models."
+            ),
+            "auto_completions": litellm.provider_list,  # pyright: ignore
+        },
+    )
     feature_selection_model: str = attr.field(
         default="gpt-4-1106-preview",
-        metadata={"auto_completions": list(known_models.keys())},
+        metadata={"auto_completions": available_models()},
     )
     embedding_model: str = attr.field(
         default="text-embedding-ada-002",
-        metadata={
-            "auto_completions": [
-                model.name for model in known_models.values() if model.embedding_model
-            ]
-        },
+        metadata={"auto_completions": available_embedding_models()},
     )
     temperature: float = attr.field(
         default=0.2, converter=float, validator=[validators.le(1), validators.ge(0)]
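
The config now draws auto-completions from litellm rather than Mentat's hand-maintained `known_models` table. Below is a minimal sketch of what `available_models()` and `available_embedding_models()` could look like, assuming litellm's `model_cost` registry tags each entry with a `mode`; this is a guess at the helpers, not the PR's actual implementation.

```python
import litellm


def available_models() -> list[str]:
    # litellm.model_cost maps model names to metadata such as context
    # size, pricing, and mode.
    return list(litellm.model_cost.keys())


def available_embedding_models() -> list[str]:
    # Assumption: embedding models are tagged with mode == "embedding".
    return [
        name
        for name, info in litellm.model_cost.items()
        if info.get("mode") == "embedding"
    ]
```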
mentat/conversation.py (7 changes: 0 additions & 7 deletions)

@@ -15,7 +15,6 @@
     ChatCompletionUserMessageParam,
 )
 
-from mentat.errors import MentatError
 from mentat.llm_api_handler import (
     TOKEN_COUNT_WARNING,
     count_tokens,

@@ -41,13 +40,7 @@ async def display_token_count(self):
         stream = session_context.stream
         config = session_context.config
         code_context = session_context.code_context
-        llm_api_handler = session_context.llm_api_handler
 
-        if not await llm_api_handler.is_model_available(config.model):
-            raise MentatError(
-                f"Model {config.model} is not available. Please try again with a"
-                " different model."
-            )
         if "gpt-4" not in config.model:
             stream.send(
                 "Warning: Mentat has only been tested on GPT-4. You may experience"
mentat/errors.py (3 changes: 1 addition & 2 deletions)

@@ -44,8 +44,7 @@ class PathValidationError(Exception):
     pass
 
 
-class ContextSizeInsufficient(Exception):
+class ReturnToUser(Exception):
     """
-    Raised when trying to call the API with too many tokens for that model.
     Will give control back to the user after being raised.
     """
mentat/feature_filters/default_filter.py (4 changes: 2 additions & 2 deletions)

@@ -1,7 +1,7 @@
 from typing import Optional
 
 from mentat.code_feature import CodeFeature
-from mentat.errors import ContextSizeInsufficient, ModelError
+from mentat.errors import ModelError, ReturnToUser
 from mentat.feature_filters.embedding_similarity_filter import EmbeddingSimilarityFilter
 from mentat.feature_filters.feature_filter import FeatureFilter
 from mentat.feature_filters.llm_feature_filter import LLMFeatureFilter

@@ -39,7 +39,7 @@ async def filter(self, features: list[CodeFeature]) -> list[CodeFeature]:
                     self.expected_edits,
                     (0.5 if self.user_prompt != "" else 1) * self.loading_multiplier,
                 ).filter(features)
-            except (ModelError, ContextSizeInsufficient):
+            except (ModelError, ReturnToUser):
                 ctx.stream.send(
                     "Feature-selection LLM response invalid. Using TruncateFilter"
                     " instead."