Fixes #408 (using the OpenAI Azure API)

AbanteAI · Dec 19, 2023 · ffc56fa · ffc56fa
1 parent 8dc8595
commit ffc56fa
Show file tree

Hide file tree

Showing 2 changed files with 29 additions and 5 deletions.
diff --git a/README.md b/README.md
@@ -66,6 +66,13 @@ You'll need to have API access to GPT-4 to run Mentat. There are a few options t
 2. Run `export OPENAI_API_KEY=<your key here>` prior to running Mentat
 3. Place the previous command in your `.bashrc` or `.zshrc` to export your key on every terminal startup
 
+### Azure OpenAI
+
+Instead of the "vanilla" OpenAI API, you may also use the "corporate" version offered by Microsoft ("Azure OpenAI"). In that case, instead of setting `OPENAI_API_KEY` as described above, set both the `AZURE_OPENAI_ENDPOINT` (`https://<your-instance-name>.openai.azure.com/`) and `AZURE_OPENAI_KEY` environment variables.
+
+> [!IMPORTANT]
+> If you were previously using the `OPENAI_API_BASE` variable to access Azure OpenAI with Mentat, this no longer works due to breaking changes in the SDK. See [this issue](https://github.com/AbanteAI/mentat/issues/408) for details.
+
 ## Configuration
 
 For custom configuration options see [configuration.md](docs/configuration.md)

diff --git a/mentat/llm_api_handler.py b/mentat/llm_api_handler.py
@@ -11,7 +11,13 @@
 import sentry_sdk
 import tiktoken
 from dotenv import load_dotenv
-from openai import APIConnectionError, AsyncOpenAI, AsyncStream, AuthenticationError
+from openai import (
+    APIConnectionError,
+    AsyncAzureOpenAI,
+    AsyncOpenAI,
+    AsyncStream,
+    AuthenticationError,
+)
 from openai.types.chat import (
     ChatCompletion,
     ChatCompletionChunk,
@@ -134,6 +140,8 @@ class Model:
 
 known_models: Dict[str, Model] = {
     "gpt-4-1106-preview": Model("gpt-4-1106-preview", 128000, 0.01, 0.03),
+    # model name on Azure 🙄
+    "gpt-4-1106-Preview": Model("gpt-4-1106-Preview", 128000, 0.01, 0.03),
     "gpt-4-vision-preview": Model("gpt-4-vision-preview", 128000, 0.01, 0.03),
     "gpt-4": Model("gpt-4", 8192, 0.03, 0.06),
     "gpt-4-32k": Model("gpt-4-32k", 32768, 0.06, 0.12),
@@ -190,19 +198,28 @@ def initialize_client(self):
             load_dotenv()
         key = os.getenv("OPENAI_API_KEY")
         base_url = os.getenv("OPENAI_API_BASE")
-        if not key:
+        azure_key = os.getenv("AZURE_OPENAI_KEY")
+        azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
+
+        if not key and not azure_key:
             raise UserError(
                 "No OpenAI api key detected.\nEither place your key into a .env"
                 " file or export it as an environment variable."
             )
 
         # We don't have any use for a synchronous client, but if we ever do we can easily make it here
-        self.async_client = AsyncOpenAI(api_key=key, base_url=base_url)
+        if azure_endpoint:
+            self.async_client = AsyncAzureOpenAI(
+                api_key=azure_key,
+                api_version="2023-12-01-preview",
+                azure_endpoint=azure_endpoint,
+            )
+        else:
+            self.async_client = AsyncOpenAI(api_key=key, base_url=base_url)
         try:
-            self.async_client.api_key = key
             self.async_client.models.list()  # Test the key
         except AuthenticationError as e:
-            raise UserError(f"OpenAI gave an Authentication Error:\n{e}")
+            raise UserError(f"API gave an Authentication Error:\n{e}")
 
     @overload
     async def call_llm_api(