Add Azure OpenAI Services support #4

Merged · 4 commits · Feb 19, 2024
8 changes: 7 additions & 1 deletion .env_example
@@ -4,8 +4,14 @@ LANGCHAIN_ENDPOINT=https://api.smith.langchain.com
LANGCHAIN_PROJECT=Experiments
LANGCHAIN_API_KEY=

-# OpenAI Platform API key. See https://platform.openai.com/docs/quickstart/account-setup
+# Either "openai" or "openai_azure"
+ALTTEXTER_MODEL=openai
+# openai: See https://platform.openai.com/docs/quickstart/account-setup
 OPENAI_API_KEY=
+# openai_azure: See https://learn.microsoft.com/en-us/azure/ai-services/openai/quickstart?tabs=command-line%2Cpython&pivots=rest-api#retrieve-key-and-endpoint
+AZURE_OPENAI_ENDPOINT=
+AZURE_OPENAI_API_KEY=
+AZURE_OPENAI_DEPLOYMENT=

# Host and port to bind service to
ALTTEXTER_HOST=0.0.0.0
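The new variables split configuration by backend: `openai` only needs `OPENAI_API_KEY`, while `openai_azure` needs the three `AZURE_OPENAI_*` values. A minimal sketch of a startup check for this scheme (the helper name and structure are assumptions for illustration, not code from this PR):

```python
import os

# Required variables per ALTTEXTER_MODEL value, mirroring the .env_example comments.
REQUIRED_VARS = {
    "openai": ["OPENAI_API_KEY"],
    "openai_azure": [
        "AZURE_OPENAI_ENDPOINT",
        "AZURE_OPENAI_API_KEY",
        "AZURE_OPENAI_DEPLOYMENT",
    ],
}


def missing_env_vars(model: str) -> list:
    """Return the required variables that are unset or empty for the chosen backend."""
    if model not in REQUIRED_VARS:
        raise ValueError(f"Unsupported model specified: {model}")
    return [name for name in REQUIRED_VARS[model] if not os.getenv(name)]
```

Failing fast on an empty key at startup gives a clearer error than a mid-request HTTP 401 from the provider.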
7 changes: 3 additions & 4 deletions README.md
@@ -25,7 +25,7 @@ via [gov.uk:](https://design102.blog.gov.uk/2022/01/14/whats-the-alternative-how
1. Clone the repo.
1. Copy `.env-example` to `.env` and fill in the required env variables.
1. Optionally edit `config.json` to customize CORS and logging.
-1. Run `docker-compose up` to build and start the service.
+1. Run `docker-compose up` (v1) or `docker compose up` (v2) to build and start the service.
1. Run `python client-example.py example/apis.ipynb` to test. Expected output:

```bash
@@ -50,14 +50,13 @@ via [gov.uk:](https://design102.blog.gov.uk/2022/01/14/whats-the-alternative-how
## Features

* Uses LangChain's [Pydantic parser](https://python.langchain.com/docs/modules/model_io/output_parsers/types/pydantic) as foundation for system prompt to reliably generate a JSON of expected format ([function calling](https://community.openai.com/t/does-the-model-gpt-4-vision-preview-have-function-calling/490197/2) will be even cooler).
-* Optionally integrates with LangSmith (in beta) to serve [trace URL](https://docs.smith.langchain.com/tracing/tracing-faq) for each generation.
+* Optionally integrates with LangSmith to serve [trace URL](https://docs.smith.langchain.com/tracing/tracing-faq) for each generation.

## TODO

- [ ] Better error handling
- [ ] Unit tests
- [ ] Special handling for large files and images
- [ ] Rate limiting at the service level
-- [ ] Option to use [Azure OpenAI Services](https://techcommunity.microsoft.com/t5/ai-azure-ai-services-blog/gpt-4-turbo-with-vision-is-now-available-on-azure-openai-service/ba-p/4008456)
 - [ ] Explore extending to multimodal models beyond OpenAI
 - [ ] Extend this TODO list
+- [X] Option to use [Azure OpenAI Services](https://techcommunity.microsoft.com/t5/ai-azure-ai-services-blog/gpt-4-turbo-with-vision-is-now-available-on-azure-openai-service/ba-p/4008456)
46 changes: 21 additions & 25 deletions alttexter.py
@@ -6,17 +6,29 @@

 from langchain import callbacks
 from langchain.callbacks.tracers.langchain import wait_for_all_tracers
-from langchain.chat_models import ChatOpenAI
 from langchain.output_parsers import PydanticOutputParser
-from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate
-from langchain_core.messages import HumanMessage
+from langchain.prompts import ChatPromptTemplate
+from langchain_core.messages import HumanMessage, SystemMessage
+from langchain_openai import AzureChatOpenAI, ChatOpenAI
 from langsmith import Client

from schema import AlttexterResponse, ImageAltText


-def alttexter(input_text: str, images: dict, image_urls: List[str]) -> Tuple[List[ImageAltText], Optional[str]]:
+def determine_llm() -> ChatOpenAI:
+    """Determine which LLM to use based on environment variables."""
+    model_env = os.getenv("ALTTEXTER_MODEL")
+    if model_env == 'openai':
+        return ChatOpenAI(verbose=True, temperature=0, model="gpt-4-vision-preview", max_tokens=4096)
+    elif model_env == 'openai_azure':
+        return AzureChatOpenAI(verbose=True, temperature=0, openai_api_version="2024-02-15-preview",
+                               azure_deployment=os.getenv("AZURE_OPENAI_DEPLOYMENT"),
+                               model="vision-preview", max_tokens=4096)
+    else:
+        raise ValueError(f"Unsupported model specified: {model_env}")
+
+
+def alttexter(input_text: str, images: dict, image_urls: List[str]) -> Tuple[List[ImageAltText], Optional[str]]:
"""
Processes input text and images to generate alt text and title attributes.

@@ -28,20 +28,12 @@ def alttexter(input_text: str, images: dict, image_urls: List[str]) -> Tuple[Lis
Returns:
Tuple[AlttexterResponse, str]: Generated alt texts and optional tracing URL.
"""
-    llm = ChatOpenAI(
-        verbose=True,
-        temperature=0,
-        model="gpt-4-vision-preview",
-        max_tokens=4096
-    )
+    llm = determine_llm()

     content = [
         {
             "type": "text",
-            "text": f"""ARTICLE:
-
-{input_text}
-"""
+            "text": f"""ARTICLE: {input_text}"""
         }
     ]

@@ -73,20 +73,12 @@ def alttexter(input_text: str, images: dict, image_urls: List[str]) -> Tuple[Lis
content.append(image_entry)

     parser = PydanticOutputParser(pydantic_object=AlttexterResponse)
-
-    system_prompt = SystemMessagePromptTemplate.from_template(
-        template="""You are a world-class expert at generating concise alternative text and title attributes for images defined in technical articles written in markdown format.
-
-For each image in the article use a contextual understanding of the article text and the image itself to generate a concise alternative text and title attribute.
-
-{format_instructions}""",
-        partial_variables={"format_instructions": parser.get_format_instructions()},
-    )
 
     all_image_identifiers = list(images.keys()) + image_urls
 
     messages = ChatPromptTemplate.from_messages(
         [
-            system_prompt,
+            SystemMessage(
+                content='''You are a world-class expert at generating concise alternative text and title attributes for images defined in technical articles written in markdown format.\nFor each image in the article use a contextual understanding of the article text and the image itself to generate a concise alternative text and title attribute.\n{format_instructions}'''.format(format_instructions=parser.get_format_instructions())),
             HumanMessage(content=content),
             HumanMessage(
                 content=f"Tip: List of file names of images including their paths or URLs: {str(all_image_identifiers)}"
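One subtlety in the switch from `partial_variables` to `str.format` on the system prompt: the Pydantic format instructions embed a JSON schema, so they contain literal `{` and `}`. They can only be substituted in as a *value*; formatting a template that already contained the JSON would raise `KeyError`, because `str.format` treats every `{...}` as a replacement field. A stdlib sketch of the distinction (the `instructions` string is a simplified stand-in for `parser.get_format_instructions()`):

```python
# Simplified stand-in for the parser's format instructions: JSON means literal braces.
instructions = '{"title": "string", "alt_text": "string"}'

# Safe: the braces arrive as a substituted value, after formatting has run.
template = "You are an expert at generating alt text.\n{format_instructions}"
prompt = template.format(format_instructions=instructions)

# Unsafe: str.format parses the JSON's braces as replacement fields and fails.
raised = False
try:
    ("You are an expert at generating alt text.\n" + instructions).format()
except (KeyError, IndexError):
    raised = True
```

This is why both the old `SystemMessagePromptTemplate` approach and the new `SystemMessage` one keep the instructions out of the template body until substitution time.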
8 changes: 6 additions & 2 deletions docker-compose.yml
@@ -11,10 +11,14 @@ services:
volumes:
- ${ALTTEXTER_CERTS_DIR}:/certs
environment:
-      - LANGCHAIN_PROJECT
       - LANGCHAIN_TRACING_V2
       - LANGCHAIN_ENDPOINT
+      - LANGCHAIN_PROJECT
       - LANGCHAIN_API_KEY
-      - ALTTEXTER_TOKEN
+      - ALTTEXTER_MODEL
       - OPENAI_API_KEY
+      - AZURE_OPENAI_ENDPOINT
+      - AZURE_OPENAI_API_KEY
+      - AZURE_OPENAI_DEPLOYMENT
+      - ALTTEXTER_TOKEN
command: python main.py --port ${ALTTEXTER_PORT} --host ${ALTTEXTER_HOST} --certfile /certs/${ALTTEXTER_CERTFILE_NAME} --keyfile /certs/${ALTTEXTER_KEYFILE_NAME}
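The `environment:` entries listed without an `=value` are pass-throughs: Compose forwards each variable from the host shell (or the `.env` file) into the container only if it is set there. The effect can be sketched as a dict filter (illustrative, not Compose's actual implementation):

```python
# Variables the compose file forwards into the alttexter container.
PASS_THROUGH = [
    "LANGCHAIN_TRACING_V2", "LANGCHAIN_ENDPOINT", "LANGCHAIN_PROJECT",
    "LANGCHAIN_API_KEY", "ALTTEXTER_MODEL", "OPENAI_API_KEY",
    "AZURE_OPENAI_ENDPOINT", "AZURE_OPENAI_API_KEY",
    "AZURE_OPENAI_DEPLOYMENT", "ALTTEXTER_TOKEN",
]


def container_env(host_env: dict) -> dict:
    """Only listed variables that exist on the host reach the container."""
    return {k: host_env[k] for k in PASS_THROUGH if k in host_env}
```

This keeps secrets out of the compose file itself: unused backend keys (e.g. the `AZURE_OPENAI_*` trio when `ALTTEXTER_MODEL=openai`) simply never appear inside the container.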
9 changes: 4 additions & 5 deletions requirements.txt
@@ -1,8 +1,7 @@
-langchain==0.0.354
-langsmith==0.0.80
-openai==1.6.1
-fastapi==0.105.0
+langchain==0.1.7
+langchain-openai==0.0.6
+fastapi==0.109.2
 pydantic==1.10.12
-uvicorn==0.25.0
+uvicorn==0.27.1
 tiktoken==0.5.2
 nbformat==5.9.2
2 changes: 1 addition & 1 deletion schema.py
@@ -15,7 +15,7 @@ class AlttexterRequest(BaseModel):
class ImageAltText(BaseModel):
name: str = Field(..., description="File name of the image including path or URL.")
title: str = Field(..., description="Title of the image.")
-    alt_text: str = Field(..., description="Concise alternative text for the image.")
+    alt_text: str = Field(..., description="Concise alternative text for the image. The text should follow the Microsoft Style Guide.")


class AlttexterResponse(BaseModel):
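The shape the Pydantic parser enforces on the model's JSON reply can be mirrored with stdlib dataclasses (an illustrative stand-in for the project's actual Pydantic models; the `images` field name on `AlttexterResponse` is an assumption, since that class body is not shown in this diff):

```python
import json
from dataclasses import dataclass
from typing import List


@dataclass
class ImageAltText:
    name: str      # file name of the image including path or URL
    title: str     # title attribute
    alt_text: str  # concise alternative text, per the Microsoft Style Guide


@dataclass
class AlttexterResponse:
    images: List[ImageAltText]

    @classmethod
    def from_json(cls, raw: str) -> "AlttexterResponse":
        """Parse the model's JSON reply into typed records, failing on missing keys."""
        data = json.loads(raw)
        return cls(images=[ImageAltText(**item) for item in data["images"]])
```

A malformed reply (a missing `alt_text` key, say) raises a `TypeError` from the dataclass constructor, which is roughly the guarantee the real `PydanticOutputParser` provides with richer validation errors.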