From d665d9a9ff6d8bb94f2eef40e6940eb7fe1248d2 Mon Sep 17 00:00:00 2001
From: Jonathan Algar <jonathan.algar@outsystems.com>
Date: Mon, 19 Feb 2024 21:28:52 +0000
Subject: [PATCH 1/4] Add Azure OpenAI model option and update dependencies

---
 .env_example     |  8 +++++++-
 README.md        |  7 +++----
 alttexter.py     | 44 +++++++++++++++++++++++++-------------------
 requirements.txt |  8 +++-----
 schema.py        |  2 +-
 5 files changed, 39 insertions(+), 30 deletions(-)

diff --git a/.env_example b/.env_example
index 4213619..583f3d7 100644
--- a/.env_example
+++ b/.env_example
@@ -4,8 +4,14 @@ LANGCHAIN_ENDPOINT=https://api.smith.langchain.com
 LANGCHAIN_PROJECT=Experiments
 LANGCHAIN_API_KEY=
 
-# OpenAI Platform API key. See https://platform.openai.com/docs/quickstart/account-setup
+# Either "openai" or "openai_azure"
+ALTTEXTER_MODEL=openai
+# openai: See https://platform.openai.com/docs/quickstart/account-setup
 OPENAI_API_KEY=
+# openai_azure: See https://learn.microsoft.com/en-us/azure/ai-services/openai/quickstart?tabs=command-line%2Cpython&pivots=rest-api#retrieve-key-and-endpoint
+AZURE_OPENAI_ENDPOINT=
+AZURE_OPENAI_API_KEY=
+AZURE_OPENAI_DEPLOYMENT=
 
 # Host and port to bind service to
 ALTTEXTER_HOST=0.0.0.0
diff --git a/README.md b/README.md
index 3f8eb75..4950287 100644
--- a/README.md
+++ b/README.md
@@ -25,7 +25,7 @@ via [gov.uk:](https://design102.blog.gov.uk/2022/01/14/whats-the-alternative-how
 1. Clone the repo.
 1. Copy `.env-example` to `.env` and fill in the required env variables.
 1. Optionally edit `config.json` to customize CORS and logging.
-1. Run `docker-compose up` to build and start the service.
+1. Run `docker-compose up` (v1) or `docker compose up` (v2) to build and start the service.
 1. Run `python client-example.py example/apis.ipynb` to test. Expected output:
 
     ```bash
@@ -50,7 +50,7 @@ via [gov.uk:](https://design102.blog.gov.uk/2022/01/14/whats-the-alternative-how
 ## Features
 
 * Uses LangChain's [Pydantic parser](https://python.langchain.com/docs/modules/model_io/output_parsers/types/pydantic) as foundation for system prompt to reliably generate a JSON of expected format ([function calling](https://community.openai.com/t/does-the-model-gpt-4-vision-preview-have-function-calling/490197/2) will be even cooler).
-* Optionally integrates with LangSmith (in beta) to serve [trace URL](https://docs.smith.langchain.com/tracing/tracing-faq) for each generation.
+* Optionally integrates with LangSmith to serve [trace URL](https://docs.smith.langchain.com/tracing/tracing-faq) for each generation.
 
 ## TODO
 
@@ -58,6 +58,5 @@ via [gov.uk:](https://design102.blog.gov.uk/2022/01/14/whats-the-alternative-how
 - [ ] Unit tests
 - [ ] Special handling for large files and images
 - [ ] Rate limiting at the service level
-- [ ] Option to use [Azure OpenAI Services](https://techcommunity.microsoft.com/t5/ai-azure-ai-services-blog/gpt-4-turbo-with-vision-is-now-available-on-azure-openai-service/ba-p/4008456)
 - [ ] Explore extending to multimodal models beyond OpenAI
-- [ ] Extend this TODO list
+- [X] Option to use [Azure OpenAI Services](https://techcommunity.microsoft.com/t5/ai-azure-ai-services-blog/gpt-4-turbo-with-vision-is-now-available-on-azure-openai-service/ba-p/4008456)
diff --git a/alttexter.py b/alttexter.py
index 8665bc8..4dacf7e 100644
--- a/alttexter.py
+++ b/alttexter.py
@@ -6,10 +6,10 @@
 
 from langchain import callbacks
 from langchain.callbacks.tracers.langchain import wait_for_all_tracers
-from langchain.chat_models import ChatOpenAI
+from langchain_community.chat_models import AzureChatOpenAI, ChatOpenAI
 from langchain.output_parsers import PydanticOutputParser
 from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate
-from langchain_core.messages import HumanMessage
+from langchain_core.messages import HumanMessage, SystemMessage
 from langsmith import Client
 
 from schema import AlttexterResponse, ImageAltText
@@ -28,12 +28,26 @@ def alttexter(input_text: str, images: dict, image_urls: List[str]) -> Tuple[Lis
     Returns:
         Tuple[AlttexterResponse, str]: Generated alt texts and optional tracing URL.
     """
-    llm = ChatOpenAI(
-        verbose=True,
-        temperature=0,
-        model="gpt-4-vision-preview",
-        max_tokens=4096
-    )
+
+    if os.getenv('ALTTEXTER_MODEL') == 'openai':
+        llm = ChatOpenAI(
+            verbose=True,
+            temperature=0,
+            model="gpt-4-vision-preview",
+            max_tokens=4096
+        )
+    elif os.getenv('ALTTEXTER_MODEL') == 'openai_azure':
+        llm = AzureChatOpenAI(
+            verbose=True,
+            temperature=0,  
+            openai_api_version="2024-02-15-preview",
+            azure_deployment=os.getenv("AZURE_DEPLOYMENT", "vision-preview"),
+            model="vision-preview",
+            max_tokens=4096
+        )
+    else:
+        error_message = f"Unsupported model specified: {os.getenv('ALTTEXTER_MODEL')}"
+        raise ValueError(error_message)
 
     content = [
         {
@@ -73,20 +87,12 @@ def alttexter(input_text: str, images: dict, image_urls: List[str]) -> Tuple[Lis
         content.append(image_entry)
 
     parser = PydanticOutputParser(pydantic_object=AlttexterResponse)
-
-    system_prompt = SystemMessagePromptTemplate.from_template(
-        template="""You are a world-class expert at generating concise alternative text and title attributes for images defined in technical articles written in markdown format.
-
-For each image in the article use a contextual understanding of the article text and the image itself to generate a concise alternative text and title attribute.
-
-{format_instructions}""",
-        partial_variables={"format_instructions": parser.get_format_instructions()},
-    )
-
     all_image_identifiers = list(images.keys()) + image_urls
+
     messages = ChatPromptTemplate.from_messages(
         [
-            system_prompt,
+            SystemMessage(
+                content='''You are a world-class expert at generating concise alternative text and title attributes for images defined in technical articles written in markdown format.\nFor each image in the article use a contextual understanding of the article text and the image itself to generate a concise alternative text and title attribute.\n{format_instructions}'''.format(format_instructions=parser.get_format_instructions())),
             HumanMessage(content=content),
             HumanMessage(
                 content=f"Tip: List of file names of images including their paths or URLs: {str(all_image_identifiers)}"
diff --git a/requirements.txt b/requirements.txt
index 261cbff..1aa6f0a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,6 @@
-langchain==0.0.354
-langsmith==0.0.80
-openai==1.6.1
-fastapi==0.105.0
+langchain==0.1.7
+fastapi==0.109.2
 pydantic==1.10.12
-uvicorn==0.25.0
+uvicorn==0.27.1
 tiktoken==0.5.2
 nbformat==5.9.2
\ No newline at end of file
diff --git a/schema.py b/schema.py
index 436b268..0b6d920 100644
--- a/schema.py
+++ b/schema.py
@@ -15,7 +15,7 @@ class AlttexterRequest(BaseModel):
 class ImageAltText(BaseModel):
     name: str = Field(..., description="File name of the image including path or URL.")
     title: str = Field(..., description="Title of the image.")
-    alt_text: str = Field(..., description="Concise alternative text for the image.")
+    alt_text: str = Field(..., description="Concise alternative text for the image. The text should follow the Microsoft Style Guide.")
 
 
 class AlttexterResponse(BaseModel):

From 1b7578f00d2cfea2a0f84d4ca18131bf96e03e80 Mon Sep 17 00:00:00 2001
From: Jonathan Algar <jonathan.algar@outsystems.com>
Date: Mon, 19 Feb 2024 21:59:50 +0000
Subject: [PATCH 2/4] Refactor LLM selection into determine_llm()

---
 alttexter.py | 38 ++++++++++++++------------------------
 1 file changed, 14 insertions(+), 24 deletions(-)

diff --git a/alttexter.py b/alttexter.py
index 4dacf7e..62f50a0 100644
--- a/alttexter.py
+++ b/alttexter.py
@@ -15,6 +15,18 @@
 from schema import AlttexterResponse, ImageAltText
 
 
+def determine_llm() -> ChatModelBase:
+    """Determine which LLM to use based on environment variables."""
+    model_env = os.getenv('ALTTEXTER_MODEL')
+    if model_env == 'openai':
+        return ChatOpenAI(verbose=True, temperature=0, model="gpt-4-vision-preview", max_tokens=4096)
+    elif model_env == 'openai_azure':
+        return AzureChatOpenAI(verbose=True, temperature=0, openai_api_version="2024-02-15-preview",
+                               azure_deployment=os.getenv("AZURE_DEPLOYMENT", "vision-preview"),
+                               model="vision-preview", max_tokens=4096)
+    else:
+        raise ValueError(f"Unsupported model specified: {model_env}")
+
 def alttexter(input_text: str, images: dict, image_urls: List[str]) -> Tuple[List[ImageAltText], Optional[str]]:
 
     """
@@ -28,34 +40,12 @@ def alttexter(input_text: str, images: dict, image_urls: List[str]) -> Tuple[Lis
     Returns:
         Tuple[AlttexterResponse, str]: Generated alt texts and optional tracing URL.
     """
-
-    if os.getenv('ALTTEXTER_MODEL') == 'openai':
-        llm = ChatOpenAI(
-            verbose=True,
-            temperature=0,
-            model="gpt-4-vision-preview",
-            max_tokens=4096
-        )
-    elif os.getenv('ALTTEXTER_MODEL') == 'openai_azure':
-        llm = AzureChatOpenAI(
-            verbose=True,
-            temperature=0,  
-            openai_api_version="2024-02-15-preview",
-            azure_deployment=os.getenv("AZURE_DEPLOYMENT", "vision-preview"),
-            model="vision-preview",
-            max_tokens=4096
-        )
-    else:
-        error_message = f"Unsupported model specified: {os.getenv('ALTTEXTER_MODEL')}"
-        raise ValueError(error_message)
+    llm = determine_llm()
 
     content = [
         {
             "type": "text",
-            "text": f"""ARTICLE:
-
-{input_text}
-            """
+            "text": f"""ARTICLE: {input_text}"""
         }
     ]
 

From 766bf7d830f23acc8dd812fd82bd5eef0dfcff13 Mon Sep 17 00:00:00 2001
From: Jonathan Algar <jonathan.algar@outsystems.com>
Date: Mon, 19 Feb 2024 22:08:39 +0000
Subject: [PATCH 3/4] Fix determine_llm() return annotation and tidy imports

---
 alttexter.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/alttexter.py b/alttexter.py
index 62f50a0..4e49372 100644
--- a/alttexter.py
+++ b/alttexter.py
@@ -6,16 +6,16 @@
 
 from langchain import callbacks
 from langchain.callbacks.tracers.langchain import wait_for_all_tracers
-from langchain_community.chat_models import AzureChatOpenAI, ChatOpenAI
 from langchain.output_parsers import PydanticOutputParser
-from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate
+from langchain.prompts import ChatPromptTemplate
+from langchain_community.chat_models import AzureChatOpenAI, ChatOpenAI
 from langchain_core.messages import HumanMessage, SystemMessage
 from langsmith import Client
 
 from schema import AlttexterResponse, ImageAltText
 
 
-def determine_llm() -> ChatModelBase:
+def determine_llm() -> ChatOpenAI:
     """Determine which LLM to use based on environment variables."""
     model_env = os.getenv('ALTTEXTER_MODEL')
     if model_env == 'openai':
@@ -27,8 +27,8 @@ def determine_llm() -> ChatModelBase:
     else:
         raise ValueError(f"Unsupported model specified: {model_env}")
 
-def alttexter(input_text: str, images: dict, image_urls: List[str]) -> Tuple[List[ImageAltText], Optional[str]]:
 
+def alttexter(input_text: str, images: dict, image_urls: List[str]) -> Tuple[List[ImageAltText], Optional[str]]:
     """
     Processes input text and images to generate alt text and title attributes.
 

From a02e77e83c7ad332f53f9f45d582335f597e7197 Mon Sep 17 00:00:00 2001
From: Jonathan Algar <jonathan.algar@outsystems.com>
Date: Mon, 19 Feb 2024 23:10:12 +0000
Subject: [PATCH 4/4] Switch to langchain-openai and AZURE_OPENAI_DEPLOYMENT env var

---
 alttexter.py       | 6 +++---
 docker-compose.yml | 8 ++++++--
 requirements.txt   | 1 +
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/alttexter.py b/alttexter.py
index 4e49372..7cdde2c 100644
--- a/alttexter.py
+++ b/alttexter.py
@@ -8,8 +8,8 @@
 from langchain.callbacks.tracers.langchain import wait_for_all_tracers
 from langchain.output_parsers import PydanticOutputParser
 from langchain.prompts import ChatPromptTemplate
-from langchain_community.chat_models import AzureChatOpenAI, ChatOpenAI
 from langchain_core.messages import HumanMessage, SystemMessage
+from langchain_openai import AzureChatOpenAI, ChatOpenAI
 from langsmith import Client
 
 from schema import AlttexterResponse, ImageAltText
@@ -17,12 +17,12 @@
 
 def determine_llm() -> ChatOpenAI:
     """Determine which LLM to use based on environment variables."""
-    model_env = os.getenv('ALTTEXTER_MODEL')
+    model_env = os.getenv("ALTTEXTER_MODEL")
     if model_env == 'openai':
         return ChatOpenAI(verbose=True, temperature=0, model="gpt-4-vision-preview", max_tokens=4096)
     elif model_env == 'openai_azure':
         return AzureChatOpenAI(verbose=True, temperature=0, openai_api_version="2024-02-15-preview",
-                               azure_deployment=os.getenv("AZURE_DEPLOYMENT", "vision-preview"),
+                               azure_deployment=os.getenv("AZURE_OPENAI_DEPLOYMENT"),
                                model="vision-preview", max_tokens=4096)
     else:
         raise ValueError(f"Unsupported model specified: {model_env}")
diff --git a/docker-compose.yml b/docker-compose.yml
index 33d0b7c..171d9f4 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -11,10 +11,14 @@ services:
     volumes:
       - ${ALTTEXTER_CERTS_DIR}:/certs
     environment:
-      - LANGCHAIN_PROJECT
       - LANGCHAIN_TRACING_V2
       - LANGCHAIN_ENDPOINT
+      - LANGCHAIN_PROJECT
       - LANGCHAIN_API_KEY
-      - ALTTEXTER_TOKEN
+      - ALTTEXTER_MODEL
       - OPENAI_API_KEY
+      - AZURE_OPENAI_ENDPOINT
+      - AZURE_OPENAI_API_KEY
+      - AZURE_OPENAI_DEPLOYMENT
+      - ALTTEXTER_TOKEN
     command: python main.py --port ${ALTTEXTER_PORT} --host ${ALTTEXTER_HOST} --certfile /certs/${ALTTEXTER_CERTFILE_NAME} --keyfile /certs/${ALTTEXTER_KEYFILE_NAME}
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 1aa6f0a..623cfb3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,5 @@
 langchain==0.1.7
+langchain-openai==0.0.6
 fastapi==0.109.2
 pydantic==1.10.12
 uvicorn==0.27.1