From b9e83c8758fa4a332f76ac5c72eaff31076073d0 Mon Sep 17 00:00:00 2001
From: Twinkll Sisodia <tsisodia@redhat.com>
Date: Mon, 13 Nov 2023 09:48:36 -0500
Subject: [PATCH 1/7] Add Prometheus instrumentation

---
 .../ui/gradio/gradio-hftgi-rag-redis/app.py   | 53 ++++++++++++-------
 1 file changed, 35 insertions(+), 18 deletions(-)

diff --git a/examples/ui/gradio/gradio-hftgi-rag-redis/app.py b/examples/ui/gradio/gradio-hftgi-rag-redis/app.py
index fda5bb5..8d8d766 100644
--- a/examples/ui/gradio/gradio-hftgi-rag-redis/app.py
+++ b/examples/ui/gradio/gradio-hftgi-rag-redis/app.py
@@ -15,7 +15,7 @@
 from langchain.llms import HuggingFaceTextGenInference
 from langchain.prompts import PromptTemplate
 from langchain.vectorstores.redis import Redis
-from prometheus_client import start_http_server, Counter
+from prometheus_client import start_http_server, Counter, Histogram, Gauge
 
 load_dotenv()
 
@@ -24,7 +24,7 @@
 APP_TITLE = os.getenv('APP_TITLE', 'Talk with your documentation')
 
 INFERENCE_SERVER_URL = os.getenv('INFERENCE_SERVER_URL')
-MAX_NEW_TOKENS = int(os.getenv('MAX_NEW_TOKENS', 512))
+MAX_NEW_TOKENS = int(os.getenv('MAX_NEW_TOKENS', 100))
 TOP_K = int(os.getenv('TOP_K', 10))
 TOP_P = float(os.getenv('TOP_P', 0.95))
 TYPICAL_P = float(os.getenv('TYPICAL_P', 0.95))
@@ -33,16 +33,20 @@
 
 REDIS_URL = os.getenv('REDIS_URL')
 REDIS_INDEX = os.getenv('REDIS_INDEX')
-
+TIMEOUT = int(os.getenv('TIMEOUT', 30))
 # Start Prometheus metrics server
 start_http_server(8000)
 
-# Create a counter metric
-FEEDBACK_COUNTER = Counter("feedback_stars", "Number of feedbacks by stars", ["stars"])
+# Create metric
+FEEDBACK_COUNTER = Counter("feedback_stars", "Number of feedbacks by stars", ["stars", "model_id"])
 MODEL_USAGE_COUNTER = Counter('model_usage', 'Number of times a model was used', ['model_id'])
+REQUEST_TIME = Gauge('request_duration_seconds', 'Time spent processing a request', ['model_id'])
+SATISFACTION = Gauge('satisfaction_rating', 'User satisfaction rating', ['rating'])
+TIMEOUTS = Counter('timeouts_total', 'Total number of request timeouts', ['model_id'])
+
 model_id = ""
 
-client = Client(base_url=INFERENCE_SERVER_URL)
+client = Client(base_url=INFERENCE_SERVER_URL,timeout=TIMEOUT)
 
 # Streaming implementation
 class QueueCallback(BaseCallbackHandler):
@@ -67,20 +71,28 @@ def remove_source_duplicates(input_list):
 def stream(input_text) -> Generator:
 
     global model_id
-
     # Create a Queue
     job_done = object()
 
     # Create a function to call - this will run in a thread
     def task():
         resp = qa_chain({"query": input_text})
-        sources = remove_source_duplicates(resp['source_documents'])
-        
+        sources = remove_source_duplicates(resp['source_documents'])  
         input = str(input_text)
-        response = client.generate(input, max_new_tokens=MAX_NEW_TOKENS)
-        text = response.generated_text
-        model_id = response.model_id
+        start_time = time.perf_counter() # start and end time to get the precise timing of the request
+        
+        try:
+            response = client.generate(input, max_new_tokens=MAX_NEW_TOKENS)
+            end_time = time.perf_counter()
+            model_id = response.model_id
+            # Record successful request time
+            REQUEST_TIME.labels(model_id=model_id).set(end_time - start_time)
+        except TimeoutError:  # or whatever exception your client throws on timeout
+            end_time = time.perf_counter()
+            TIMEOUTS.info({'model_id': model_id, 'timeout_duration': str(end_time - start_time), 'input_text': input})
+
         q.put({"model_id": response.model_id})
+        q.put({"generated_text": response.generated_text})
         print("MODEL ID IS:",model_id)
         print("Question:",input)
         if len(sources) != 0:
@@ -88,6 +100,7 @@ def task():
             for source in sources:
                 q.put("* " + str(source) + "\n")
         q.put(job_done)
+        print("Saving it...")
 
     # Create a thread and start the function
     t = Thread(target=task)
@@ -104,9 +117,11 @@ def task():
             if isinstance(next_token, dict) and 'model_id' in next_token:
                 model_id = next_token['model_id']
                 MODEL_USAGE_COUNTER.labels(model_id=model_id).inc()
+            if isinstance(next_token, dict) and 'generated_text' in next_token:
+                generated_text = next_token['generated_text']    
             elif isinstance(next_token, str):
                 content += next_token     
-                yield next_token, content, model_id
+                yield next_token, generated_text, model_id
         except Empty:
             continue
 
@@ -163,18 +178,19 @@ def task():
     return_source_documents=True
     )
         
-# Gradio implementation
 def ask_llm(message, history):
-    for next_token, content, model_id in stream(message):  
+    for next_token, generated_text, model_id in stream(message):  
         print(model_id) 
-        yield f"{content}\n\nModel ID: {model_id}"
+        model_id_box.update(value=model_id)
+        yield f"{generated_text}\n\nModel ID: {model_id}"
 
 
+# Gradio implementation
 with gr.Blocks(title="HatBot", css="footer {visibility: hidden}") as demo:    
 
     input_box = gr.Textbox(label="Your Question")
     output_answer = gr.Textbox(label="Answer", readonly=True)
-
+    model_id_box = gr.Textbox(visible=False)  # will hold the model_id
 
     gr.Interface(
         fn=ask_llm,
@@ -194,7 +210,8 @@ def ask_llm(message, history):
     def get_feedback(star):
         print("Rating: " + star)
         # Increment the counter based on the star rating received
-        FEEDBACK_COUNTER.labels(stars=str(star)).inc()
+        FEEDBACK_COUNTER.labels(stars=str(star), model_id=model_id).inc()
+        SATISFACTION.labels(rating=star).set(1)
 
         return f"Received {star} star feedback. Thank you!"
 

From ee685ec65f29b2f14df8945e8ea941664bf89d64 Mon Sep 17 00:00:00 2001
From: Twinkll Sisodia <tsisodia@redhat.com>
Date: Mon, 13 Nov 2023 10:19:09 -0500
Subject: [PATCH 2/7] Modify the streaming calls to get model_id and
 source_docs using langchain

---
 .../ui/gradio/gradio-hftgi-rag-redis/app.py   | 28 +++++++++++++------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/examples/ui/gradio/gradio-hftgi-rag-redis/app.py b/examples/ui/gradio/gradio-hftgi-rag-redis/app.py
index 8d8d766..544aeac 100644
--- a/examples/ui/gradio/gradio-hftgi-rag-redis/app.py
+++ b/examples/ui/gradio/gradio-hftgi-rag-redis/app.py
@@ -16,6 +16,7 @@
 from langchain.prompts import PromptTemplate
 from langchain.vectorstores.redis import Redis
 from prometheus_client import start_http_server, Counter, Histogram, Gauge
+import requests
 
 load_dotenv()
 
@@ -68,6 +69,15 @@ def remove_source_duplicates(input_list):
             unique_list.append(item.metadata['source'])
     return unique_list
 
+def get_model_info():
+    response = requests.get(INFERENCE_SERVER_URL + "/info")
+    json_response = response.json()
+    print(json_response)
+    model_id = json_response['model_id']  # Extract the model_id
+    print("Model ID:", model_id)  # Print the model_id
+    return model_id  # Return the model_id instead of the whole JSON
+
+
 def stream(input_text) -> Generator:
 
     global model_id
@@ -82,17 +92,17 @@ def task():
         start_time = time.perf_counter() # start and end time to get the precise timing of the request
         
         try:
-            response = client.generate(input, max_new_tokens=MAX_NEW_TOKENS)
+            # response = client.generate(input, max_new_tokens=MAX_NEW_TOKENS)
+            model_id = get_model_info()
             end_time = time.perf_counter()
-            model_id = response.model_id
             # Record successful request time
             REQUEST_TIME.labels(model_id=model_id).set(end_time - start_time)
         except TimeoutError:  # or whatever exception your client throws on timeout
             end_time = time.perf_counter()
             TIMEOUTS.info({'model_id': model_id, 'timeout_duration': str(end_time - start_time), 'input_text': input})
 
-        q.put({"model_id": response.model_id})
-        q.put({"generated_text": response.generated_text})
+        q.put({"model_id": model_id})
+        # q.put({"generated_text": resp.generated_text})
         print("MODEL ID IS:",model_id)
         print("Question:",input)
         if len(sources) != 0:
@@ -117,11 +127,11 @@ def task():
             if isinstance(next_token, dict) and 'model_id' in next_token:
                 model_id = next_token['model_id']
                 MODEL_USAGE_COUNTER.labels(model_id=model_id).inc()
-            if isinstance(next_token, dict) and 'generated_text' in next_token:
-                generated_text = next_token['generated_text']    
+            # if isinstance(next_token, dict) and 'generated_text' in next_token:
+            #     generated_text = next_token['generated_text']    
             elif isinstance(next_token, str):
                 content += next_token     
-                yield next_token, generated_text, model_id
+                yield next_token, content, model_id
         except Empty:
             continue
 
@@ -179,10 +189,10 @@ def task():
     )
         
 def ask_llm(message, history):
-    for next_token, generated_text, model_id in stream(message):  
+    for next_token, content, model_id in stream(message):  
         print(model_id) 
         model_id_box.update(value=model_id)
-        yield f"{generated_text}\n\nModel ID: {model_id}"
+        yield f"{content}\n\nModel ID: {model_id}"
 
 
 # Gradio implementation

From 927bc5cb2d968a17c0775553b785e0b5c34fe432 Mon Sep 17 00:00:00 2001
From: Twinkll Sisodia <tsisodia@redhat.com>
Date: Tue, 21 Nov 2023 16:08:30 -0500
Subject: [PATCH 3/7] Code refinement

---
 .../ui/gradio/gradio-hftgi-rag-redis/app.py   | 51 +++++++------------
 1 file changed, 19 insertions(+), 32 deletions(-)

diff --git a/examples/ui/gradio/gradio-hftgi-rag-redis/app.py b/examples/ui/gradio/gradio-hftgi-rag-redis/app.py
index 544aeac..2568e3a 100644
--- a/examples/ui/gradio/gradio-hftgi-rag-redis/app.py
+++ b/examples/ui/gradio/gradio-hftgi-rag-redis/app.py
@@ -42,8 +42,6 @@
 FEEDBACK_COUNTER = Counter("feedback_stars", "Number of feedbacks by stars", ["stars", "model_id"])
 MODEL_USAGE_COUNTER = Counter('model_usage', 'Number of times a model was used', ['model_id'])
 REQUEST_TIME = Gauge('request_duration_seconds', 'Time spent processing a request', ['model_id'])
-SATISFACTION = Gauge('satisfaction_rating', 'User satisfaction rating', ['rating'])
-TIMEOUTS = Counter('timeouts_total', 'Total number of request timeouts', ['model_id'])
 
 model_id = ""
 
@@ -69,22 +67,13 @@ def remove_source_duplicates(input_list):
             unique_list.append(item.metadata['source'])
     return unique_list
 
-def get_model_info():
-    response = requests.get(INFERENCE_SERVER_URL + "/info")
-    json_response = response.json()
-    print(json_response)
-    model_id = json_response['model_id']  # Extract the model_id
-    print("Model ID:", model_id)  # Print the model_id
-    return model_id  # Return the model_id instead of the whole JSON
-
 
 def stream(input_text) -> Generator:
 
     global model_id
-    # Create a Queue
+    # Create queue
     job_done = object()
 
-    # Create a function to call - this will run in a thread
     def task():
         resp = qa_chain({"query": input_text})
         sources = remove_source_duplicates(resp['source_documents'])  
@@ -92,17 +81,17 @@ def task():
         start_time = time.perf_counter() # start and end time to get the precise timing of the request
         
         try:
-            # response = client.generate(input, max_new_tokens=MAX_NEW_TOKENS)
-            model_id = get_model_info()
+            response = requests.get(INFERENCE_SERVER_URL + "/info")
+            json_response = response.json()
+            print(json_response)
+            model_id = json_response['model_id']
             end_time = time.perf_counter()
             # Record successful request time
             REQUEST_TIME.labels(model_id=model_id).set(end_time - start_time)
         except TimeoutError:  # or whatever exception your client throws on timeout
             end_time = time.perf_counter()
-            TIMEOUTS.info({'model_id': model_id, 'timeout_duration': str(end_time - start_time), 'input_text': input})
 
         q.put({"model_id": model_id})
-        # q.put({"generated_text": resp.generated_text})
         print("MODEL ID IS:",model_id)
         print("Question:",input)
         if len(sources) != 0:
@@ -126,9 +115,7 @@ def task():
                 break   
             if isinstance(next_token, dict) and 'model_id' in next_token:
                 model_id = next_token['model_id']
-                MODEL_USAGE_COUNTER.labels(model_id=model_id).inc()
-            # if isinstance(next_token, dict) and 'generated_text' in next_token:
-            #     generated_text = next_token['generated_text']    
+                MODEL_USAGE_COUNTER.labels(model_id=model_id).inc()    
             elif isinstance(next_token, str):
                 content += next_token     
                 yield next_token, content, model_id
@@ -196,33 +183,33 @@ def ask_llm(message, history):
 
 
 # Gradio implementation
-with gr.Blocks(title="HatBot", css="footer {visibility: hidden}") as demo:    
+with gr.Blocks(title="HatBot") as demo:    
+
+    with gr.Column(class_name="column-class"):
+        project_box = gr.Textbox(label="Your Project")
+        customer_box = gr.Textbox(label="Customer")
+        input_box = gr.Textbox(label="Your Question")
+        submit_button = gr.Button("Submit")
+        output_answer = gr.Textbox(label="Answer", readonly=True)
 
-    input_box = gr.Textbox(label="Your Question")
-    output_answer = gr.Textbox(label="Answer", readonly=True)
     model_id_box = gr.Textbox(visible=False)  # will hold the model_id
 
-    gr.Interface(
+    submit_button.click(
         fn=ask_llm,
         inputs=[input_box],
-        outputs=[output_answer],
-        clear_btn=None,
-        retry_btn=None,
-        undo_btn=None,
-        stop_btn=None,
-        description=APP_TITLE
-        )    
+        outputs=[output_answer]
+    )
     
     radio = gr.Radio(["1", "2", "3", "4", "5"], label="Star Rating")
     output = gr.Textbox(label="Output Box")
 
+    download_button = gr.Button("Download")
+    
     @radio.input(inputs=radio, outputs=output)
     def get_feedback(star):
         print("Rating: " + star)
         # Increment the counter based on the star rating received
         FEEDBACK_COUNTER.labels(stars=str(star), model_id=model_id).inc()
-        SATISFACTION.labels(rating=star).set(1)
-
         return f"Received {star} star feedback. Thank you!"
 
 

From 3b99965d945087e52b4a4259b7d95366fe60f1fa Mon Sep 17 00:00:00 2001
From: Twinkll Sisodia <tsisodia@redhat.com>
Date: Tue, 21 Nov 2023 16:09:39 -0500
Subject: [PATCH 4/7] Add loop to send 10 requests and modify script according
 to new UI

---
 .../gradio-hftgi-rag-redis/chatbot_test.py    | 46 +++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 examples/ui/gradio/gradio-hftgi-rag-redis/chatbot_test.py

diff --git a/examples/ui/gradio/gradio-hftgi-rag-redis/chatbot_test.py b/examples/ui/gradio/gradio-hftgi-rag-redis/chatbot_test.py
new file mode 100644
index 0000000..3a374aa
--- /dev/null
+++ b/examples/ui/gradio/gradio-hftgi-rag-redis/chatbot_test.py
@@ -0,0 +1,46 @@
+# Import necessary libraries
+import time
+import json
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.common.action_chains import ActionChains
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.wait import WebDriverWait
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+from selenium.common.exceptions import TimeoutException
+import random
+
+driver = webdriver.Firefox()
+driver.get("https://canary-gradio-vectordb.apps.ai-dev01.kni.syseng.devcluster.openshift.com")
+driver.set_window_size(1084, 811)
+timeout = 10
+
+for user in range(10):
+    element_present = EC.presence_of_element_located((By.CSS_SELECTOR, "#component-0 .scroll-hide"))
+    WebDriverWait(driver, timeout).until(element_present)
+
+    # User fills in the project, customer, and question fields, then submits
+    project_input = driver.find_element(By.CSS_SELECTOR, "#component-3 .scroll-hide")
+    project_input.clear()  # Clearing any previous input
+    project_input.send_keys(f"User {user + 1}: OpenShift AI")
+    customer_input = driver.find_element(By.CSS_SELECTOR, "#component-4 .scroll-hide")
+    customer_input.clear()  # Clearing any previous input
+    customer_input.send_keys(f"User {user + 1}: Accenture")
+    question_input = driver.find_element(By.CSS_SELECTOR, "#component-5 .scroll-hide")
+    question_input.clear()  # Clearing any previous input
+    question_input.send_keys(f"User {user + 1}: What is OpenShift AI?")
+    driver.find_element(By.ID, "component-6").click()
+
+    label_list=[1,2,3,4,5]
+    random_num = random.choice(label_list)
+    labelname=str(random_num)+'-radio-label'
+    label_id="label[data-testid='"+labelname+"']"
+
+    # # Wait for and click on the feedback element
+    # label_id = "label[data-testid='2-radio-label']"
+    WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.CSS_SELECTOR, label_id))).click()
+    time.sleep(2)  # Adding a delay for better simulation of user interaction
+
+# Close the browser after the loop completes
+#driver.quit()

From 35a58d255f266e2e210873b8b93fff364bded070 Mon Sep 17 00:00:00 2001
From: Twinkll Sisodia <tsisodia@redhat.com>
Date: Wed, 22 Nov 2023 12:23:47 -0500
Subject: [PATCH 5/7] Script refinement

---
 .../gradio-hftgi-rag-redis/chatbot_test.py    | 30 -------------------
 1 file changed, 30 deletions(-)

diff --git a/examples/ui/gradio/gradio-hftgi-rag-redis/chatbot_test.py b/examples/ui/gradio/gradio-hftgi-rag-redis/chatbot_test.py
index bd08933..27be4bd 100644
--- a/examples/ui/gradio/gradio-hftgi-rag-redis/chatbot_test.py
+++ b/examples/ui/gradio/gradio-hftgi-rag-redis/chatbot_test.py
@@ -1,13 +1,10 @@
 # Import necessary libraries
-# Generated by Selenium IDE
-# import pytest
 import time
 import json
 from selenium import webdriver
 from selenium.webdriver.common.by import By
 from selenium.webdriver.common.action_chains import ActionChains
 from selenium.webdriver.support import expected_conditions as EC
-from selenium.webdriver.support import expected_conditions  as EC
 from selenium.webdriver.support.wait import WebDriverWait
 from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
@@ -40,36 +37,9 @@
     labelname=str(random_num)+'-radio-label'
     label_id="label[data-testid='"+labelname+"']"
 
-    # # Wait for and click on the feedback element
     # label_id = "label[data-testid='2-radio-label']"
     WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.CSS_SELECTOR, label_id))).click()
     time.sleep(2)  # Adding a delay for better simulation of user interaction
 
 # Close the browser after the loop completes
 #driver.quit()
-
-# read yaml file
-# loop 
-driver = webdriver.Firefox()
-vars = {}
-
-driver.get("https://gradio-hftgi-rag-redis-vectordb.apps.ai-dev01.kni.syseng.devcluster.openshift.com")
-driver.set_window_size(1084, 811)
-timeout = 10
-try:
-    # element_present = EC.presence_of_element_located((By.CSS_SELECTOR, "#chatinput .scroll-hide"))
-    element_present = EC.presence_of_element_located((By.CSS_SELECTOR, "#component-1 .scroll-hide"))
-    WebDriverWait(driver, timeout).until(element_present)
-except TimeoutException:
-    print("Timed out waiting for page to load")
-
-driver.find_element(By.CSS_SELECTOR, "#component-1 .scroll-hide")
-driver.find_element(By.CSS_SELECTOR, "#component-1 .scroll-hide").send_keys("hi how are you")
-driver.find_element(By.ID, "component-12").click()
-
-label_id="label[data-testid='2-radio-label']"
-x = WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.CSS_SELECTOR, label_id))).click()
-
-# if needed add some delay
-# driver.quit()
-# end loop

From 0e0f4ed3bf6e1cbcd6120b5ddce500d089897913 Mon Sep 17 00:00:00 2001
From: Twinkll Sisodia <tsisodia@redhat.com>
Date: Wed, 22 Nov 2023 12:36:41 -0500
Subject: [PATCH 6/7] Add more negative star ratings to the list

---
 examples/ui/gradio/gradio-hftgi-rag-redis/chatbot_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/ui/gradio/gradio-hftgi-rag-redis/chatbot_test.py b/examples/ui/gradio/gradio-hftgi-rag-redis/chatbot_test.py
index 27be4bd..ba8cce2 100644
--- a/examples/ui/gradio/gradio-hftgi-rag-redis/chatbot_test.py
+++ b/examples/ui/gradio/gradio-hftgi-rag-redis/chatbot_test.py
@@ -32,7 +32,7 @@
     question_input.send_keys(f"User {user + 1}: What is OpenShift AI?")
     driver.find_element(By.ID, "component-6").click()
 
-    label_list=[1,2,3,4,5]
+    label_list=[1,1,1,1,1,1,1,1,2,3,4,5]
     random_num = random.choice(label_list)
     labelname=str(random_num)+'-radio-label'
     label_id="label[data-testid='"+labelname+"']"

From ac51717ae6619efaf085461fb8af2aac81f1c236 Mon Sep 17 00:00:00 2001
From: Twinkll Sisodia <tsisodia@redhat.com>
Date: Tue, 28 Nov 2023 10:38:32 -0500
Subject: [PATCH 7/7] Code refinement with correct random list

---
 examples/ui/gradio/gradio-hftgi-rag-redis/chatbot_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/ui/gradio/gradio-hftgi-rag-redis/chatbot_test.py b/examples/ui/gradio/gradio-hftgi-rag-redis/chatbot_test.py
index ba8cce2..09029e3 100644
--- a/examples/ui/gradio/gradio-hftgi-rag-redis/chatbot_test.py
+++ b/examples/ui/gradio/gradio-hftgi-rag-redis/chatbot_test.py
@@ -16,7 +16,7 @@
 driver.set_window_size(1084, 811)
 timeout = 10
 
-for user in range(10):
+for user in range(20):
     element_present = EC.presence_of_element_located((By.CSS_SELECTOR, "#component-0 .scroll-hide"))
     WebDriverWait(driver, timeout).until(element_present)
 
@@ -32,7 +32,7 @@
     question_input.send_keys(f"User {user + 1}: What is OpenShift AI?")
     driver.find_element(By.ID, "component-6").click()
 
-    label_list=[1,1,1,1,1,1,1,1,2,3,4,5]
+    label_list=[1,2,3,4,5]
     random_num = random.choice(label_list)
     labelname=str(random_num)+'-radio-label'
     label_id="label[data-testid='"+labelname+"']"