Skip to content

Commit

Permalink
Merge pull request #17 from khoj-ai/features/upload-docs-from-whatsapp
Browse files Browse the repository at this point in the history
Add support for uploading documents through WhatsApp
  • Loading branch information
sabaimran authored Mar 24, 2024
2 parents a71f571 + 6e36cff commit abbccbd
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 39 deletions.
8 changes: 8 additions & 0 deletions src/flint/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@
Sorry, I wasn't able to understand your voice message this time. Could you please try typing out your message or send a shorter message? If you'd like to help me improve, email my creators at [email protected].
""".strip()

KHOJ_FAILED_DOCUMENT_UPLOAD_MESSAGE = f"""
Sorry, I wasn't able to process your document this time. Could you please try sending a different document? If you'd like to help me improve, email my creators at [email protected].
""".strip()

KHOJ_MEDIA_NOT_IMPLEMENTED_MESSAGE = f"""
Sorry, I'm not yet able to process this type of media. Could you please try sending a different type of media? If you'd like to help me improve, email my creators at [email protected].
""".strip()

KHOJ_API_URL = os.getenv("KHOJ_API_URL", "https://app.khoj.dev")
KHOJ_API_CLIENT_ID = os.getenv("KHOJ_API_CLIENT_ID")
KHOJ_API_CLIENT_SECRET = os.getenv("KHOJ_API_CLIENT_SECRET")
61 changes: 41 additions & 20 deletions src/flint/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,22 @@

# External Packages
import openai
import requests
from requests import Session

# Internal Packages
from flint.constants import KHOJ_API_URL, KHOJ_API_CLIENT_SECRET, KHOJ_API_CLIENT_ID

whatsapp_token = os.getenv("WHATSAPP_TOKEN")
whatsapp_cloud_api_session = Session()
WHATSAPP_TOKEN = os.getenv("WHATSAPP_TOKEN")
whatsapp_cloud_api_session.headers.update({"Authorization": f"Bearer {WHATSAPP_TOKEN}"})

logger = logging.getLogger(__name__)

KHOJ_CHAT_API_ENDPOINT = f"{KHOJ_API_URL}/api/chat?client_id={KHOJ_API_CLIENT_ID}"
KHOJ_INDEX_API_ENDPOINT = f"{KHOJ_API_URL}/api/v1/index/update?client_id={KHOJ_API_CLIENT_ID}&client=whatsapp"

KHOJ_CLOUD_API_SESSION = Session()
KHOJ_CLOUD_API_SESSION.headers.update({"Authorization": f"Bearer {KHOJ_API_CLIENT_SECRET}"})

COMMANDS = {
"/online": "/online",
Expand Down Expand Up @@ -48,14 +54,9 @@ def make_whatsapp_image_payload(media_id, to):
return {"type": "image", "image": {"id": media_id}, "to": to, "messaging_product": "whatsapp"}


def download_audio_message(audio_url, random_id):
headers = {
"Authorization": f"Bearer {whatsapp_token}",
}
response = requests.get(audio_url, headers=headers)
def download_media(url, filepath):
response = whatsapp_cloud_api_session.get(url)

# Create output file path with user_id and current timestamp
filepath = f"/tmp/{random_id}_audio_{int(time.time() * 1000)}.ogg"
# Download the voice message OGG file
with open(filepath, "wb") as f:
f.write(response.content)
Expand All @@ -64,14 +65,35 @@ def download_audio_message(audio_url, random_id):
return os.path.join(os.getcwd(), filepath)


def upload_document_to_khoj(document_url, random_id, phone_id, mime_type):
file_ending = mime_type.split("/")[1]
document_filepath = download_media(
document_url, f"/tmp/{random_id}_document_{int(time.time() * 1000)}.{file_ending}"
)

encoded_phone_number = urllib.parse.quote(phone_id)

with open(document_filepath, "rb") as f:
files = [
("files", (document_filepath, f, mime_type)),
]
khoj_api = f"{KHOJ_INDEX_API_ENDPOINT}&phone_number={encoded_phone_number}&create_if_not_exists=true"
response = KHOJ_CLOUD_API_SESSION.post(khoj_api, files=files)

if response.status_code == 200:
return "Document uploaded successfully"
else:
response.raise_for_status()


def transcribe_audio_message(audio_url: str, uuid: str, logger: Logger) -> str:
"Transcribe audio message using OpenAI whisper"

start_time = time.time()

try:
# Download audio file
audio_message_file = download_audio_message(audio_url, uuid)
audio_message_file = download_media(audio_url, f"/tmp/{uuid}_audio_{int(time.time() * 1000)}.ogg")
except Exception as e:
logger.error(f"Failed to download audio by {uuid} with error {e}", exc_info=True)
return None
Expand Down Expand Up @@ -103,6 +125,7 @@ def send_message_to_khoj_chat(user_message: str, user_number: str) -> str:
"""
Send the user message to the backend LLM service and return the response
"""
start_time = time.time()
encoded_phone_number = urllib.parse.quote(user_number)

if user_message.startswith(tuple(UNIMPLEMENTED_COMMANDS.keys())):
Expand All @@ -121,12 +144,14 @@ def send_message_to_khoj_chat(user_message: str, user_number: str) -> str:

encoded_user_message = urllib.parse.quote(user_message)

headers = {
"Authorization": f"Bearer {KHOJ_API_CLIENT_SECRET}",
}

khoj_api = f"{KHOJ_CHAT_API_ENDPOINT}&phone_number={encoded_phone_number}&q={encoded_user_message}&stream=false&create_if_not_exists=true"
response = requests.get(khoj_api, headers=headers)
response = KHOJ_CLOUD_API_SESSION.get(khoj_api)

end_time = time.time()
response_time = end_time - start_time
formatted_response_time = "{:.2f}".format(response_time)
logger.info(f"Khoj chat response time: {formatted_response_time} seconds")

if response.status_code == 200:
return response.json()
elif response.status_code == 429:
Expand All @@ -137,18 +162,14 @@ def send_message_to_khoj_chat(user_message: str, user_number: str) -> str:


def upload_media_to_whatsapp(media_filepath: str, media_type: str, phone_id: str) -> str:
headers = {
"Authorization": f"Bearer {whatsapp_token}",
}

with open(media_filepath, "rb") as f:
files = {
"file": (media_filepath, f, media_type),
"type": (None, media_type),
"messaging_product": (None, "whatsapp"),
}

response = requests.post(f"https://graph.facebook.com/v18.0/{phone_id}/media", headers=headers, files=files)
response = whatsapp_cloud_api_session.post(f"https://graph.facebook.com/v18.0/{phone_id}/media", files=files)

if response.status_code == 200:
response_json = response.json()
Expand Down
86 changes: 67 additions & 19 deletions src/flint/routers/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import os
import requests
from requests import Session
import time
import uuid

Expand All @@ -17,19 +18,26 @@
send_message_to_khoj_chat,
make_whatsapp_image_payload,
upload_media_to_whatsapp,
upload_document_to_khoj,
)
from flint.constants import (
KHOJ_INTRO_MESSAGE,
KHOJ_FAILED_AUDIO_TRANSCRIPTION_MESSAGE,
KHOJ_FAILED_DOCUMENT_UPLOAD_MESSAGE,
KHOJ_MEDIA_NOT_IMPLEMENTED_MESSAGE,
)


# Initialize Router
whatsapp_cloud_api_session = Session()
whatsapp_token = os.getenv("WHATSAPP_TOKEN")
whatsapp_cloud_api_session.headers.update({"Authorization": f"Bearer {whatsapp_token}"})
verify_token = os.getenv("WHATSAPP_VERIFY_TOKEN", "verify_token")
logger = logging.getLogger(__name__)
api = APIRouter()

SUPPORTED_FILE_TYPES = ["audio/ogg", "text/plain", "application/pdf"]


@api.get("/health")
async def health() -> Response:
Expand Down Expand Up @@ -134,59 +142,99 @@ async def handle_whatsapp_message(body):
message_body = handle_audio_message(audio_id)
except ValueError as e:
logger.error(f"Failed to handle audio message: {e}", exc_info=True)
await response_to_user_whatsapp(KHOJ_FAILED_AUDIO_TRANSCRIPTION_MESSAGE, from_number, body, intro_message)
await response_to_user_whatsapp(
KHOJ_FAILED_AUDIO_TRANSCRIPTION_MESSAGE, from_number, body, intro_message, direct_message=True
)
return
elif message["type"] == "document":
logger.info("document message received")
document_id = message["document"]["id"]
try:
success = handle_document_message(document_id, from_number)
if success:
message_body = "Thanks for sharing this document with me! I've uploaded it to your Khoj account."
else:
message_body = KHOJ_FAILED_DOCUMENT_UPLOAD_MESSAGE
await response_to_user_whatsapp(message_body, from_number, body, intro_message, direct_message=True)
return
except ValueError as e:
logger.error(f"Failed to handle document message: {e}", exc_info=True)
await response_to_user_whatsapp(
KHOJ_FAILED_DOCUMENT_UPLOAD_MESSAGE, from_number, body, intro_message, direct_message=True
)
return
elif message["type"] == "reaction":
logger.info(f"reaction message received: {message['reaction']['emoji']}")
return
else:
logger.error(f"Unsupported message type: {message['type']}", exc_info=True)
await response_to_user_whatsapp(
KHOJ_MEDIA_NOT_IMPLEMENTED_MESSAGE, from_number, body, intro_message, direct_message=True
)
return
await response_to_user_whatsapp(message_body, from_number, body, intro_message)


# handle audio messages
def handle_audio_message(audio_id):
random_uuid = uuid.uuid4()
audio_url = get_media_url(audio_id)
audio_url, mime_type = get_media_url(audio_id)
return transcribe_audio_message(audio_url, random_uuid, logger)


# handle document messages
def handle_document_message(document_id, phone_id):
random_uuid = uuid.uuid4()
document_url, mime_type = get_media_url(document_id)
return upload_document_to_khoj(document_url, random_uuid, phone_id, mime_type)


# get the media url from the media id
def get_media_url(media_id):
headers = {
"Authorization": f"Bearer {whatsapp_token}",
}
url = f"https://graph.facebook.com/v16.0/{media_id}/"
response = requests.get(url, headers=headers).json()
response = whatsapp_cloud_api_session.get(url).json()
mime_type = response["mime_type"]
if mime_type not in SUPPORTED_FILE_TYPES:
logger.info(f"Unsupported file type: {mime_type}")
raise ValueError(f"Unsupported file type: {mime_type}")

file_size = response["file_size"]
# If the audio message is larger than 10 MB, return None
# If the media is larger than 10 MB, return None
if int(file_size) > 10 * 1024 * 1024:
logger.info(f"Audio message is larger than 10 MB, skipping")
raise ValueError(f"Audio message is larger than 10 MB")
return response["url"]
logger.info(f"Media is larger than 10 MB, skipping")
raise ValueError(f"Media is larger than 10 MB")
return response["url"], response["mime_type"]


async def response_to_user_whatsapp(message: str, from_number: str, body, intro_message=False):
async def response_to_user_whatsapp(message: str, from_number: str, body, intro_message=False, direct_message=False):
# Initialize user message to the body of the request
user_message = message

value = body["entry"][0]["changes"][0]["value"]
phone_number_id = value["metadata"]["phone_number_id"]
headers = {
"Authorization": f"Bearer {whatsapp_token}",
"Content-Type": "application/json",
}
url = "https://graph.facebook.com/v17.0/" + phone_number_id + "/messages"

# Send Intro Message
if intro_message:
data = make_whatsapp_payload(KHOJ_INTRO_MESSAGE, from_number)
response = requests.post(url, json=data, headers=headers)
response = whatsapp_cloud_api_session.post(url, json=data)
logger.info(f"Intro message sent to {from_number}")
response.raise_for_status()

if direct_message:
# We've constructed a templated response to the user. No need to route to the LLM.
data = make_whatsapp_payload(user_message, from_number)
response = whatsapp_cloud_api_session.post(url, json=data)
response.raise_for_status()
return

# Get Response from Agent
chat_response = send_message_to_khoj_chat(user_message, from_number)

if chat_response.get("response"):
chat_response_text = chat_response["response"]
data = make_whatsapp_payload(chat_response_text, from_number)
response = requests.post(url, json=data, headers=headers)
response = whatsapp_cloud_api_session.post(url, json=data)
response.raise_for_status()
elif chat_response.get("image"):
media_url = chat_response["image"]
Expand All @@ -201,13 +249,13 @@ async def response_to_user_whatsapp(message: str, from_number: str, body, intro_

media_id = upload_media_to_whatsapp(filepath, "image/png", phone_number_id)
data = make_whatsapp_image_payload(media_id, from_number)
response = requests.post(url, json=data, headers=headers)
response = whatsapp_cloud_api_session.post(url, json=data)
response.raise_for_status()
os.remove(filepath)
elif chat_response.get("detail"):
chat_response_text = chat_response["detail"]
data = make_whatsapp_payload(chat_response_text, from_number)
response = requests.post(url, json=data, headers=headers)
response = whatsapp_cloud_api_session.post(url, json=data)
response.raise_for_status()
else:
logger.error(f"Unsupported response type: {chat_response}", exc_info=True)

0 comments on commit abbccbd

Please sign in to comment.