Skip to content

Commit

Permalink
Merge pull request #456 from gilcu3/tts-support
Browse files Browse the repository at this point in the history
tts support (WIP)
  • Loading branch information
n3d1117 authored Nov 18, 2023
2 parents 2a0d1aa + c3ec1af commit 8fd24a9
Show file tree
Hide file tree
Showing 7 changed files with 224 additions and 3 deletions.
3 changes: 3 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ ALLOWED_TELEGRAM_USER_IDS=USER_ID_1,USER_ID_2
# TRANSCRIPTION_PRICE=0.006
# ENABLE_QUOTING=true
# ENABLE_IMAGE_GENERATION=true
# ENABLE_TTS_GENERATION=true
# ENABLE_TRANSCRIPTION=true
# PROXY=http://localhost:8080
# OPENAI_MODEL=gpt-3.5-turbo
Expand All @@ -42,4 +43,6 @@ ALLOWED_TELEGRAM_USER_IDS=USER_ID_1,USER_ID_2
# IMAGE_FORMAT=document
# GROUP_TRIGGER_KEYWORD=""
# IGNORE_GROUP_TRANSCRIPTIONS=true
# TTS_MODEL="tts-1"
# TTS_VOICE="alloy"
# BOT_LANGUAGE=en
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ Check out the [Budget Manual](https://github.com/n3d1117/chatgpt-telegram-bot/di
| `ENABLE_QUOTING` | Whether to enable message quoting in private chats | `true` |
| `ENABLE_IMAGE_GENERATION` | Whether to enable image generation via the `/image` command | `true` |
| `ENABLE_TRANSCRIPTION` | Whether to enable transcriptions of audio and video messages | `true` |
| `ENABLE_TTS_GENERATION` | Whether to enable text-to-speech generation via the `/tts` command | `true` |
| `PROXY` | Proxy to be used for OpenAI and Telegram bot (e.g. `http://localhost:8080`) | - |
| `OPENAI_MODEL` | The OpenAI model to use for generating responses. You can find all available models [here](https://platform.openai.com/docs/models/) | `gpt-3.5-turbo` |
| `OPENAI_API_BASE` | Endpoint URL for unofficial OpenAI-compatible APIs (e.g., LocalAI or text-generation-webui) | Default OpenAI API URL |
Expand All @@ -107,6 +108,8 @@ Check out the [Budget Manual](https://github.com/n3d1117/chatgpt-telegram-bot/di
| `IGNORE_GROUP_TRANSCRIPTIONS` | If set to true, the bot will not process transcriptions in group chats | `true` |
| `BOT_LANGUAGE` | Language of general bot messages. Currently available: `en`, `de`, `ru`, `tr`, `it`, `fi`, `es`, `id`, `nl`, `zh-cn`, `zh-tw`, `vi`, `fa`, `pt-br`, `uk`, `ms`, `uz`. [Contribute with additional translations](https://github.com/n3d1117/chatgpt-telegram-bot/discussions/219) | `en` |
| `WHISPER_PROMPT` | To improve the accuracy of Whisper's transcription service, especially for specific names or terms, you can set up a custom message. [Speech to text - Prompting](https://platform.openai.com/docs/guides/speech-to-text/prompting) | `-` |
| `TTS_VOICE` | The Text to Speech voice to use. Allowed values: `alloy`, `echo`, `fable`, `onyx`, `nova`, or `shimmer` | `alloy` |
| `TTS_MODEL` | The Text to Speech model to use. Allowed values: `tts-1` or `tts-1-hd` | `tts-1` |

Check out the [official API reference](https://platform.openai.com/docs/api-reference/chat) for more details.

Expand Down
5 changes: 5 additions & 0 deletions bot/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ def main():
'bot_language': os.environ.get('BOT_LANGUAGE', 'en'),
'show_plugins_used': os.environ.get('SHOW_PLUGINS_USED', 'false').lower() == 'true',
'whisper_prompt': os.environ.get('WHISPER_PROMPT', ''),
'tts_model': os.environ.get('TTS_MODEL', 'tts-1'),
'tts_voice': os.environ.get('TTS_VOICE', 'alloy'),
}

if openai_config['enable_functions'] and not functions_available:
Expand All @@ -73,6 +75,7 @@ def main():
'enable_quoting': os.environ.get('ENABLE_QUOTING', 'true').lower() == 'true',
'enable_image_generation': os.environ.get('ENABLE_IMAGE_GENERATION', 'true').lower() == 'true',
'enable_transcription': os.environ.get('ENABLE_TRANSCRIPTION', 'true').lower() == 'true',
'enable_tts_generation': os.environ.get('ENABLE_TTS_GENERATION', 'true').lower() == 'true',
'budget_period': os.environ.get('BUDGET_PERIOD', 'monthly').lower(),
'user_budgets': os.environ.get('USER_BUDGETS', os.environ.get('MONTHLY_USER_BUDGETS', '*')),
'guest_budget': float(os.environ.get('GUEST_BUDGET', os.environ.get('MONTHLY_GUEST_BUDGET', '100.0'))),
Expand All @@ -85,6 +88,8 @@ def main():
'token_price': float(os.environ.get('TOKEN_PRICE', 0.002)),
'image_prices': [float(i) for i in os.environ.get('IMAGE_PRICES', "0.016,0.018,0.02").split(",")],
'image_receive_mode': os.environ.get('IMAGE_FORMAT', "photo"),
'tts_model': os.environ.get('TTS_MODEL', 'tts-1'),
'tts_prices': [float(i) for i in os.environ.get('TTS_PRICES', "0.015,0.030").split(",")],
'transcription_price': float(os.environ.get('TRANSCRIPTION_PRICE', 0.006)),
'bot_language': os.environ.get('BOT_LANGUAGE', 'en'),
}
Expand Down
23 changes: 23 additions & 0 deletions bot/openai_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import requests
import json
import httpx
import io
from datetime import date
from calendar import monthrange

Expand Down Expand Up @@ -342,6 +343,28 @@ async def generate_image(self, prompt: str) -> tuple[str, str]:
except Exception as e:
raise Exception(f"⚠️ _{localized_text('error', bot_language)}._ ⚠️\n{str(e)}") from e

async def generate_speech(self, text: str) -> tuple[io.BytesIO, int]:
    """
    Generates speech audio from the given text using the configured TTS model.
    :param text: The text to send to the model
    :return: An in-memory stream holding the Opus audio (positioned at the start)
             and the number of characters in the text
    :raises Exception: if the request fails; the original error is chained and its
                       message is prefixed with the localized error label
    """
    bot_language = self.config['bot_language']
    try:
        response = await self.client.audio.speech.create(
            model=self.config['tts_model'],
            voice=self.config['tts_voice'],
            input=text,
            response_format='opus'
        )

        # A BytesIO built from initial bytes starts at position 0, so the
        # caller can upload it without an explicit seek.
        return io.BytesIO(response.read()), len(text)
    except Exception as e:
        raise Exception(f"⚠️ _{localized_text('error', bot_language)}._ ⚠️\n{str(e)}") from e

async def transcribe(self, filename):
"""
Transcribes the audio file using the Whisper model.
Expand Down
61 changes: 61 additions & 0 deletions bot/telegram_bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ def __init__(self, config: dict, openai: OpenAIHelper):
if self.config.get('enable_image_generation', False):
self.commands.append(BotCommand(command='image', description=localized_text('image_description', bot_language)))

if self.config.get('enable_tts_generation', False):
self.commands.append(BotCommand(command='tts', description=localized_text('tts_description', bot_language)))

self.group_commands = [BotCommand(
command='chat', description=localized_text('chat_description', bot_language)
)] + self.commands
Expand Down Expand Up @@ -94,6 +97,7 @@ async def stats(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
images_today, images_month = self.usage[user_id].get_current_image_count()
(transcribe_minutes_today, transcribe_seconds_today, transcribe_minutes_month,
transcribe_seconds_month) = self.usage[user_id].get_current_transcription_duration()
characters_today, characters_month = self.usage[user_id].get_current_tts_usage()
current_cost = self.usage[user_id].get_current_cost()

chat_id = update.effective_chat.id
Expand All @@ -112,11 +116,16 @@ async def stats(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
text_today_images = ""
if self.config.get('enable_image_generation', False):
text_today_images = f"{images_today} {localized_text('stats_images', bot_language)}\n"

text_today_tts = ""
if self.config.get('enable_tts_generation', False):
text_today_tts = f"{characters_today} {localized_text('stats_tts', bot_language)}\n"

text_today = (
f"*{localized_text('usage_today', bot_language)}:*\n"
f"{tokens_today} {localized_text('stats_tokens', bot_language)}\n"
f"{text_today_images}" # Include the image statistics for today if applicable
f"{text_today_tts}"
f"{transcribe_minutes_today} {localized_text('stats_transcribe', bot_language)[0]} "
f"{transcribe_seconds_today} {localized_text('stats_transcribe', bot_language)[1]}\n"
f"{localized_text('stats_total', bot_language)}{current_cost['cost_today']:.2f}\n"
Expand All @@ -126,12 +135,17 @@ async def stats(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
text_month_images = ""
if self.config.get('enable_image_generation', False):
text_month_images = f"{images_month} {localized_text('stats_images', bot_language)}\n"

text_month_tts = ""
if self.config.get('enable_tts_generation', False):
text_month_tts = f"{characters_month} {localized_text('stats_tts', bot_language)}\n"

# Check if image generation is enabled and, if so, generate the image statistics for the month
text_month = (
f"*{localized_text('usage_month', bot_language)}:*\n"
f"{tokens_month} {localized_text('stats_tokens', bot_language)}\n"
f"{text_month_images}" # Include the image statistics for the month if applicable
f"{text_month_tts}"
f"{transcribe_minutes_month} {localized_text('stats_transcribe', bot_language)[0]} "
f"{transcribe_seconds_month} {localized_text('stats_transcribe', bot_language)[1]}\n"
f"{localized_text('stats_total', bot_language)}{current_cost['cost_month']:.2f}"
Expand Down Expand Up @@ -258,6 +272,52 @@ async def _generate():

await wrap_with_indicator(update, context, _generate, constants.ChatAction.UPLOAD_PHOTO)

async def tts(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
    """
    Generates speech for the given input using the TTS API and replies with it
    as a voice message.

    Does nothing when TTS generation is disabled in the config or when
    check_allowed_and_within_budget rejects the update. Replies with the
    localized 'tts_no_prompt' text when the command carries no text.
    """
    if not self.config['enable_tts_generation'] \
            or not await self.check_allowed_and_within_budget(update, context):
        return

    tts_query = message_text(update.message)
    if tts_query == '':
        await update.effective_message.reply_text(
            message_thread_id=get_thread_id(update),
            text=localized_text('tts_no_prompt', self.config['bot_language'])
        )
        return

    logging.info(f'New speech generation request received from user {update.message.from_user.name} '
                 f'(id: {update.message.from_user.id})')

    async def _generate():
        try:
            speech_file, text_length = await self.openai.generate_speech(text=tts_query)

            try:
                await update.effective_message.reply_voice(
                    reply_to_message_id=get_reply_to_message_id(self.config, update),
                    voice=speech_file
                )
            finally:
                # release the audio buffer even when the upload fails
                speech_file.close()

            # add tts request to the user's usage tracker
            user_id = update.message.from_user.id
            self.usage[user_id].add_tts_request(text_length, self.config['tts_model'], self.config['tts_prices'])
            # add guest tts request to the guest usage tracker
            if str(user_id) not in self.config['allowed_user_ids'].split(',') and 'guests' in self.usage:
                self.usage["guests"].add_tts_request(text_length, self.config['tts_model'], self.config['tts_prices'])

        except Exception as e:
            logging.exception(e)
            await update.effective_message.reply_text(
                message_thread_id=get_thread_id(update),
                reply_to_message_id=get_reply_to_message_id(self.config, update),
                text=f"{localized_text('tts_fail', self.config['bot_language'])}: {str(e)}",
                parse_mode=constants.ParseMode.MARKDOWN
            )

    await wrap_with_indicator(update, context, _generate, constants.ChatAction.UPLOAD_VOICE)

async def transcribe(self, update: Update, context: ContextTypes.DEFAULT_TYPE):
"""
Transcribe audio messages.
Expand Down Expand Up @@ -794,6 +854,7 @@ def run(self):
application.add_handler(CommandHandler('reset', self.reset))
application.add_handler(CommandHandler('help', self.help))
application.add_handler(CommandHandler('image', self.image))
application.add_handler(CommandHandler('tts', self.tts))
application.add_handler(CommandHandler('start', self.help))
application.add_handler(CommandHandler('stats', self.stats))
application.add_handler(CommandHandler('resend', self.resend))
Expand Down
64 changes: 61 additions & 3 deletions bot/usage_tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,16 @@ def __init__(self, user_id, user_name, logs_dir="usage_logs"):
if os.path.isfile(self.user_file):
with open(self.user_file, "r") as file:
self.usage = json.load(file)
if 'tts_characters' not in self.usage['usage_history']:
self.usage['usage_history']['tts_characters'] = {}
else:
# ensure directory exists
pathlib.Path(logs_dir).mkdir(exist_ok=True)
# create new dictionary for this user
self.usage = {
"user_name": user_name,
"current_cost": {"day": 0.0, "month": 0.0, "all_time": 0.0, "last_update": str(date.today())},
"usage_history": {"chat_tokens": {}, "transcription_seconds": {}, "number_images": {}}
"usage_history": {"chat_tokens": {}, "transcription_seconds": {}, "number_images": {}, "tts_characters": {}}
}

# token usage functions:
Expand Down Expand Up @@ -151,6 +153,57 @@ def get_current_image_count(self):
usage_month += sum(images)
return usage_day, usage_month

# tts usage functions:

def add_tts_request(self, text_length, tts_model, tts_prices):
    """Add a text-to-speech request to the usage history and update current costs.

    :param text_length: number of characters that were converted to speech
    :param tts_model: model used for the request, one of 'tts-1' or 'tts-1-hd'
    :param tts_prices: prices per 1000 characters, in the order ['tts-1', 'tts-1-hd']
    :raises ValueError: if tts_model is not one of the known models
    """
    tts_models = ['tts-1', 'tts-1-hd']
    price = tts_prices[tts_models.index(tts_model)]
    # Keep sub-cent precision: rounding to 2 decimals turns the cost of any
    # short request (e.g. 100 characters at 0.015 per 1000) into 0.0, so it
    # would never count towards the budget.
    tts_price = round(text_length * price / 1000, 6)
    self.add_current_costs(tts_price)

    # update usage_history, creating the per-model and per-day entries on demand
    tts_history = self.usage['usage_history'].setdefault('tts_characters', {})
    characters_per_day = tts_history.setdefault(tts_model, {})
    today = str(date.today())
    characters_per_day[today] = characters_per_day.get(today, 0) + text_length

    # write updated usage to user file
    with open(self.user_file, "w") as outfile:
        json.dump(self.usage, outfile)

def get_current_tts_usage(self):
    """Get length of speech generated for today and this month.

    Sums the characters of every model stored in the 'tts_characters' usage
    history; a missing history counts as zero usage.

    :return: total amount of characters converted to speech per day and per month
    """
    today = str(date.today())
    month = today[:7]  # year-month as string
    tts_history = self.usage["usage_history"].get("tts_characters", {})

    characters_day = 0
    characters_month = 0
    for characters_per_day in tts_history.values():
        characters_day += characters_per_day.get(today, 0)
        characters_month += sum(
            characters
            for day, characters in characters_per_day.items()
            if day.startswith(month)
        )
    return int(characters_day), int(characters_month)


# transcription usage functions:

def add_transcription_seconds(self, seconds, minute_price=0.006):
Expand Down Expand Up @@ -236,13 +289,14 @@ def get_current_cost(self):
cost_all_time = self.usage["current_cost"].get("all_time", self.initialize_all_time_cost())
return {"cost_today": cost_day, "cost_month": cost_month, "cost_all_time": cost_all_time}

def initialize_all_time_cost(self, tokens_price=0.002, image_prices="0.016,0.018,0.02", minute_price=0.006):
def initialize_all_time_cost(self, tokens_price=0.002, image_prices="0.016,0.018,0.02", minute_price=0.006, tts_prices='0.015,0.030'):
"""Get total USD amount of all requests in history
:param tokens_price: price per 1000 tokens, defaults to 0.002
:param image_prices: prices for images of sizes ["256x256", "512x512", "1024x1024"],
defaults to [0.016, 0.018, 0.02]
:param minute_price: price per minute transcription, defaults to 0.006
:param tts_prices: comma-separated prices per 1000 characters for the tts models ['tts-1', 'tts-1-hd'], defaults to '0.015,0.030'
:return: total cost of all requests
"""
total_tokens = sum(self.usage['usage_history']['chat_tokens'].values())
Expand All @@ -255,5 +309,9 @@ def initialize_all_time_cost(self, tokens_price=0.002, image_prices="0.016,0.018
total_transcription_seconds = sum(self.usage['usage_history']['transcription_seconds'].values())
transcription_cost = round(total_transcription_seconds * minute_price / 60, 2)

all_time_cost = token_cost + transcription_cost + image_cost
total_characters = [sum(tts_model.values()) for tts_model in self.usage['usage_history']['tts_characters'].values()]
tts_prices_list = [float(x) for x in tts_prices.split(',')]
tts_cost = round(sum([count * price / 1000 for count, price in zip(total_characters, tts_prices_list)]), 2)

all_time_cost = token_cost + transcription_cost + image_cost + tts_cost
return all_time_cost
Loading

0 comments on commit 8fd24a9

Please sign in to comment.