From c389274e2dc647a06a1e6985ce30c0b44fb9a833 Mon Sep 17 00:00:00 2001 From: Prince Baghel Date: Fri, 11 Oct 2024 16:35:52 +0530 Subject: [PATCH 1/3] init: long audio paragraph chunking and stitching --- package.json | 2 + src/app/api/tts/route.ts | 107 +++++++++++++++++++++++++++++++-------- 2 files changed, 88 insertions(+), 21 deletions(-) diff --git a/package.json b/package.json index c9dd1707..e52c2687 100644 --- a/package.json +++ b/package.json @@ -86,6 +86,8 @@ "eslint": "8.37.0", "eslint-config-next": "13.2.4", "eventsource-parser": "1.1.1", + "ffmpeg-static": "^5.2.0", + "fluent-ffmpeg": "^2.1.3", "framer-motion": "10.16.2", "get-blob-duration": "1.2.0", "html-to-image": "1.11.11", diff --git a/src/app/api/tts/route.ts b/src/app/api/tts/route.ts index fbfeed64..3aa2f035 100644 --- a/src/app/api/tts/route.ts +++ b/src/app/api/tts/route.ts @@ -8,6 +8,7 @@ import { eq } from "drizzle-orm"; import { NextRequest, NextResponse } from "next/server"; import OpenAI from "openai"; import * as z from "zod"; + export const maxDuration = 180; const bodyobj = z.object({ @@ -19,6 +20,67 @@ const bodyobj = z.object({ messages: z.any().optional(), }); +const MAX_CHUNK_LENGTH = 4000; // Slightly less than 4095 to be safe + +function chunkText(text: string): string[] { + const paragraphs = text.split("\n\n"); + const chunks: string[] = []; + let currentChunk = ""; + + for (const paragraph of paragraphs) { + if (currentChunk.length + paragraph.length > MAX_CHUNK_LENGTH) { + if (currentChunk) { + chunks.push(currentChunk.trim()); + currentChunk = ""; + } + if (paragraph.length > MAX_CHUNK_LENGTH) { + // If a single paragraph is too long, split it into sentences + const sentences = paragraph.match(/[^.!?]+[.!?]+/g) || [paragraph]; + for (const sentence of sentences) { + if (currentChunk.length + sentence.length > MAX_CHUNK_LENGTH) { + chunks.push(currentChunk.trim()); + currentChunk = sentence; + } else { + currentChunk += " " + sentence; + } + } + } else { + currentChunk = paragraph; + } + } else { + currentChunk += (currentChunk ? "\n\n" : "") + paragraph; + } + } + + if (currentChunk) { + chunks.push(currentChunk.trim()); + } + + return chunks; +} + +async function generateAudioForChunk( + openai: OpenAI, + chunk: string, +): Promise { + const mp3 = await openai.audio.speech.create({ + model: "tts-1", + voice: "alloy", + input: chunk, + response_format: "mp3", + }); + + return Buffer.from(await mp3.arrayBuffer()); +} + +async function concatenateAudioBuffers( + audioBuffers: Buffer[], +): Promise { + // Simple concatenation of MP3 buffers + // Note: This may not work perfectly for all MP3 files and may require a more sophisticated approach + return Buffer.concat(audioBuffers as unknown as Uint8Array[]); +} + export async function POST(request: NextRequest) { const b = await request.json(); const searchParams = await request.nextUrl.searchParams; @@ -32,21 +94,20 @@ export async function POST(request: NextRequest) { const chatId = body.chatId; const messages: ChatEntry[] = body.messages; - const Openai = new OpenAI({ + const openai = new OpenAI({ apiKey: env.OPEN_AI_API_KEY, }); if (text && messageId && body.index) { console.log("got into if"); // handling audio for a single message - const mp3 = await Openai.audio.speech.create({ - model: "tts-1", - voice: "alloy", - input: text, - response_format: "aac", - }); + const chunks = chunkText(text); + const audioBuffers = await Promise.all( + chunks.map((chunk) => generateAudioForChunk(openai, chunk)), + ); + + const finalBuffer = await concatenateAudioBuffers(audioBuffers); - const buffer = Buffer.from(await mp3.arrayBuffer()); // fetching the chat let chatlog: ChatLog = { log: [] }; let fetchedChat: ChatSchema[] = []; @@ -81,8 +142,11 @@ export async function POST(request: NextRequest) { messageId = messageId ? messageId : chatlog.log[body.index].id; - // adding the audio to the message - const audioUrl = await saveAudioMessage({ buffer, chatId, messageId }); + const audioUrl = await saveAudioMessage({ + buffer: finalBuffer, + chatId, + messageId, + }); message.audio = audioUrl; await db @@ -98,20 +162,22 @@ export async function POST(request: NextRequest) { ); } else { // summarize and generate audio for all messages - const summary: string = await summarizeChat(messages); - const mp3 = await Openai.audio.speech.create({ - model: "tts-1", - voice: "alloy", - input: summary, - response_format: "aac", - }); + const chunks = chunkText(summary); + const audioBuffers = await Promise.all( + chunks.map((chunk) => generateAudioForChunk(openai, chunk)), + ); + + const finalBuffer = await concatenateAudioBuffers(audioBuffers); - const buffer = Buffer.from(await mp3.arrayBuffer()); const messageId = "summary"; // as it is the summary of the whole chat - const audioUrl = await saveAudioMessage({ buffer, chatId, messageId }); + const audioUrl = await saveAudioMessage({ + buffer: finalBuffer, + chatId, + messageId, + }); - // update the db to save audio url for correspointing chat + // update the db to save audio url for corresponding chat await db .update(chats) .set({ @@ -120,7 +186,6 @@ export async function POST(request: NextRequest) { }) .where(eq(chats.id, Number(chatId))) .run(); - // fetching the chat return new NextResponse(JSON.stringify({ audioUrl: audioUrl })); } From c883ed4b7b952bdf30e661eada4569615ba28180 Mon Sep 17 00:00:00 2001 From: Prince Baghel Date: Sat, 12 Oct 2024 13:54:10 +0530 Subject: [PATCH 2/3] chore: switch from zephyr to llama3.1 for summary --- src/app/dashboard/layout.tsx | 2 +- src/app/env.mjs | 6 ++++++ src/app/page.tsx | 14 ++++++++++++++ src/utils/apiHelper.ts | 8 ++++---- 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/src/app/dashboard/layout.tsx b/src/app/dashboard/layout.tsx index 3bbc7175..1203d232 100644 --- a/src/app/dashboard/layout.tsx +++ b/src/app/dashboard/layout.tsx @@ -100,7 +100,7 @@ export default function LoggedInLayout({ ); } -const SearchButton = (props: React.ComponentProps) => { +export const SearchButton = (props: React.ComponentProps) => { return ( + {isSignedIn ? ( + <> + + + Search + + + + ) : null}
diff --git a/src/utils/apiHelper.ts b/src/utils/apiHelper.ts index 7fcda614..302c4287 100644 --- a/src/utils/apiHelper.ts +++ b/src/utils/apiHelper.ts @@ -413,7 +413,7 @@ export const summarizeChat = async (chat: ChatEntry[]): Promise => { name: "SummarizeChat", run_type: "llm", inputs: { - model: "HuggingFaceH4/zephyr-7b-beta", + model: "llama3.1-8b", messages: msgs as ChatCompletionMessageParam[], top_p: 0.7, max_tokens: 512, @@ -423,12 +423,12 @@ export const summarizeChat = async (chat: ChatEntry[]): Promise => { const parentRun = new RunTree(parentRunConfig); const openai = new OpenAI({ - baseURL: env.ANYSCALE_API_BASE, - apiKey: env.ANYSCALE_API_KEY, + baseURL: env.LITELLM_BASE_URL, + apiKey: env.LITELLM_API_KEY, }); const stream: OpenAI.Chat.ChatCompletion = await openai.chat.completions.create({ - model: "HuggingFaceH4/zephyr-7b-beta", + model: "llama3.1-8b", messages: [ { role: "user", content: msg }, ] as ChatCompletionMessageParam[], From d18cc6d6a9f928923754dfd44257943ba11c4932 Mon Sep 17 00:00:00 2001 From: Prince Baghel Date: Sat, 12 Oct 2024 13:57:22 +0530 Subject: [PATCH 3/3] patch: action workflow --- .github/workflows/main.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ccff1590..5a7615cb 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -19,6 +19,8 @@ jobs: - name: Lint and fix run: npm run lint:fix env: + LITELLM_BASE_URL: ${{secrets.LITELLM_BASE_URL}} + LITELLM_API_KEY: ${{secrets.LITELLM_API_KEY}} ANTHROPIC_API_KEY: ${{secrets.ANTHROPIC_API_KEY}} TURSO_DB_URL: ${{secrets.TURSO_DB_URL}} TURSO_DB_AUTH_TOKEN: ${{secrets.TURSO_DB_AUTH_TOKEN}}