init: long audio paragraph chunking and stitching
PrinceBaghel258025 committed Oct 11, 2024 · 1 parent 30d17d5 · commit c389274
Showing 2 changed files with 88 additions and 21 deletions.
package.json: 2 additions & 0 deletions

@@ -86,6 +86,8 @@
     "eslint": "8.37.0",
     "eslint-config-next": "13.2.4",
     "eventsource-parser": "1.1.1",
+    "ffmpeg-static": "^5.2.0",
+    "fluent-ffmpeg": "^2.1.3",
     "framer-motion": "10.16.2",
     "get-blob-duration": "1.2.0",
     "html-to-image": "1.11.11",
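Note: the two dependencies added above (ffmpeg-static, fluent-ffmpeg) are not imported anywhere in this commit; given the naive Buffer.concat in concatenateAudioBuffers in route.ts below, they look like groundwork for proper audio stitching. A minimal sketch of how they could be wired in later, assuming the same Buffer-in/Buffer-out shape (nothing in this sketch is part of the commit, and concatenateWithFfmpeg is a hypothetical name):

import { promises as fs } from "fs";
import os from "os";
import path from "path";
import ffmpegPath from "ffmpeg-static"; // exports the path to a bundled ffmpeg binary
import ffmpeg from "fluent-ffmpeg";

if (ffmpegPath) ffmpeg.setFfmpegPath(ffmpegPath);

// Hypothetical drop-in replacement for concatenateAudioBuffers: write each
// chunk to a temp file and let ffmpeg stitch them, re-encoding so MP3 frame
// headers stay consistent across chunk boundaries.
async function concatenateWithFfmpeg(audioBuffers: Buffer[]): Promise<Buffer> {
  const dir = await fs.mkdtemp(path.join(os.tmpdir(), "tts-"));
  const files = await Promise.all(
    audioBuffers.map(async (buf, i) => {
      const file = path.join(dir, `chunk-${i}.mp3`);
      await fs.writeFile(file, buf);
      return file;
    }),
  );

  const output = path.join(dir, "stitched.mp3");
  await new Promise<void>((resolve, reject) => {
    const command = ffmpeg();
    for (const file of files) command.input(file);
    command
      .on("error", reject)
      .on("end", () => resolve())
      .mergeToFile(output, dir); // uses ffmpeg's concat under the hood
  });

  const result = await fs.readFile(output);
  await fs.rm(dir, { recursive: true, force: true });
  return result;
}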
src/app/api/tts/route.ts: 86 additions & 21 deletions

@@ -8,6 +8,7 @@ import { eq } from "drizzle-orm";
 import { NextRequest, NextResponse } from "next/server";
 import OpenAI from "openai";
 import * as z from "zod";
+
 export const maxDuration = 180;
 
 const bodyobj = z.object({
@@ -19,6 +20,67 @@ const bodyobj = z.object({
   messages: z.any().optional(),
 });
 
+const MAX_CHUNK_LENGTH = 4000; // Slightly below the API's 4096-character input limit, to be safe
+
+function chunkText(text: string): string[] {
+  const paragraphs = text.split("\n\n");
+  const chunks: string[] = [];
+  let currentChunk = "";
+
+  for (const paragraph of paragraphs) {
+    if (currentChunk.length + paragraph.length > MAX_CHUNK_LENGTH) {
+      if (currentChunk) {
+        chunks.push(currentChunk.trim());
+        currentChunk = "";
+      }
+      if (paragraph.length > MAX_CHUNK_LENGTH) {
+        // If a single paragraph is too long, split it into sentences
+        const sentences = paragraph.match(/[^.!?]+[.!?]+/g) || [paragraph];
+        for (const sentence of sentences) {
+          if (currentChunk.length + sentence.length > MAX_CHUNK_LENGTH) {
+            chunks.push(currentChunk.trim());
+            currentChunk = sentence;
+          } else {
+            currentChunk += " " + sentence;
+          }
+        }
+      } else {
+        currentChunk = paragraph;
+      }
+    } else {
+      currentChunk += (currentChunk ? "\n\n" : "") + paragraph;
+    }
+  }
+
+  if (currentChunk) {
+    chunks.push(currentChunk.trim());
+  }
+
+  return chunks;
+}
+
+async function generateAudioForChunk(
+  openai: OpenAI,
+  chunk: string,
+): Promise<Buffer> {
+  const mp3 = await openai.audio.speech.create({
+    model: "tts-1",
+    voice: "alloy",
+    input: chunk,
+    response_format: "mp3",
+  });
+
+  return Buffer.from(await mp3.arrayBuffer());
+}
+
+async function concatenateAudioBuffers(
+  audioBuffers: Buffer[],
+): Promise<Buffer> {
+  // Simple concatenation of MP3 buffers
+  // Note: This may not work perfectly for all MP3 files and may require a more sophisticated approach
+  return Buffer.concat(audioBuffers as unknown as Uint8Array[]);
+}
+
 export async function POST(request: NextRequest) {
   const b = await request.json();
   const searchParams = await request.nextUrl.searchParams;

@@ -32,21 +94,20 @@ export async function POST(request: NextRequest) {
   const chatId = body.chatId;
   const messages: ChatEntry[] = body.messages;
 
-  const Openai = new OpenAI({
+  const openai = new OpenAI({
     apiKey: env.OPEN_AI_API_KEY,
   });
 
   if (text && messageId && body.index) {
     console.log("got into if");
     // handling audio for a single message
-    const mp3 = await Openai.audio.speech.create({
-      model: "tts-1",
-      voice: "alloy",
-      input: text,
-      response_format: "aac",
-    });
+    const chunks = chunkText(text);
+    const audioBuffers = await Promise.all(
+      chunks.map((chunk) => generateAudioForChunk(openai, chunk)),
+    );
+
+    const finalBuffer = await concatenateAudioBuffers(audioBuffers);
 
-    const buffer = Buffer.from(await mp3.arrayBuffer());
     // fetching the chat
     let chatlog: ChatLog = { log: [] };
     let fetchedChat: ChatSchema[] = [];
@@ -81,8 +142,11 @@
 
     messageId = messageId ? messageId : chatlog.log[body.index].id;
 
-    // adding the audio to the message
-    const audioUrl = await saveAudioMessage({ buffer, chatId, messageId });
+    const audioUrl = await saveAudioMessage({
+      buffer: finalBuffer,
+      chatId,
+      messageId,
+    });
     message.audio = audioUrl;
 
     await db
@@ -98,20 +162,22 @@
     );
   } else {
     // summarize and generate audio for all messages
 
     const summary: string = await summarizeChat(messages);
-    const mp3 = await Openai.audio.speech.create({
-      model: "tts-1",
-      voice: "alloy",
-      input: summary,
-      response_format: "aac",
-    });
+    const chunks = chunkText(summary);
+    const audioBuffers = await Promise.all(
+      chunks.map((chunk) => generateAudioForChunk(openai, chunk)),
+    );
+
+    const finalBuffer = await concatenateAudioBuffers(audioBuffers);
 
-    const buffer = Buffer.from(await mp3.arrayBuffer());
     const messageId = "summary"; // as it is the summary of the whole chat
-    const audioUrl = await saveAudioMessage({ buffer, chatId, messageId });
+    const audioUrl = await saveAudioMessage({
+      buffer: finalBuffer,
+      chatId,
+      messageId,
+    });
 
-    // update the db to save audio url for correspointing chat
+    // update the db to save audio url for corresponding chat
     await db
       .update(chats)
       .set({
@@ -120,7 +186,6 @@
     })
     .where(eq(chats.id, Number(chatId)))
     .run();
-    // fetching the chat
 
     return new NextResponse(JSON.stringify({ audioUrl: audioUrl }));
   }
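For reference, a quick sketch of chunkText's boundary behavior (hypothetical usage, not part of the commit, and assuming chunkText and MAX_CHUNK_LENGTH are in scope; in the commit they are module-private to route.ts). Paragraphs are packed together up to MAX_CHUNK_LENGTH, and an oversized paragraph falls back to sentence splitting. One caveat: a single sentence longer than the limit passes through as one oversized chunk, since nothing splits below the sentence level.

const intro = "Short intro paragraph.";
const huge = Array(300).fill("A full sentence of filler text.").join(" "); // one ~9,600-character paragraph
const outro = "Closing paragraph.";

const chunks = chunkText([intro, huge, outro].join("\n\n"));

// The oversized middle paragraph is split on sentence boundaries, so every
// chunk stays within MAX_CHUNK_LENGTH for this input.
console.log(chunks.map((c) => c.length));
console.log(chunks.every((c) => c.length <= MAX_CHUNK_LENGTH)); // true here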
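Finally, a hypothetical client call for orientation: the endpoint path follows from the file location (src/app/api/tts/route.ts under the Next.js app router), the field names come from the handler, and the values are invented. The summary branch returns { audioUrl }; the single-message branch's return statement is collapsed out of this diff but presumably matches.

// Request audio for a single long message; anything over the chunk limit
// is now split, synthesized per chunk, and stitched server-side.
const longAssistantReply = "A model reply well past 4,000 characters..."; // stand-in value
const res = await fetch("/api/tts", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    text: longAssistantReply,
    messageId: "message-id-here",
    index: 3, // position of the message in the chat log
    chatId: "42",
  }),
});
const { audioUrl } = await res.json();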