update: vad timer and audio hashtable
PrinceBaghel258025 committed Sep 23, 2024
1 parent 65dd5b6 commit b54d149
Showing 3 changed files with 80 additions and 6 deletions.
51 changes: 49 additions & 2 deletions src/components/VadAudio.tsx
@@ -1,6 +1,6 @@
"use client";

import { useState, useRef, useCallback } from "react";
import { useState, useRef, useCallback, useEffect } from "react";
import { useMicVAD, utils } from "@ricky0123/vad-react";
import { Microphone, StopCircle } from "@phosphor-icons/react";
import { Button } from "@/components/button";
@@ -17,7 +17,10 @@ export default function VadAudio({
onStopListening,
}: VadAudioProps) {
const [isListening, setIsListening] = useState(false);
const [duration, setDuration] = useState("00:00");
const audioChunks = useRef<Blob[]>([]);
const timerRef = useRef<NodeJS.Timeout | null>(null);
const startTimeRef = useRef<number | null>(null);

const vad = useMicVAD({
onSpeechEnd: (audio: Float32Array) => {
@@ -30,28 +30,71 @@

onAudioCapture(audioFile);
},
onSpeechStart: () => {
console.log("onSpeechStart");
},
workletURL: "/vad/vad.worklet.bundle.min.js",
modelURL: "/vad/silero_vad.onnx",
ortConfig: (ort) => {
ort.env.wasm.wasmPaths = "/vad/";
},
startOnLoad: false,
submitUserSpeechOnPause: true,
});

const handleStartListening = useCallback(() => {
vad.start();
startTimer();
onStartListening();
setIsListening(true);
audioChunks.current = [];
}, [vad]);
// console.log("vad.start()", vad.errored, vad.loading, vad.userSpeaking, vad.listening);

const handleStopListening = useCallback(() => {
setIsListening(false);
onStopListening();
vad.pause();
resetDuration();
clearTimer();
}, [vad]);

const startTimer = () => {
startTimeRef.current = Date.now();
timerRef.current = setInterval(() => {
if (startTimeRef.current) {
const elapsed = Date.now() - startTimeRef.current;
console.log("elapsed", elapsed);
const minutes = Math.floor(elapsed / 60000);
const seconds = Math.floor((elapsed % 60000) / 1000);
setDuration(
`${String(minutes).padStart(2, "0")}:${String(seconds).padStart(
2,
"0",
)}`,
);
}
}, 1000);
};

const resetDuration = () => {
setDuration("00:00");
clearTimer();
};

const clearTimer = () => {
if (timerRef.current) {
clearInterval(timerRef.current);
timerRef.current = null;
}
startTimeRef.current = null;
};

useEffect(() => {
return () => {
clearTimer();
};
}, []);

return (
<div className="flex items-center gap-2">
<Button
@@ -75,6 +75,7 @@ export default function VadAudio({
/>
)}
</Button>
<span>{duration}</span>
</div>
);
}
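
The new timer derives the displayed duration from Date.now() minus the recorded start timestamp on every tick, rather than incrementing a counter, so a delayed interval tick cannot drift the display. A minimal standalone sketch of the same mm:ss formatting, assuming a hypothetical formatElapsed helper (the commit keeps this logic inline in the setInterval callback):

```ts
// Standalone sketch of the mm:ss formatting used inside startTimer above.
// formatElapsed is a hypothetical helper introduced here for illustration only.
function formatElapsed(elapsedMs: number): string {
  const minutes = Math.floor(elapsedMs / 60000);
  const seconds = Math.floor((elapsedMs % 60000) / 1000);
  return `${String(minutes).padStart(2, "0")}:${String(seconds).padStart(2, "0")}`;
}

// Example: 95,500 ms of recording renders as "01:35".
console.log(formatElapsed(95_500)); // "01:35"
```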
18 changes: 16 additions & 2 deletions src/components/inputBar.tsx
@@ -339,26 +339,40 @@ const InputBar = (props: InputBarProps) => {
setIsTranscribing(false);
}
};

const [audioId, setAudioId] = useState(0);
const [transcriptHashTable, setTranscriptHashTable] = useState<{
[key: number]: string;
}>({});

const handleAudioChunk = async (audioChunk: File) => {
const newAudioId = audioId + 1;
setAudioId(newAudioId);
setIsTranscribing(true);
const f = new FormData();
f.append("file", audioChunk);
console.log(audioChunk);
try {
const res = await fetch("/api/transcript", {
method: "POST",
body: f,
});

const data = await res.json();
props.setInput((prev) => prev + data.text);
setTranscriptHashTable((prev) => ({
...prev,
[newAudioId]: data.text,
}));
// props?.setInput?.((prev) => prev + data.text);
setIsTranscribing(false);
} catch (err) {
console.error("got in error", err);
setIsTranscribing(false);
}
};

useEffect(() => {
props?.setInput?.(Object.values(transcriptHashTable).join(" "));
}, [transcriptHashTable]);
useEffect(() => {
if (
presenceData
17 changes: 15 additions & 2 deletions src/components/inputBar2.tsx
@@ -330,26 +330,39 @@ const InputBar = (props: InputBarProps) => {
}
};

const [audioId, setAudioId] = useState(0);
const [transcriptHashTable, setTranscriptHashTable] = useState<{
[key: number]: string;
}>({});

const handleAudioChunk = async (audioChunk: File) => {
const newAudioId = audioId + 1;
setAudioId(newAudioId);
setIsTranscribing(true);
const f = new FormData();
f.append("file", audioChunk);
console.log(audioChunk);
try {
const res = await fetch("/api/transcript", {
method: "POST",
body: f,
});

const data = await res.json();
props?.setInput?.((prev) => prev + data.text);
setTranscriptHashTable((prev) => ({
...prev,
[newAudioId]: data.text,
}));
setIsTranscribing(false);
} catch (err) {
console.error("got in error", err);
setIsTranscribing(false);
}
};

useEffect(() => {
props?.setInput?.(Object.values(transcriptHashTable).join(" "));
}, [transcriptHashTable]);

//TODO:
const handleInputChange = (e: ChangeEvent<HTMLTextAreaElement>) => {
if (props.dropZoneActive) {
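
Both input bars follow the same pattern: each audio chunk gets a numeric id, its transcript is stored under that key, and a useEffect rebuilds the input from the joined values. Because integer-like keys are enumerated in ascending order, late-arriving transcription responses slot back into chunk order instead of appending wherever they land. A minimal sketch of that accumulation pattern outside React, with illustrative names (storeTranscript is not part of the commit):

```ts
// Sketch of the transcript hash-table pattern used in both input bars.
// Keys are per-chunk ids; Object.values enumerates integer-like keys in
// ascending order, so joining yields transcripts in chunk order even when
// the /api/transcript responses resolve out of order.
type TranscriptTable = { [key: number]: string };

let table: TranscriptTable = {};

function storeTranscript(audioId: number, text: string): string {
  table = { ...table, [audioId]: text }; // mirrors setTranscriptHashTable((prev) => ...)
  return Object.values(table).join(" "); // mirrors the useEffect that rebuilds the input
}

storeTranscript(2, "world"); // response for chunk 2 arrives first
console.log(storeTranscript(1, "hello")); // chunk 1 resolves later -> "hello world"
```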
