From 37196f640348e8a1ade67016903b1d13ff015279 Mon Sep 17 00:00:00 2001 From: Prince Baghel Date: Wed, 25 Sep 2024 10:44:20 +0530 Subject: [PATCH] update: image-chat --- .github/workflows/main.yml | 1 + package.json | 1 + src/app/api/imageInput/route.ts | 1 + src/app/page.tsx | 59 ++++++++++++- src/components/VadAudio.tsx | 4 + src/components/chat.tsx | 152 +++++++++++++++++++++++++++++--- src/components/chatcard.tsx | 1 - src/components/inputBar.tsx | 2 +- src/components/inputBar2.tsx | 149 ++++++++++++++++++++----------- src/components/room.tsx | 4 +- 10 files changed, 303 insertions(+), 71 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 53944f22..ccff1590 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -19,6 +19,7 @@ jobs: - name: Lint and fix run: npm run lint:fix env: + ANTHROPIC_API_KEY: ${{secrets.ANTHROPIC_API_KEY}} TURSO_DB_URL: ${{secrets.TURSO_DB_URL}} TURSO_DB_AUTH_TOKEN: ${{secrets.TURSO_DB_AUTH_TOKEN}} OPEN_AI_API_KEY: ${{secrets.OPEN_AI_API_KEY}} diff --git a/package.json b/package.json index c105e750..3b33b846 100644 --- a/package.json +++ b/package.json @@ -120,6 +120,7 @@ "terser": "^5.33.0", "tldraw": "2.0.2", "typescript": "5.0.3", + "use-stay-awake": "^0.1.7", "vaul": "0.8.0", "zod": "3.22.4", "zustand": "4.4.6" diff --git a/src/app/api/imageInput/route.ts b/src/app/api/imageInput/route.ts index 14c833d3..e83fd544 100644 --- a/src/app/api/imageInput/route.ts +++ b/src/app/api/imageInput/route.ts @@ -78,6 +78,7 @@ export async function POST(request: Request, response: NextApiResponse) { { status: 400 }, ); } + console.log("imageFile", imageFile); const parts = imageFile.name.split("."); const extension = parts[parts.length - 1]; let awsImageUrl = ""; diff --git a/src/app/page.tsx b/src/app/page.tsx index d92756cb..acc9615b 100644 --- a/src/app/page.tsx +++ b/src/app/page.tsx @@ -59,6 +59,27 @@ export default function Home() { "model", parseAsString.withDefault("chat"), ); + const [imageUrl, setImageUrl] = useQueryState( + "imageUrl", + parseAsString.withDefault(""), + ); + const [imageName, setImageName] = useQueryState( + "imageName", + parseAsString.withDefault(""), + ); + const [imageType, setImageType] = useQueryState( + "imageType", + parseAsString.withDefault(""), + ); + const [imageSize, setImageSize] = useQueryState( + "imageSize", + parseAsString.withDefault(""), + ); + const [imageExtension, setImageExtension] = useQueryState( + "imageExtension", + parseAsString.withDefault(""), + ); + const [dropzoneActive, setDropzoneActive] = useState(false); const { isSignedIn, orgId, orgSlug, userId } = useAuth(); // if (isSignedIn) { @@ -80,9 +101,27 @@ export default function Home() { }); const data = await res.json(); - router.push( - `/dashboard/chat/${data.newChatId}?new=true&clipboard=true&model=${chatType}&input=${input}`, - ); + if (dropzoneActive) { + const queryParams = new URLSearchParams(window.location.search); + const params: { [key: string]: string } = {}; + queryParams.forEach((value, key) => { + params[key] = value; + }); + const params2 = { + ...params, + new: "true", + clipboard: "true", + model: chatType, + input: input, + }; + const queryParamsString = new URLSearchParams(params2).toString(); + + router.push(`/dashboard/chat/${data.newChatId}?${queryParamsString}`); + } else { + router.push( + `/dashboard/chat/${data.newChatId}?new=true&clipboard=true&model=${chatType}&input=${input}`, + ); + } } catch (error) { console.error("Error creating new chat:", error); } @@ -120,6 +159,18 @@ export default function Home() { {isSignedIn && orgId && orgSlug ? (
-
+
void; @@ -24,6 +25,7 @@ export default function VadAudio({ const audioChunks = useRef([]); const timerRef = useRef(null); const startTimeRef = useRef(null); + const device = useStayAwake(); const vad = useMicVAD({ onSpeechEnd: (audio: Float32Array) => { @@ -51,6 +53,7 @@ export default function VadAudio({ const handleStartListening = useCallback(() => { vad.start(); startTimer(); + device.preventSleeping(); onStartListening(); setIsListening(true); audioChunks.current = []; @@ -62,6 +65,7 @@ export default function VadAudio({ vad.pause(); resetDuration(); clearTimer(); + device.allowSleeping(); }, [vad]); const startTimer = () => { diff --git a/src/components/chat.tsx b/src/components/chat.tsx index 4a425a20..949e9e04 100644 --- a/src/components/chat.tsx +++ b/src/components/chat.tsx @@ -1,7 +1,7 @@ "use client"; import { useState, useEffect, useCallback } from "react"; import { ChatType } from "@/lib/types"; -import InputBar from "@/components/inputBar"; +import InputBar, { Schema } from "@/components/inputBar"; import { Message, useChat } from "ai/react"; import Startnewchatbutton from "@/components/startnewchatbutton"; import ChatMessageCombinator from "@/components/chatmessagecombinator"; @@ -164,25 +164,153 @@ export default function Chat(props: ChatProps) { }, sendExtraMessageFields: true, }); - console.log("messages", messages); + + const handleFirstImageMessage = useCallback(async () => { + const params = new URLSearchParams(window.location.search); + if ( + params.get("imageUrl") && + params.get("imageName") && + params.get("imageType") && + params.get("imageSize") + ) { + const queryParams: { [key: string]: string } = {}; + params.forEach((value, key) => { + queryParams[key] = value; + }); + const ID = nanoid(); + const imageMessasgeId = nanoid(); + const message: Message = { + id: ID, + role: "user", + content: incomingInput || "", + name: `${props.username},${props.uid}`, + audio: "", + }; + const createFileFromBlobUrl = async ( + blobUrl: string, + fileName: string, + ) => { + const response = await fetch(blobUrl); + const blob = await response.blob(); + return new File([blob], fileName, { type: blob.type }); + }; + + const imageUrl = params.get("imageUrl")!; + const imageName = params.get("imageName")!; + const imageExtension = params.get("imageExtension")!; + + const file = await createFileFromBlobUrl( + imageUrl, + `image.${imageExtension}`, + ); + console.log("Created file from blob URL:", file); + const zodMessage: any = Schema.safeParse({ + imageName: params.get("imageName"), + imageType: params.get("imageType"), + imageSize: Number(params.get("imageSize")), + file: file, + value: input, + userId: props.uid, + orgId: props.orgId, + chatId: props.chatId, + message: [message], + id: ID, + chattype: chattype, + }); + console.log("zodMessageImage Extension:", imageExtension); + // console.log("zodmessage", zodMessage); + // console.log("dropzone", props.dropZoneActive); + console.log("zodMessage", zodMessage, imageExtension); + if (zodMessage.success) { + const zodMSG = JSON.stringify(zodMessage); + const formData = new FormData(); + formData.append("zodMessage", zodMSG); + formData.append("file", file); + const response = await fetch("/api/imageInput", { + method: "POST", + body: formData, + }); + if (response) { + console.log("responce", response); + let assistantMsg = ""; + const reader = response.body?.getReader(); + console.log("reader", reader); + const decoder = new TextDecoder(); + let charsReceived = 0; + let content = ""; + reader + ?.read() + .then(async function processText({ done, value }) { + if (done) { + console.log("Stream complete"); + return; + } + charsReceived += value.length; + const chunk = decoder.decode(value, { stream: true }); + assistantMsg += chunk === "" ? `${chunk} \n` : chunk; + content += chunk === "" ? `${chunk} \n` : chunk; + // console.log("assistMsg", assistantMsg); + setMessages([ + ...messages, + awsImageMessage, + message, + { + ...assistantMessage, + content: assistantMsg, + }, + ]); + reader.read().then(processText); + }) + .then((e) => { + console.error("error", e); + }); + const awsImageMessage = { + role: "user", + subRole: "input-image", + content: `${process.env.NEXT_PUBLIC_IMAGE_PREFIX_URL}imagefolder/${props.chatId}/${ID}.${imageExtension}`, + id: ID, + } as Message; + const assistantMessage: Message = { + id: ID, + role: "assistant", + content: content, + }; + + console.log("image chat", queryParams); + // image chat + } + } + } + }, []); //TODO: handle user incoming from dashboard when invoked a chat useEffect(() => { if (isNewChat === "true" && incomingInput) { //TODO: use types for useQueryState if (incomingInput && chattype !== "tldraw") { - const newMessage = { - id: nanoid(), - role: "user", - content: incomingInput, - name: `${props.username},${props.uid}`, - audio: "", - } as Message; - append(newMessage); + const params = new URLSearchParams(window.location.search); + if ( + params.get("imageUrl") && + params.get("imageName") && + params.get("imageType") && + params.get("imageSize") + ) { + console.log("zodMessage", "we made to here", params); + handleFirstImageMessage(); + } else { + const newMessage = { + id: nanoid(), + role: "user", + content: incomingInput, + name: `${props.username},${props.uid}`, + audio: "", + } as Message; + append(newMessage); + } } - setIsFromClipboard("false"); - setIsNewChat("false"); } + setIsFromClipboard("false"); + setIsNewChat("false"); }, [isFromClipboard, isNewChat]); useEffect(() => { diff --git a/src/components/chatcard.tsx b/src/components/chatcard.tsx index 1d737f83..622bf7d0 100644 --- a/src/components/chatcard.tsx +++ b/src/components/chatcard.tsx @@ -53,7 +53,6 @@ const Chatcard = ({ const chatlog = JSON.parse(chat.messages as string) as ChatLog; console.log("chatlog", chatlog.log); const msgs = chatlog.log as ChatEntry[]; - console.log("messages", msgs); const chats = msgs.slice(0, 2); const res = await fetch(`/api/generateTitle/${chat.id}/${org_id}`, { method: "POST", diff --git a/src/components/inputBar.tsx b/src/components/inputBar.tsx index 5ac25f4f..d8a79eb8 100644 --- a/src/components/inputBar.tsx +++ b/src/components/inputBar.tsx @@ -27,7 +27,7 @@ import VadAudio from "./VadAudio"; const isValidImageType = (value: string) => /^image\/(jpeg|png|jpg|webp)$/.test(value); -const Schema = z.object({ +export const Schema = z.object({ imageName: z.any(), imageType: z.string().refine(isValidImageType, { message: "File type must be JPEG, PNG, or WEBP image", diff --git a/src/components/inputBar2.tsx b/src/components/inputBar2.tsx index 4cc56ac1..8b0ddf79 100644 --- a/src/components/inputBar2.tsx +++ b/src/components/inputBar2.tsx @@ -6,21 +6,23 @@ import { Dispatch, FormEvent, SetStateAction, + useCallback, useEffect, useState, } from "react"; import { ChatRequestOptions, CreateMessage, Message, nanoid } from "ai"; -import { PaperPlaneTilt } from "@phosphor-icons/react"; +import { PaperPlaneTilt, UploadSimple } from "@phosphor-icons/react"; import { Button } from "@/components/button"; import { ChatType, chattype } from "@/lib/types"; import { useQueryClient } from "@tanstack/react-query"; import { fetchEventSource } from "@microsoft/fetch-event-source"; import z from "zod"; import { toast } from "./ui/use-toast"; -import { useImageState } from "@/store/tlDrawImage"; import ModelSwitcher from "./modelswitcher"; // import VadAudio from "./vadAudio"; import VadAudio from "./VadAudio"; +import { useDropzone } from "react-dropzone"; +import { X } from "lucide-react"; const isValidImageType = (value: string) => /^image\/(jpeg|png|jpg|webp)$/.test(value); @@ -74,22 +76,26 @@ interface InputBarProps { isLoading?: boolean; chattype?: ChatType; setChattype?: Dispatch>; - setDropzoneActive?: Dispatch>; dropZoneActive?: boolean; onClickOpen?: any; onClickOpenChatSheet?: boolean | any; isHome?: boolean; submitInput?: () => void; + imageUrl: string; + setImageUrl: Dispatch>; + imageName: string; + setImageName: Dispatch>; + imageType: string; + setImageType: Dispatch>; + imageSize: string; + setImageSize: Dispatch>; + setDropzoneActive: Dispatch>; + dropzoneActive: boolean; + imageExtension: string; + setImageExtension: Dispatch>; } const InputBar = (props: InputBarProps) => { - const { - tldrawImageUrl, - tlDrawImage, - setTlDrawImage, - settldrawImageUrl, - onClickOpenChatSheet, - } = useImageState(); const [isAudioWaveVisible, setIsAudioWaveVisible] = useState(false); const [isRecording, setIsRecording] = useState(false); const [isTranscribing, setIsTranscribing] = useState(false); @@ -97,17 +103,6 @@ const InputBar = (props: InputBarProps) => { const [isRagLoading, setIsRagLoading] = useState(false); const queryClient = useQueryClient(); - // const preferences = usePreferences(); - // const { presenceData, updateStatus } = usePresence( - // `channel_${props.chatId}`, - // { - // id: props.userId, - // username: props.username, - // isTyping: false, - // } - // ); - // using local state for development purposes - const handleSubmit = async (e: FormEvent) => { e.preventDefault(); console.log("props.value", props.value); @@ -171,8 +166,6 @@ const InputBar = (props: InputBarProps) => { ?.read() .then(async function processText({ done, value }) { if (done) { - settldrawImageUrl(""); - setTlDrawImage(""); setDisableInputs(false); setIsRagLoading(false); console.log("Stream complete"); @@ -305,30 +298,43 @@ const InputBar = (props: InputBarProps) => { props?.append?.(message as Message); props?.setInput?.(""); }; + const [image, setImage] = useState([]); - const handleAudio = async (audioFile: File) => { - setIsAudioWaveVisible(false); - setIsTranscribing(true); - const f = new FormData(); - f.append("file", audioFile); - // Buffer.from(audioFile) - console.log(audioFile); - try { - const res = await fetch("/api/transcript", { - method: "POST", - body: f, - }); - - // console.log('data', await data.json()); - const data = await res.json(); - console.log("got the data", data); - props?.setInput?.(data.text); - setIsTranscribing(false); - } catch (err) { - console.error("got in error", err); - setIsTranscribing(false); + const onDrop = useCallback(async (acceptedFiles: File[]) => { + if (acceptedFiles && acceptedFiles[0]?.type.startsWith("image/")) { + setImage(acceptedFiles); + props.setImageType(acceptedFiles[0].type); + props.setImageSize(String(acceptedFiles[0].size)); + props.setImageUrl(URL.createObjectURL(acceptedFiles[0])); + props.setImageName(JSON.stringify(acceptedFiles[0].name)); + props.setImageExtension(acceptedFiles[0].name.split(".").pop() || ""); + props.setDropzoneActive(true); + } else { + { + image + ? null + : toast({ + description: ( +
+                  
+                    Please select a image file.
+                  
+                
+ ), + }); + } } - }; + }, []); + const { getRootProps, getInputProps, open } = useDropzone({ + onDrop, + accept: { + "image/jpeg": [], + "image/png": [], + }, + maxFiles: 1, + noClick: true, + noKeyboard: true, + }); const [audioId, setAudioId] = useState(0); const [transcriptHashTable, setTranscriptHashTable] = useState<{ @@ -400,12 +406,40 @@ const InputBar = (props: InputBarProps) => { return (
-
+
+ {/* */} + {props.dropzoneActive ? ( + <> + {" "} +
{ + props.setDropzoneActive(false); //TODO: clear params + props.setImageUrl(""); + props.setImageName(""); + props.setImageType(""); + props.setImageSize(""); + props.setImageExtension(""); + const url = new URL(window.location.href); + window.history.replaceState({}, document.title, url.toString()); + }} + > + Preview + +
+ + ) : null}
{
-
+
{ setChatType={props.setChattype} isHome={props.isHome} /> +
diff --git a/src/components/room.tsx b/src/components/room.tsx index a71bb05f..c810d944 100644 --- a/src/components/room.tsx +++ b/src/components/room.tsx @@ -47,9 +47,7 @@ const RoomWrapper = (props: Props) => { username: props.username, isTyping: false, }, - (presenseUpdate) => { - console.log("presenseUpdate", presenseUpdate); - }, + (presenseUpdate) => {}, ); const dbIds = getUserIdList( props.type === "tldraw" ? props.snapShot : props.chat,