diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 89f7b832a95..030a0d093ee 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -80,3 +80,4 @@
 /generative-ai/gemini/sample-apps/e2e-gen-ai-app-starter-pack @eliasecchig @lspatarog @mariagpuyol @GoogleCloudPlatform/generative-ai-devrel
 /generative-ai/vision/use-cases/hey_llm @tushuhei @GoogleCloudPlatform/generative-ai-devrel
 /generative-ai/gemini/sample-apps/llamaindex-rag/backend/indexing/ @Lionel-Lim @GoogleCloudPlatform/generative-ai-devrel
+/generative-ai/gemini/multimodal-live-api/websocket-demo-app/ @ZackAkil @GoogleCloudPlatform/generative-ai-devrel
diff --git a/.github/actions/spelling/allow.txt b/.github/actions/spelling/allow.txt
index cce5259daf9..e1b8afde544 100644
--- a/.github/actions/spelling/allow.txt
+++ b/.github/actions/spelling/allow.txt
@@ -907,6 +907,7 @@ metaverse
 metricx
 mgrs
 mic
+mics
 millis
 miranda
 mmarco
diff --git a/gemini/multimodal-live-api/websocket-demo-app/.prettierrc b/gemini/multimodal-live-api/websocket-demo-app/.prettierrc
new file mode 100644
index 00000000000..76e9e194ca7
--- /dev/null
+++ b/gemini/multimodal-live-api/websocket-demo-app/.prettierrc
@@ -0,0 +1,4 @@
+{
+    "bracketSameLine": true,
+    "tabWidth": 4
+}
diff --git a/gemini/multimodal-live-api/websocket-demo-app/README.md b/gemini/multimodal-live-api/websocket-demo-app/README.md
index 8401e2b213f..23b8944a2ba 100644
--- a/gemini/multimodal-live-api/websocket-demo-app/README.md
+++ b/gemini/multimodal-live-api/websocket-demo-app/README.md
@@ -15,16 +15,15 @@ While some web development experience, particularly with localhost, port numbers
 
 ### File Structure
 
+- `backend/main.py`: The Python backend code
+- `backend/requirements.txt`: Lists the required Python dependencies
-- backend/main.py: The Python backend code
-- backend/requirements.txt: Lists the required Python dependencies
-
-- frontend/index.html: The frontend HTML app
-- frontend/script.js: Main frontend JavaScript code
-- frontend/gemini-live-api.js: Script for interacting with the Gemini API
-- frontend/live-media-manager.js: Script for handling media input and output
-- frontend/pcm-processor.js: Script for processing PCM audio
-- frontend/cookieJar.js: Script for managing cookies
+- `frontend/index.html`: The frontend HTML app
+- `frontend/script.js`: Main frontend JavaScript code
+- `frontend/gemini-live-api.js`: Script for interacting with the Gemini API
+- `frontend/live-media-manager.js`: Script for handling media input and output
+- `frontend/pcm-processor.js`: Script for processing PCM audio
+- `frontend/cookieJar.js`: Script for managing cookies
 
 ![Demo](https://storage.googleapis.com/cloud-samples-data/generative-ai/image/demo-UI.png)
@@ -41,85 +40,91 @@ git clone https://github.com/GoogleCloudPlatform/generative-ai.git
 cd generative-ai/gemini/multimodal-live-api/websocket-demo-app
 ```
 
-2. Create a new virtual environment and activate it:
+1. Create a new virtual environment and activate it:
 
 ```sh
 python3 -m venv env
 source env/bin/activate
 ```
 
-3. Install dependencies:
+1. Install dependencies:
 
 ```sh
 pip3 install -r backend/requirements.txt
 ```
 
-4. Start the Python WebSocket server:
+1. Start the Python WebSocket server:
 
 ```sh
 python3 backend/main.py
 ```
 
-5. Start the frontend:
-   Make sure to open a **new** terminal window to run this command. Keep the backend server running in the first terminal.
+1. Start the frontend:
+
+- Navigate to `script.js` on line 9, `const PROXY_URL = "wss://[THE_URL_YOU_COPIED_WITHOUT_HTTP]";` and replace the `PROXY_URL` value with `ws://localhost:8000`. It should look like: `const PROXY_URL = "ws://localhost:8000";`. Note the absence of the second "s" in "wss", as "ws" indicates a non-secure WebSocket connection.
+- Right below on line 10, update `PROJECT_ID` with your Google Cloud project ID.
+- Save the changes you've made to `script.js`.
+- Now make sure to open a **separate** terminal window from the backend to run this command (keep the backend server running in the first terminal).
 
 ```sh
 cd frontend
 python3 -m http.server
 ```
 
-6. Point your browser to the demo app UI based on the output of the terminal. (E.g., it may be http://localhost:8000, or it may use a different port.)
+1. Point your browser to the demo app UI based on the output of the terminal. (e.g., it may be `http://localhost:8000`, or it may use a different port.)
 
-7. Get your Google Cloud access token:
+1. Get your Google Cloud access token:
 
 Run the following commands in a terminal with gcloud installed to set your project and retrieve your access token.
 
 ```sh
+gcloud components update
+gcloud components install beta
 gcloud config set project YOUR-PROJECT-ID
 gcloud auth print-access-token
 ```
 
-8. Copy the access token from the previous step into the UI that you have open in your browser.
+1. Copy the access token from the previous step into the UI that you have open in your browser.
 
-9. Enter the model ID in the UI:
-   Replace `YOUR-PROJECT-ID` in the input with your credentials
+1. Enter the model ID in the UI:
+   Replace `YOUR-PROJECT-ID` in the input with your Google Cloud project ID.
 
-10. Connect and interact with the demo:
+1. Connect and interact with the demo:
 
 - After entering your Access Token and Model ID, press the connect button to connect your web app. Now you should be able to interact with Gemini 2.0 with the Multimodal Live API.
 
-11. To interact with the app, you can do the following:
+1. To interact with the app, you can do the following:
 
 - Text input: You can write a text prompt to send to the model by entering your message in the box and pressing the send arrow. The model will then respond via audio (turn up your volume!).
 - Voice input: Press the microphone button and start speaking. The model will respond via audio. If you would like to mute your microphone, press the button with a slash through the microphone.
 - Video input: The model will also capture your camera input and send it to Gemini. You can ask questions about current or previous video footage.
 
 For more details on how this works, visit the [documentation page for the Multimodal Live API](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/multimodal-live).
 
 ### Setup in Cloud Shell
 
 1. Open [Cloud Shell](https://cloud.google.com/shell/docs/editor-overview)
 
-2. Upload the frontend and backend folders to your Cloud Shell Editor project. Alternatively, you can clone the repository and cd into the correct directory:
+1. Upload the frontend and backend folders to your Cloud Shell Editor project.
+   Alternatively, you can clone the repository and cd into the correct directory:
 
 ```sh
 git clone https://github.com/GoogleCloudPlatform/generative-ai.git
 cd generative-ai/gemini/multimodal-live-api/websocket-demo-app
 ```
 
-3. Open two new terminal windows.
-4. Navigate to whichever folder in Cloud Shell you uploaded the code files to (i.e., using `cd your_folder_name`)
+1. Open two new terminal windows.
+1. Navigate to whichever folder in Cloud Shell you uploaded the code files to (e.g., using `cd your_folder_name`)
 
-5. Install dependencies: In one of the terminal windows run:
+1. Install dependencies: In one of the terminal windows run:
 
 ```sh
 pip3 install -r backend/requirements.txt
 ```
 
-6. Start the Python WebSocket server in one terminal.
+1. Start the Python WebSocket server in one terminal.
 
 ```sh
 python3 backend/main.py
 ```
 
-7. In order for index.html to work properly, you will need to update the app URL inside script.js to point to the correct proxy server URL you just set up in the previous step. To do so:
+1. In order for index.html to work properly, you will need to update the app URL inside script.js to point to the correct proxy server URL you just set up in the previous step. To do so:
 
 - Click on Web Preview (to the right of the Open Terminal button near the top)
 - Click "Preview on port 8080" (the port where you've set up the proxy server in the previous step)
@@ -129,7 +134,7 @@ python3 backend/main.py
 - Replace `wss://your websocket server` with `wss://[THE_URL_YOU_COPIED_WITHOUT_HTTP]`. For example, it should look like: `const PROXY_URL = "wss://8080-cs-123456789-default.cs-us-central1-abcd.cloudshell.dev";`
 - Save the changes you've made to script.js
 
-8. Start the frontend:
+1. Start the frontend:
 
 In the second terminal window, run the command below. Keep the backend server running in the first terminal. (Make sure you have navigated to the folder containing the code files, e.g. using `cd your_folder_name`)
 
@@ -138,13 +143,13 @@ cd frontend
 python3 -m http.server
 ```
 
-9. Test the demo app:
+1. Test the demo app:
 
 - Navigate to the Web Preview button again
 - Click on "Change port"
 - Change Preview Port to 8000, and then click on "Change and Preview". This should open up a new tab with the UI.
 
-10. Going back to the tab with the Cloud Shell Editor, connect to the application by running the following command in a new terminal window:
+1. Going back to the tab with the Cloud Shell Editor, connect to the application by running the following command in a new terminal window:
 
 ```sh
 gcloud config set project YOUR-PROJECT-ID
 gcloud auth print-access-token
 ```
@@ -156,7 +161,7 @@
 For example, it should look like: `projects/my-project-id/locations/us-central1/publishers/google/models/gemini-2.0-flash-exp`
 - Press the "Connect" button. Now you should be able to interact with Gemini 2.0 with the Multimodal Live API.
 
-11. To interact with the app, you can do the following:
+1. To interact with the app, you can do the following:
 
 - Text input: You can write a text prompt to send to the model by entering your message in the box and pressing the send arrow. The model will then respond via audio (turn up your volume!).
 - Voice input: Press the pink microphone button and start speaking. The model will respond via audio. If you would like to mute your microphone, press the button with a slash through the microphone.
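Reviewer note: to sanity-check the backend proxy without the UI, the sketch below replays the handshake that `frontend/gemini-live-api.js` performs in `sendInitialSetupMessages()` — first a `bearer_token`/`service_url` message for the proxy, then the session `setup` message. This is a minimal sketch, assuming the backend from `backend/main.py` is listening on `ws://localhost:8000` as in the README above and that it is run from a browser console; the token and project ID are placeholders.

```js
// Minimal sketch of the proxy handshake, mirroring sendInitialSetupMessages()
// in frontend/gemini-live-api.js. Assumes the backend proxy is listening on
// ws://localhost:8000; the token and project ID below are placeholders.
const ws = new WebSocket("ws://localhost:8000");

ws.onopen = () => {
    // Message 1: credentials plus the Vertex AI endpoint the proxy should dial.
    ws.send(
        JSON.stringify({
            bearer_token: "PASTE_gcloud_auth_print-access-token_OUTPUT",
            service_url:
                "wss://us-central1-aiplatform.googleapis.com/ws/google.cloud.aiplatform.v1beta1.LlmBidiService/BidiGenerateContent",
        }),
    );
    // Message 2: session setup naming the model URI and response modality.
    ws.send(
        JSON.stringify({
            setup: {
                model: "projects/YOUR-PROJECT-ID/locations/us-central1/publishers/google/models/gemini-2.0-flash-exp",
                generation_config: { response_modalities: ["AUDIO"] },
                system_instruction: { parts: [{ text: "" }] },
            },
        }),
    );
};

// A setupComplete reply maps to type "SETUP COMPLETE" in GeminiLiveResponseMessage.
ws.onmessage = (event) => console.log("proxy reply:", event.data);
```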
diff --git a/gemini/multimodal-live-api/websocket-demo-app/frontend/gemini-live-api.js b/gemini/multimodal-live-api/websocket-demo-app/frontend/gemini-live-api.js
index 26e00afec83..f4880911462 100644
--- a/gemini/multimodal-live-api/websocket-demo-app/frontend/gemini-live-api.js
+++ b/gemini/multimodal-live-api/websocket-demo-app/frontend/gemini-live-api.js
@@ -1,75 +1,68 @@
-
 class GeminiLiveResponseMessage {
     constructor(data) {
-
         this.data = "";
         this.type = "";
         this.endOfTurn = data?.serverContent?.turnComplete;
 
-        const parts = data?.serverContent?.modelTurn?.parts
+        const parts = data?.serverContent?.modelTurn?.parts;
 
-        if (parts.length && parts[0].text) {
+        if (data?.setupComplete) {
+            this.type = "SETUP COMPLETE";
+        } else if (parts?.length && parts[0].text) {
             this.data = parts[0].text;
-            this.type = "TEXT"
-        }
-        else if (parts.length && parts[0].inlineData) {
+            this.type = "TEXT";
+        } else if (parts?.length && parts[0].inlineData) {
             this.data = parts[0].inlineData.data;
-            this.type = "AUDIO"
-        }
-        else if (data?.setupComplete) {
-            this.type = "SETUP COMPLETE"
+            this.type = "AUDIO";
         }
     }
 }
 
-
 class GeminiLiveAPI {
-
     constructor(proxyUrl, projectId, model, apiHost) {
-
         this.proxyUrl = proxyUrl;
 
-        this.projectId = projectId
-        this.model = model
-        this.modelUri = `projects/${this.projectId}/locations/us-central1/publishers/google/models/${this.model}`
+        this.projectId = projectId;
+        this.model = model;
+        this.modelUri = `projects/${this.projectId}/locations/us-central1/publishers/google/models/${this.model}`;
 
-        this.responseModalities = ["AUDIO"]
-        this.systemInstructions = ""
+        this.responseModalities = ["AUDIO"];
+        this.systemInstructions = "";
 
-        this.apiHost = apiHost
-        this.serviceUrl = `wss://${this.apiHost}/ws/google.cloud.aiplatform.v1beta1.LlmBidiService/BidiGenerateContent`
+        this.apiHost = apiHost;
+        this.serviceUrl = `wss://${this.apiHost}/ws/google.cloud.aiplatform.v1beta1.LlmBidiService/BidiGenerateContent`;
 
         this.onReceiveResponse = (message) => {
-            console.log("Default message received callback", message)
-        }
+            console.log("Default message received callback", message);
+        };
 
         this.onConnectionStarted = () => {
-            console.log("Default onConnectionStarted")
-        }
+            console.log("Default onConnectionStarted");
+        };
 
         this.onErrorMessage = (message) => {
             alert(message);
-        }
+        };
 
-        this.accessToken = ''
-        this.websocket = null
+        this.accessToken = "";
+        this.websocket = null;
 
-        console.log("Created Gemini Live API object: ", this)
+        console.log("Created Gemini Live API object: ", this);
     }
 
     setProjectId(projectId) {
-        this.projectId = projectId
-        this.modelUri = `projects/${this.projectId}/locations/us-central1/publishers/google/models/${this.model}`
+        this.projectId = projectId;
+        this.modelUri = `projects/${this.projectId}/locations/us-central1/publishers/google/models/${this.model}`;
     }
 
     setAccessToken(newAccessToken) {
-        console.log("setting access token: ", newAccessToken)
-        this.accessToken = newAccessToken
+        console.log("setting access token: ", newAccessToken);
+        this.accessToken = newAccessToken;
     }
 
     connect(accessToken) {
-        this.setAccessToken(accessToken)
-        this.setupWebSocketToService()
+        this.setAccessToken(accessToken);
+        this.setupWebSocketToService();
     }
 
     disconnect() {
@@ -81,10 +74,10 @@ class GeminiLiveAPI {
     }
 
     onReceiveMessage(messageEvent) {
-        console.log("Message received: ", messageEvent)
+        console.log("Message received: ", messageEvent);
         const messageData = JSON.parse(messageEvent.data);
         const message = new GeminiLiveResponseMessage(messageData);
 
-        console.log("onReceiveMessageCallBack this ", this)
+        console.log("onReceiveMessageCallBack this ", this);
         this.onReceiveResponse(message);
     }
 
@@ -101,7 +94,6 @@ class GeminiLiveAPI {
         this.webSocket.onerror = (event) => {
             console.log("websocket error: ", event);
             this.onErrorMessage("Connection error");
-
         };
 
         this.webSocket.onopen = (event) => {
@@ -113,28 +105,28 @@ class GeminiLiveAPI {
         this.webSocket.onmessage = this.onReceiveMessage.bind(this);
     }
 
-
     sendInitialSetupMessages() {
-
         const serviceSetupMessage = {
             bearer_token: this.accessToken,
-            service_url: this.serviceUrl
+            service_url: this.serviceUrl,
         };
-        this.sendMessage(serviceSetupMessage)
+        this.sendMessage(serviceSetupMessage);
 
         const sessionSetupMessage = {
             setup: {
                 model: this.modelUri,
-                generation_config: { response_modalities: this.responseModalities },
-                system_instruction: { parts: [{ text: this.systemInstructions }] }
-            }
-        }
-        this.sendMessage(sessionSetupMessage)
-
+                generation_config: {
+                    response_modalities: this.responseModalities,
+                },
+                system_instruction: {
+                    parts: [{ text: this.systemInstructions }],
+                },
+            },
+        };
+        this.sendMessage(sessionSetupMessage);
     }
 
     sendTextMessage(text) {
-
         const textMessage = {
             client_content: {
                 turns: [
@@ -146,7 +138,7 @@ class GeminiLiveAPI {
                 turn_complete: true,
             },
         };
-        this.sendMessage(textMessage)
+        this.sendMessage(textMessage);
     }
 
     sendRealtimeInputMessage(data, mime_type) {
@@ -157,19 +149,19 @@ class GeminiLiveAPI {
                     mime_type: mime_type,
                     data: data,
                 },
-            ]
+            ],
             },
         };
-        this.sendMessage(message)
+        this.sendMessage(message);
     }
 
     sendAudioMessage(base64PCM) {
-        this.sendRealtimeInputMessage(base64PCM, "audio/pcm")
+        this.sendRealtimeInputMessage(base64PCM, "audio/pcm");
     }
 
     sendImageMessage(base64Image, mime_type = "image/jpeg") {
-        this.sendRealtimeInputMessage(base64Image, mime_type)
+        this.sendRealtimeInputMessage(base64Image, mime_type);
     }
 }
 
-console.log("loaded gemini-live-api.js")
\ No newline at end of file
+console.log("loaded gemini-live-api.js");
diff --git a/gemini/multimodal-live-api/websocket-demo-app/frontend/live-media-manager.js b/gemini/multimodal-live-api/websocket-demo-app/frontend/live-media-manager.js
index beeab2653b4..652c1f2f691 100644
--- a/gemini/multimodal-live-api/websocket-demo-app/frontend/live-media-manager.js
+++ b/gemini/multimodal-live-api/websocket-demo-app/frontend/live-media-manager.js
@@ -1,9 +1,8 @@
 class LiveAudioOutputManager {
-
     constructor() {
-        this.audioInputContext
-        this.workletNode
-        this.initalized = false
+        this.audioInputContext;
+        this.workletNode;
+        this.initialized = false;
 
         this.audioQueue = [];
         this.isPlaying = false;
@@ -11,19 +10,20 @@ class LiveAudioOutputManager {
         this.initializeAudioContext();
     }
 
-
     async playAudioChunk(base64AudioChunk) {
         try {
-            if (!this.initalized) {
-                await this.initializeAudioContext.bind(this)();
+            if (!this.initialized) {
+                await this.initializeAudioContext();
             }
 
             if (this.audioInputContext.state === "suspended") {
                 await this.audioInputContext.resume();
             }
 
-            const arrayBuffer = LiveAudioOutputManager.base64ToArrayBuffer(base64AudioChunk);
-            const float32Data = LiveAudioOutputManager.convertPCM16LEToFloat32(arrayBuffer);
+            const arrayBuffer =
+                LiveAudioOutputManager.base64ToArrayBuffer(base64AudioChunk);
+            const float32Data =
+                LiveAudioOutputManager.convertPCM16LEToFloat32(arrayBuffer);
 
             this.workletNode.port.postMessage(float32Data);
         } catch (error) {
@@ -32,19 +32,21 @@ class LiveAudioOutputManager {
     }
 
     async initializeAudioContext() {
+        if (this.initialized) return;
-        if (this.initalized) return;
-
-        console.log("initializeAudioContext...")
+        console.log("initializeAudioContext...");
 
         this.audioInputContext = new (window.AudioContext ||
             window.webkitAudioContext)({ sampleRate: 24000 });
         await this.audioInputContext.audioWorklet.addModule("pcm-processor.js");
-        this.workletNode = new AudioWorkletNode(this.audioInputContext, "pcm-processor");
+        this.workletNode = new AudioWorkletNode(
+            this.audioInputContext,
+            "pcm-processor",
+        );
         this.workletNode.connect(this.audioInputContext.destination);
 
-        this.initalized = true;
-        console.log("initializeAudioContext end")
+        this.initialized = true;
+        console.log("initializeAudioContext end");
     }
 
     static base64ToArrayBuffer(base64) {
@@ -66,24 +68,21 @@ class LiveAudioOutputManager {
     }
 }
 
-
-
 class LiveAudioInputManager {
-
     constructor() {
-        this.audioContext
-        this.mediaRecorder
-        this.processor = false
+        this.audioContext;
+        this.mediaRecorder;
+        this.processor = false;
         this.pcmData = [];
 
-        this.deviceId = null
+        this.deviceId = null;
 
         this.interval = null;
-        this.stream = null
+        this.stream = null;
 
         this.onNewAudioRecordingChunk = (audioData) => {
-            console.log("New audio recording ")
-        }
+            console.log("New audio recording ");
+        };
     }
 
     async connectMicrophone() {
@@ -91,20 +90,18 @@ class LiveAudioInputManager {
             sampleRate: 16000,
         });
 
-        let contraints = {
+        let constraints = {
             audio: {
                 channelCount: 1,
                 sampleRate: 16000,
             },
-        }
+        };
 
         if (this.deviceId) {
-            contraints.audio.deviceId = { exact: this.deviceId }
+            constraints.audio.deviceId = { exact: this.deviceId };
         }
 
-        this.stream = await navigator.mediaDevices.getUserMedia(
-            contraints
-        );
+        this.stream = await navigator.mediaDevices.getUserMedia(constraints);
 
         const source = this.audioContext.createMediaStreamSource(this.stream);
         this.processor = this.audioContext.createScriptProcessor(4096, 1, 1);
@@ -126,8 +123,8 @@ class LiveAudioInputManager {
     }
 
     newAudioRecording(b64AudioData) {
-        console.log("newAudioRecording ")
-        this.onNewAudioRecordingChunk(b64AudioData)
+        console.log("newAudioRecording ");
+        this.onNewAudioRecordingChunk(b64AudioData);
     }
 
     recordChunk() {
@@ -138,7 +135,7 @@ class LiveAudioInputManager {
         });
 
         const base64 = btoa(
-            String.fromCharCode.apply(null, new Uint8Array(buffer))
+            String.fromCharCode.apply(null, new Uint8Array(buffer)),
         );
         this.newAudioRecording(base64);
         this.pcmData = [];
@@ -149,60 +146,57 @@ class LiveAudioInputManager {
             this.processor.disconnect();
             this.audioContext.close();
         } catch {
-
+            console.error("Error disconnecting microphone");
         }
         clearInterval(this.interval);
     }
 
     async updateMicrophoneDevice(deviceId) {
-        this.deviceId = deviceId
-        this.disconnectMicrophone()
-        this.connectMicrophone()
+        this.deviceId = deviceId;
+        this.disconnectMicrophone();
+        this.connectMicrophone();
     }
 }
 
-
 class LiveVideoManager {
-
     constructor(previewVideoElement, previewCanvasElement) {
         this.previewVideoElement = previewVideoElement;
         this.previewCanvasElement = previewCanvasElement;
-        this.ctx = this.previewCanvasElement.getContext("2d")
-        this.stream = null
+        this.ctx = this.previewCanvasElement.getContext("2d");
+        this.stream = null;
         this.interval = null;
 
         this.onNewFrame = (newFrame) => {
-            console.log("Default new frame trigger.")
-        }
+            console.log("Default new frame trigger.");
+        };
     }
 
     async startWebcam() {
         try {
             const constraints = {
-                video: true
+                video: true,
                 // video: {
                 //     width: { max: 640 },
                 //     height: { max: 480 },
                 // },
             };
-            this.stream = await navigator.mediaDevices.getUserMedia(constraints);
+            this.stream =
+                await navigator.mediaDevices.getUserMedia(constraints);
             this.previewVideoElement.srcObject = this.stream;
         } catch (err) {
             console.error("Error accessing the webcam: ", err);
         }
-        setInterval(this.newFrame.bind(this), 1000)
+        setInterval(this.newFrame.bind(this), 1000);
     }
 
     stopWebcam() {
         clearInterval(this.interval);
-        this.stopStream()
-
+        this.stopStream();
     }
 
     stopStream() {
-        if (!this.stream)
-            return
+        if (!this.stream) return;
 
         const tracks = this.stream.getTracks();
 
@@ -211,65 +205,66 @@ class LiveVideoManager {
         });
     }
 
-
     async updateWebcamDevice(deviceId) {
         const constraints = {
-            video: { deviceId: { exact: deviceId } }
-        }
+            video: { deviceId: { exact: deviceId } },
+        };
         this.stream = await navigator.mediaDevices.getUserMedia(constraints);
         this.previewVideoElement.srcObject = this.stream;
     }
 
     captureFrameB64() {
-
-        if (this.stream == null) return ""
+        if (this.stream == null) return "";
 
         this.previewCanvasElement.width = this.previewVideoElement.videoWidth;
         this.previewCanvasElement.height = this.previewVideoElement.videoHeight;
-        this.ctx.drawImage(this.previewVideoElement, 0, 0, this.previewCanvasElement.width, this.previewCanvasElement.height);
-        const imageData = this.previewCanvasElement.toDataURL("image/jpeg").split(",")[1].trim();
-        return imageData
+        this.ctx.drawImage(
+            this.previewVideoElement,
+            0,
+            0,
+            this.previewCanvasElement.width,
+            this.previewCanvasElement.height,
+        );
+        const imageData = this.previewCanvasElement
+            .toDataURL("image/jpeg")
+            .split(",")[1]
+            .trim();
+        return imageData;
     }
 
     newFrame() {
-        console.log("capturinng new frame")
-        const frameData = this.captureFrameB64()
-        this.onNewFrame(frameData)
+        console.log("capturing new frame");
+        const frameData = this.captureFrameB64();
+        this.onNewFrame(frameData);
     }
-
 }
 
-
-
 class LiveScreenManager {
-
     constructor(previewVideoElement, previewCanvasElement) {
         this.previewVideoElement = previewVideoElement;
         this.previewCanvasElement = previewCanvasElement;
-        this.ctx = this.previewCanvasElement.getContext("2d")
-        this.stream = null
+        this.ctx = this.previewCanvasElement.getContext("2d");
+        this.stream = null;
         this.interval = null;
 
         this.onNewFrame = (newFrame) => {
-            console.log("Default new frame trigger: ", newFrame)
-        }
+            console.log("Default new frame trigger: ", newFrame);
+        };
     }
 
     async startCapture() {
         try {
-
             this.stream = await navigator.mediaDevices.getDisplayMedia();
             this.previewVideoElement.srcObject = this.stream;
         } catch (err) {
             console.error("Error accessing the webcam: ", err);
         }
 
-        setInterval(this.newFrame.bind(this), 1000)
+        setInterval(this.newFrame.bind(this), 1000);
     }
 
     stopCapture() {
         clearInterval(this.interval);
-        if (!this.stream)
-            return
+        if (!this.stream) return;
 
         const tracks = this.stream.getTracks();
 
@@ -278,24 +273,30 @@ class LiveScreenManager {
         });
     }
 
-
     captureFrameB64() {
-
-        if (this.stream == null) return ""
+        if (this.stream == null) return "";
 
         this.previewCanvasElement.width = this.previewVideoElement.videoWidth;
         this.previewCanvasElement.height = this.previewVideoElement.videoHeight;
-        this.ctx.drawImage(this.previewVideoElement, 0, 0, this.previewCanvasElement.width, this.previewCanvasElement.height);
-        const imageData = this.previewCanvasElement.toDataURL("image/jpeg").split(",")[1].trim();
-        return imageData
+        this.ctx.drawImage(
+            this.previewVideoElement,
+            0,
+            0,
+            this.previewCanvasElement.width,
+            this.previewCanvasElement.height,
+        );
+        const imageData = this.previewCanvasElement
+            .toDataURL("image/jpeg")
+            .split(",")[1]
+            .trim();
+        return imageData;
     }
 
     newFrame() {
-        console.log("capturinng new frame")
-        const frameData = this.captureFrameB64()
-        this.onNewFrame(frameData)
+        console.log("capturing new frame");
+        const frameData = this.captureFrameB64();
+        this.onNewFrame(frameData);
     }
-
 }
 
-console.log("loaded live-media-manager.js")
\ No newline at end of file
+console.log("loaded live-media-manager.js");
diff --git a/gemini/multimodal-live-api/websocket-demo-app/frontend/pcm-processor.js b/gemini/multimodal-live-api/websocket-demo-app/frontend/pcm-processor.js
index b79a36caf05..086a936401c 100644
--- a/gemini/multimodal-live-api/websocket-demo-app/frontend/pcm-processor.js
+++ b/gemini/multimodal-live-api/websocket-demo-app/frontend/pcm-processor.js
@@ -4,31 +4,33 @@
  * @description Processes PCM audio data in a Web Audio API context
  */
 class PCMProcessor extends AudioWorkletProcessor {
-  constructor() {
-    super();
-    this.buffer = new Float32Array();
+    constructor() {
+        super();
+        this.buffer = new Float32Array();
 
-    this.port.onmessage = (e) => {
-      const newData = e.data;
-      const newBuffer = new Float32Array(this.buffer.length + newData.length);
-      newBuffer.set(this.buffer);
-      newBuffer.set(newData, this.buffer.length);
-      this.buffer = newBuffer;
-    };
-  }
+        this.port.onmessage = (e) => {
+            const newData = e.data;
+            const newBuffer = new Float32Array(
+                this.buffer.length + newData.length,
+            );
+            newBuffer.set(this.buffer);
+            newBuffer.set(newData, this.buffer.length);
+            this.buffer = newBuffer;
+        };
+    }
 
-  process(inputs, outputs, parameters) {
-    const output = outputs[0];
-    const channelData = output[0];
+    process(inputs, outputs, parameters) {
+        const output = outputs[0];
+        const channelData = output[0];
 
-    if (this.buffer.length >= channelData.length) {
-      channelData.set(this.buffer.slice(0, channelData.length));
-      this.buffer = this.buffer.slice(channelData.length);
-      return true;
-    }
+        if (this.buffer.length >= channelData.length) {
+            channelData.set(this.buffer.slice(0, channelData.length));
+            this.buffer = this.buffer.slice(channelData.length);
+            return true;
+        }
 
-    return true;
-  }
+        return true;
+    }
 }
 
 registerProcessor("pcm-processor", PCMProcessor);
diff --git a/gemini/multimodal-live-api/websocket-demo-app/frontend/script.js b/gemini/multimodal-live-api/websocket-demo-app/frontend/script.js
index 45a53eafe6a..06dcd7df8f6 100644
--- a/gemini/multimodal-live-api/websocket-demo-app/frontend/script.js
+++ b/gemini/multimodal-live-api/websocket-demo-app/frontend/script.js
@@ -1,11 +1,10 @@
 window.addEventListener("load", (event) => {
     console.log("Hello Gemini Realtime Demo!");
 
-    setAvalibleCamerasOptions();
-    setAvalibleMicrophoneOptions();
+    setAvailableCamerasOptions();
+    setAvailableMicrophoneOptions();
 });
 
-
 const PROXY_URL = "wss://[THE_URL_YOU_COPIED_WITHOUT_HTTP]";
 const PROJECT_ID = "your project id";
 const MODEL = "gemini-2.0-flash-exp";
@@ -13,8 +12,7 @@ const API_HOST = "us-central1-aiplatform.googleapis.com";
 
 const accessTokenInput = document.getElementById("token");
 const projectInput = document.getElementById("project");
-const systemInstructionsInput =
-    document.getElementById("systemInstructions");
+const systemInstructionsInput = document.getElementById("systemInstructions");
 
 CookieJar.init("token");
 CookieJar.init("project");
@@ -33,22 +31,17 @@ const screenBtn = document.getElementById("screenBtn");
 
 const cameraSelect = document.getElementById("cameraSource");
 const micSelect = document.getElementById("audioSource");
 
-const geminiLiveApi = new GeminiLiveAPI(
-    PROXY_URL,
-    PROJECT_ID,
-    MODEL,
-    API_HOST
-);
+const geminiLiveApi = new GeminiLiveAPI(PROXY_URL, PROJECT_ID, MODEL, API_HOST);
 
 geminiLiveApi.onErrorMessage = (message) => {
     showDialogWithMessage(message);
-    setAppSatus("disconnected");
+    setAppStatus("disconnected");
 };
 
 function getSelectedResponseModality() {
     // return "AUDIO";
     const radioButtons = document.querySelectorAll(
-        'md-radio[name="responseModality"]'
+        'md-radio[name="responseModality"]',
     );
 
     let selectedValue;
@@ -66,13 +59,13 @@ function getSystemInstructions() {
 }
 
 function connectBtnClick() {
-    setAppSatus("connecting");
+    setAppStatus("connecting");
 
     geminiLiveApi.responseModalities = getSelectedResponseModality();
     geminiLiveApi.systemInstructions = getSystemInstructions();
 
     geminiLiveApi.onConnectionStarted = () => {
-        setAppSatus("connected");
+        setAppStatus("connected");
         startAudioInput();
     };
 
@@ -144,10 +137,7 @@ const canvasElement = document.getElementById("canvas");
 
 const liveVideoManager = new LiveVideoManager(videoElement, canvasElement);
 
-const liveScreenManager = new LiveScreenManager(
-    videoElement,
-    canvasElement
-);
+const liveScreenManager = new LiveScreenManager(videoElement, canvasElement);
 
 liveVideoManager.onNewFrame = (b64Image) => {
     geminiLiveApi.sendImageMessage(b64Image);
@@ -188,7 +178,7 @@ function newMicSelected() {
 }
 
 function disconnectBtnClick() {
-    setAppSatus("disconnected");
+    setAppStatus("disconnected");
     geminiLiveApi.disconnect();
     stopAudioInput();
 }
@@ -200,7 +190,7 @@ function showDialogWithMessage(messageText) {
     dialog.show();
 }
 
-async function getAvalibleDevices(deviceType) {
+async function getAvailableDevices(deviceType) {
     const allDevices = await navigator.mediaDevices.enumerateDevices();
     const devices = [];
     allDevices.forEach((device) => {
@@ -214,12 +204,12 @@ async function getAvalibleDevices(deviceType) {
     return devices;
 }
 
-async function getAvalibleCameras() {
-    return await this.getAvalibleDevices("videoinput");
+async function getAvailableCameras() {
+    return await this.getAvailableDevices("videoinput");
 }
 
-async function getAvalibleAudioInputs() {
-    return await this.getAvalibleDevices("audioinput");
+async function getAvailableAudioInputs() {
+    return await this.getAvailableDevices("audioinput");
 }
 
 function setMaterialSelect(allOptions, selectElement) {
@@ -236,19 +226,19 @@ function setMaterialSelect(allOptions, selectElement) {
     });
 }
 
-async function setAvalibleCamerasOptions() {
-    const cameras = await getAvalibleCameras();
+async function setAvailableCamerasOptions() {
+    const cameras = await getAvailableCameras();
     const videoSelect = document.getElementById("cameraSource");
     setMaterialSelect(cameras, videoSelect);
 }
 
-async function setAvalibleMicrophoneOptions() {
-    const mics = await getAvalibleAudioInputs();
+async function setAvailableMicrophoneOptions() {
+    const mics = await getAvailableAudioInputs();
     const audioSelect = document.getElementById("audioSource");
     setMaterialSelect(mics, audioSelect);
 }
 
-function setAppSatus(status) {
+function setAppStatus(status) {
     disconnected.hidden = true;
     connecting.hidden = true;
     connected.hidden = true;
@@ -269,4 +259,4 @@ function setAppSatus(status) {
             break;
         default:
     }
-}
\ No newline at end of file
+}
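Reviewer note: for testing the renamed helpers end to end, here is a minimal usage sketch of the `GeminiLiveAPI` class as refactored above, run from a page where `gemini-live-api.js` is loaded. It assumes the local proxy from the README (`ws://localhost:8000`); the access token and project ID are placeholders, and `"TEXT"` is one of the modalities the UI's radio buttons expose.

```js
// Minimal usage sketch of GeminiLiveAPI as refactored in this PR. Assumes the
// backend proxy runs on ws://localhost:8000; token and project are placeholders.
const api = new GeminiLiveAPI(
    "ws://localhost:8000", // PROXY_URL
    "YOUR-PROJECT-ID", // PROJECT_ID
    "gemini-2.0-flash-exp", // MODEL
    "us-central1-aiplatform.googleapis.com", // API_HOST
);

api.responseModalities = ["TEXT"]; // or ["AUDIO"], as selected in the UI

// Send a prompt once the connection is up and the setup messages have gone out.
api.onConnectionStarted = () => api.sendTextMessage("Hello, Gemini!");

// GeminiLiveResponseMessage tags replies as TEXT, AUDIO, or SETUP COMPLETE.
api.onReceiveResponse = (message) => {
    if (message.type === "TEXT") console.log("Gemini:", message.data);
};

api.connect("PASTE_gcloud_auth_print-access-token_OUTPUT");
```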