Skip to content

Commit

Permalink
Merge pull request #16 from RubyResearch/vlad/moondream_integration
Browse files Browse the repository at this point in the history
Integrate Moondream into Twitter and Discord
  • Loading branch information
lalalune authored Jul 31, 2024
2 parents 946d217 + 77daf17 commit 4ac26a6
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 4 deletions.
26 changes: 26 additions & 0 deletions src/clients/discord/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ import settings from "../../core/settings.ts";
import { AudioMonitor } from "./audioMonitor.ts";
import { commands } from "./commands.ts";
import { InterestChannels, ResponseType } from "./types.ts";
import ImageRecognitionService from "../../services/imageRecognition.ts"
import { extractAnswer } from "../../core/util.ts";
import { SpeechSynthesizer } from "../../services/speechSynthesis.ts";
import WavEncoder from "wav-encoder";

Expand Down Expand Up @@ -101,6 +103,7 @@ export class DiscordClient extends EventEmitter {
private agent: Agent;
private bio: string;
private transcriber: any;
private imageRecognitionService: ImageRecognitionService;
speechSynthesizer: SpeechSynthesizer;

constructor(agent: Agent, bio: string) {
Expand All @@ -124,6 +127,9 @@ export class DiscordClient extends EventEmitter {

this.initializeTranscriber();

this.imageRecognitionService = new ImageRecognitionService();
this.imageRecognitionService.initialize();

this.client.once(Events.ClientReady, async (readyClient: { user: { tag: any; id: any } }) => {
console.log(`Logged in as ${readyClient.user?.tag}`);
console.log("Use this URL to add the bot to your server:");
Expand Down Expand Up @@ -210,6 +216,12 @@ export class DiscordClient extends EventEmitter {
const user_id = message.author.id as UUID;
const userName = message.author.username;
const channelId = message.channel.id;

// Check for image attachments
if (message.attachments.size > 0) {
await this.handleImageRecognition(message);
}

const textContent = message.content;

try {
Expand Down Expand Up @@ -316,6 +328,20 @@ export class DiscordClient extends EventEmitter {
}
}

/**
 * Describes the first attachment of a Discord message when it is an image.
 *
 * Only the first attachment is inspected. If its content type starts with
 * `image/`, its URL is sent to the image recognition service, the answer is
 * extracted from the first returned entry, and the description is appended
 * to `message.content` (the message is mutated in place). On failure the
 * error is logged and an apology is sent as a reply to the message.
 *
 * @param message - The incoming Discord message to inspect and annotate.
 */
private async handleImageRecognition(message: DiscordMessage) {
  const firstAttachment = message.attachments.first();
  const isImage = firstAttachment?.contentType?.startsWith('image/');

  // Nothing to do when there is no attachment or it is not an image.
  if (!firstAttachment || !isImage) {
    return;
  }

  try {
    const rawOutput = await this.imageRecognitionService.recognizeImage(firstAttachment.url);
    const imageSummary = extractAnswer(rawOutput[0]);

    // Append the description so it becomes part of the completion context.
    message.content += `\nImage description: ${imageSummary}`;
  } catch (error) {
    console.error('Error recognizing image:', error);
    await message.reply('Sorry, I encountered an error while processing the image.');
  }
}

private async ensureUserExists(agentId: UUID, userName: string, botToken: string | null = null) {
if (!userName && botToken) {
Expand Down
7 changes: 5 additions & 2 deletions src/clients/twitter/base.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import settings from "../../core/settings.ts";

import { fileURLToPath } from 'url';
import ImageRecognitionService from "../../services/imageRecognition.ts";
import { extractAnswer } from "../../core/util.ts";

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
Expand Down Expand Up @@ -123,8 +124,10 @@ export class ClientBase extends EventEmitter {

async describeImage(imageUrl: string): Promise<string> {
try {
const description = await this.imageRecognitionService.recognizeImage(imageUrl);
return description[0] || 'Unable to describe the image.';
const recognizedText = await this.imageRecognitionService.recognizeImage(imageUrl);
const description = extractAnswer(recognizedText[0]);

return description || 'Unable to describe the image.';
} catch (error) {
console.error('Error describing image:', error);
return 'Error occurred while describing the image.';
Expand Down
9 changes: 8 additions & 1 deletion src/core/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,11 @@ export function prependWavHeader(readable: Readable, audioLength: number, sample
passThrough.end();
});
return passThrough;
}
}


/**
 * Extracts the answer portion from a generated text.
 *
 * Returns the substring that follows the first `Answer: ` marker and precedes
 * the next `<|endoftext|>` token after it. Missing markers are tolerated:
 * without `Answer: ` the extraction starts at the beginning of the text, and
 * without `<|endoftext|>` it runs to the end of the text.
 *
 * @param text - The text to extract the answer from.
 * @returns The extracted answer; an empty string when `text` is empty.
 */
export function extractAnswer(text: string): string {
  const answerMarker = 'Answer: ';
  const endToken = '<|endoftext|>';

  // indexOf yields -1 when the marker is absent; fall back to the start of the
  // text rather than slicing from an arbitrary offset.
  const markerIndex = text.indexOf(answerMarker);
  const startIndex = markerIndex === -1 ? 0 : markerIndex + answerMarker.length;

  // Search for the end token from the answer start so short answers are not
  // skipped, and fall back to the end of the text when the token is absent
  // (a -1 end index would silently drop the final character).
  const endTokenIndex = text.indexOf(endToken, startIndex);
  const endIndex = endTokenIndex === -1 ? text.length : endTokenIndex;

  return text.slice(startIndex, endIndex);
}
3 changes: 2 additions & 1 deletion src/services/imageRecognition.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ class ImageRecognitionService {

constructor() {
this.modelId = 'Xenova/moondream2';
this.device = 'webgpu';
// this.device = 'webgpu';
this.device = 'cpu';
this.model = null;
this.processor = null;
this.tokenizer = null;
Expand Down

0 comments on commit 4ac26a6

Please sign in to comment.