From 588cc7ecb7b6bae2c9e630704e0eded566a63519 Mon Sep 17 00:00:00 2001 From: Vladimir Aleksiev Date: Mon, 13 May 2024 12:42:02 +0300 Subject: [PATCH] add ingestion purpose to space ingest --- src/index.ts | 10 +++++----- src/ingest.ts | 24 +++++++++++++++++------- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/src/index.ts b/src/index.ts index bba59b2..5de23de 100644 --- a/src/index.ts +++ b/src/index.ts @@ -3,10 +3,10 @@ import { AlkemioClient, createConfigUsingEnvVars } from '@alkemio/client-lib'; import Documents, { DocumentType } from './documents'; import logger from './logger'; -import ingest from './ingest'; +import ingest, { SpaceIngestionPurpose } from './ingest'; import generateDocument from './generate-document'; -export const main = async (spaceId: string) => { +export const main = async (spaceId: string, purpose: SpaceIngestionPurpose) => { logger.info(`Ingest invoked for space ${spaceId}`); const config = createConfigUsingEnvVars(); const alkemioCliClient = new AlkemioClient(config); @@ -68,7 +68,7 @@ export const main = async (spaceId: string) => { ); } } - await ingest(space.nameID, documents); + await ingest(space.nameID, documents, purpose); logger.info('Space ingested.'); }; @@ -83,7 +83,7 @@ export const main = async (spaceId: string) => { const connectionString = `amqp://${RABBITMQ_USER}:${RABBITMQ_PASSWORD}@${RABBITMQ_HOST}:${RABBITMQ_PORT}`; const conn = await amqplib.connect(connectionString); - const queue = RABBITMQ_QUEUE ?? 'virtual-contributor-added-to-space'; + const queue = RABBITMQ_QUEUE ?? 'ingest-space'; const channel = await conn.createChannel(); await channel.assertQueue(queue); @@ -94,7 +94,7 @@ export const main = async (spaceId: string) => { //maybe share them in a package //publish a confifrmation const decoded = JSON.parse(JSON.parse(msg.content.toString())); - await main(decoded.spaceId); + await main(decoded.spaceId, decoded.purpose); // add rety mechanism as well channel.ack(msg); } else { diff --git a/src/ingest.ts b/src/ingest.ts index 0938635..6a46690 100644 --- a/src/ingest.ts +++ b/src/ingest.ts @@ -1,8 +1,18 @@ import { ChromaClient } from 'chromadb'; import Documents from './documents'; import { OpenAIClient, AzureKeyCredential } from '@azure/openai'; - -export default async (space: string, docs: Documents) => { +import { logger } from '@alkemio/client-lib'; + +export enum SpaceIngestionPurpose { + Knowledge = 'kwnowledge', + Context = 'context', +} + +export default async ( + spaceNameID: string, + docs: Documents, + purpose: SpaceIngestionPurpose +) => { const endpoint = process.env.AZURE_OPENAI_ENDPOINT; const key = process.env.AZURE_OPENAI_API_KEY; const depolyment = process.env.EMBEDDINGS_DEPLOYMENT_NAME; @@ -11,10 +21,6 @@ export default async (space: string, docs: Documents) => { throw new Error('AI configuration missing from ENV.'); } - console.log({ - path: `http://${process.env.VECTOR_DB_HOST}:${process.env.VECTOR_DB_PORT}`, - }); - const client = new ChromaClient({ path: `http://${process.env.VECTOR_DB_HOST}:${process.env.VECTOR_DB_PORT}`, }); @@ -27,7 +33,11 @@ export default async (space: string, docs: Documents) => { const openAi = new OpenAIClient(endpoint, new AzureKeyCredential(key)); const { data } = await openAi.getEmbeddings(depolyment, forEmbed.documents); - const collection = await client.getOrCreateCollection({ name: space }); + const name = `${spaceNameID}-${purpose}`; + logger.info(`Adding to collection ${name}`); + const collection = await client.getOrCreateCollection({ + name, + }); await collection.upsert({ ...forEmbed,