Skip to content

Commit

Permalink
add ingestion purpose to space ingest
Browse files Browse the repository at this point in the history
  • Loading branch information
valeksiev committed May 13, 2024
1 parent 679f342 commit 588cc7e
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 12 deletions.
10 changes: 5 additions & 5 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ import { AlkemioClient, createConfigUsingEnvVars } from '@alkemio/client-lib';

import Documents, { DocumentType } from './documents';
import logger from './logger';
import ingest from './ingest';
import ingest, { SpaceIngestionPurpose } from './ingest';
import generateDocument from './generate-document';

export const main = async (spaceId: string) => {
export const main = async (spaceId: string, purpose: SpaceIngestionPurpose) => {
logger.info(`Ingest invoked for space ${spaceId}`);
const config = createConfigUsingEnvVars();
const alkemioCliClient = new AlkemioClient(config);
Expand Down Expand Up @@ -68,7 +68,7 @@ export const main = async (spaceId: string) => {
);
}
}
await ingest(space.nameID, documents);
await ingest(space.nameID, documents, purpose);
logger.info('Space ingested.');
};

Expand All @@ -83,7 +83,7 @@ export const main = async (spaceId: string) => {
const connectionString = `amqp://${RABBITMQ_USER}:${RABBITMQ_PASSWORD}@${RABBITMQ_HOST}:${RABBITMQ_PORT}`;

const conn = await amqplib.connect(connectionString);
const queue = RABBITMQ_QUEUE ?? 'virtual-contributor-added-to-space';
const queue = RABBITMQ_QUEUE ?? 'ingest-space';

const channel = await conn.createChannel();
await channel.assertQueue(queue);
Expand All @@ -94,7 +94,7 @@ export const main = async (spaceId: string) => {
//maybe share them in a package
//publish a confirmation
const decoded = JSON.parse(JSON.parse(msg.content.toString()));
await main(decoded.spaceId);
await main(decoded.spaceId, decoded.purpose);
// add retry mechanism as well
channel.ack(msg);
} else {
Expand Down
24 changes: 17 additions & 7 deletions src/ingest.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,18 @@
import { ChromaClient } from 'chromadb';
import Documents from './documents';
import { OpenAIClient, AzureKeyCredential } from '@azure/openai';

export default async (space: string, docs: Documents) => {
import { logger } from '@alkemio/client-lib';

/**
 * Why a space is being ingested.
 *
 * The string value is used verbatim as the suffix of the vector-store
 * collection name (`<spaceNameID>-<purpose>`), so it must be spelled exactly
 * the way readers of that collection expect.
 *
 * NOTE(review): `Knowledge` was previously spelled `'kwnowledge'`. Collections
 * already created under the misspelled suffix will not be found under the
 * corrected name; confirm whether any need to be re-ingested or renamed.
 */
export enum SpaceIngestionPurpose {
  /** Space content ingested as a knowledge base. */
  Knowledge = 'knowledge',
  /** Space content ingested as context. */
  Context = 'context',
}

export default async (
spaceNameID: string,
docs: Documents,
purpose: SpaceIngestionPurpose
) => {
const endpoint = process.env.AZURE_OPENAI_ENDPOINT;
const key = process.env.AZURE_OPENAI_API_KEY;
const depolyment = process.env.EMBEDDINGS_DEPLOYMENT_NAME;
Expand All @@ -11,10 +21,6 @@ export default async (space: string, docs: Documents) => {
throw new Error('AI configuration missing from ENV.');
}

console.log({
path: `http://${process.env.VECTOR_DB_HOST}:${process.env.VECTOR_DB_PORT}`,
});

const client = new ChromaClient({
path: `http://${process.env.VECTOR_DB_HOST}:${process.env.VECTOR_DB_PORT}`,
});
Expand All @@ -27,7 +33,11 @@ export default async (space: string, docs: Documents) => {
const openAi = new OpenAIClient(endpoint, new AzureKeyCredential(key));
const { data } = await openAi.getEmbeddings(depolyment, forEmbed.documents);

const collection = await client.getOrCreateCollection({ name: space });
const name = `${spaceNameID}-${purpose}`;
logger.info(`Adding to collection ${name}`);
const collection = await client.getOrCreateCollection({
name,
});

await collection.upsert({
...forEmbed,
Expand Down

0 comments on commit 588cc7e

Please sign in to comment.