From 1848b92ccab14379d51fa83cdc2cdbe6223982fe Mon Sep 17 00:00:00 2001 From: mythilytm Date: Mon, 4 Nov 2024 11:25:37 -0500 Subject: [PATCH] Add contact summary as a field to the training set --- .../generate-ai-training-set/hrmdbAccess.ts | 7 +++++-- .../scheduled-tasks/generate-ai-training-set/index.ts | 2 +- .../generate-ai-training-set/trainingSetDocument.ts | 4 +++- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/hrm-domain/scheduled-tasks/generate-ai-training-set/hrmdbAccess.ts b/hrm-domain/scheduled-tasks/generate-ai-training-set/hrmdbAccess.ts index 41437a3f0..1d98c26db 100644 --- a/hrm-domain/scheduled-tasks/generate-ai-training-set/hrmdbAccess.ts +++ b/hrm-domain/scheduled-tasks/generate-ai-training-set/hrmdbAccess.ts @@ -21,10 +21,11 @@ import { db, pgp } from '@tech-matters/hrm-core/connection-pool'; const HIGH_WATER_MARK = 1000; -const SELECT_CATEGORIES_AND_TRANSCRIPTS_SQL = ` +const SELECT_CATEGORIES_SUMMARY_AND_TRANSCRIPTS_SQL = ` SELECT c."id" AS "contactId", c."rawJson"->'categories' AS "categories", + c."rawJson"->'caseInformation'->>'callSummary' AS "summary", cm."storeTypeSpecificData", cm."storeTypeSpecificData"->'location'->>'bucket' AS "transcriptBucket", cm."storeTypeSpecificData"->'location'->>'key' AS "transcriptKey" @@ -33,6 +34,7 @@ const SELECT_CATEGORIES_AND_TRANSCRIPTS_SQL = ` WHERE c."accountSid" = $ AND (SELECT COUNT(*) FROM jsonb_object_keys(COALESCE(c."rawJson"->'categories', '{}'::jsonb))) > 0 AND + COALESCE(c."rawJson"->'caseInformation'->>'callSummary', '') <> '' AND cm."storeType" = 'S3' AND cm."storeTypeSpecificData"->>'type' = 'transcript' AND cm."storeTypeSpecificData"->>'location' IS NOT NULL @@ -42,6 +44,7 @@ export type TrainingSetContact = { contactId: string; accountSid: HrmAccountId; categories: Record; + summary: string; transcriptKey: string; transcriptBucket: string; }; @@ -50,7 +53,7 @@ export const streamTrainingSetContacts = async ( accountSid: HrmAccountId, ): Promise => { const qs = new QueryStream( - pgp.as.format(SELECT_CATEGORIES_AND_TRANSCRIPTS_SQL, { accountSid }), + pgp.as.format(SELECT_CATEGORIES_SUMMARY_AND_TRANSCRIPTS_SQL, { accountSid }), [], { highWaterMark: HIGH_WATER_MARK }, ); diff --git a/hrm-domain/scheduled-tasks/generate-ai-training-set/index.ts b/hrm-domain/scheduled-tasks/generate-ai-training-set/index.ts index 3cfbb171c..f5755673f 100644 --- a/hrm-domain/scheduled-tasks/generate-ai-training-set/index.ts +++ b/hrm-domain/scheduled-tasks/generate-ai-training-set/index.ts @@ -69,7 +69,7 @@ export const generate = async ( }); for (const { accountSid, shortCode } of accountSidMappings) { - // Query the DB for contacts and start streaming records with their ID, categories and transcript location + // Query the DB for contacts and start streaming records with their ID, categories, contact summary and transcript location const contactStream = await streamTrainingSetContacts(accountSid); console.log(`Streaming contacts for ${shortCode}...`); diff --git a/hrm-domain/scheduled-tasks/generate-ai-training-set/trainingSetDocument.ts b/hrm-domain/scheduled-tasks/generate-ai-training-set/trainingSetDocument.ts index ad3cb0438..5c3912036 100644 --- a/hrm-domain/scheduled-tasks/generate-ai-training-set/trainingSetDocument.ts +++ b/hrm-domain/scheduled-tasks/generate-ai-training-set/trainingSetDocument.ts @@ -21,15 +21,17 @@ import { getS3Object } from '@tech-matters/s3-client'; export type TrainingSetDocument = { contactId: string; categories: Record; + summary: string; messages: ExportTranscript['messages']; }; const trainingSetDocument = ( - { contactId, categories }: TrainingSetContact, + { contactId, categories, summary }: TrainingSetContact, { messages }: ExportTranscript, ): TrainingSetDocument => ({ contactId, categories, + summary, messages, });