Skip to content

Commit

Permalink
Allow the question sent to cosine similarity to be diff from prompt (#150)
Browse files Browse the repository at this point in the history

- Add cosine prompt to data extract config

Signed-off-by: Sean Sundberg <[email protected]>
  • Loading branch information
seansund authored Nov 9, 2023
1 parent ed09969 commit 93c0f2a
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 46 deletions.
22 changes: 11 additions & 11 deletions config/KYCDataValidationQuestions.csv
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
ID,Question,Source,Model,Token,PoCScope,Company,Prompt,Expected Answer,watsonx Response
1,What is Name and trading name of the organization?,Discovery,google/flan-t5-xxl,20,,,"From below text,What is Name and trading name of the organization #?",,
2,What is the registered address of the company?,Discovery,google/flan-t5-xxl,20,X,BP P.L.C,From below text find the registered address of the company #?,"1 St James's Square, London, SW1Y 4PD","1 St James's Square, London, SW1Y 4PD"
3,What is the business/trading address of the company?,Discovery,google/flan-t5-xxl,20,,,"from below text, What is the business / trading Address of the Company #?",,
4,What is identification number of the organization?,Discovery,google/flan-t5-xxl,20,X,BP P.L.C,from below text find identification number of the organization #? ,102498,102498
ID,Question,Source,Model,Token,PoCScope,Company,Prompt,Expected Answer,Cosine Prompt
1,What is name and trading name of the organization?,Discovery,meta-llama/llama-2-70b-chat,20,,,"From the below text, what is name and trading name of the organization #?",,"What is name and trading name of #?"
2,What is the registered address of the company?,Discovery,google/flan-t5-xxl,20,X,BP P.L.C,"From the below text, find the Registered address of the Company # ?","1 St James's Square, London, SW1Y 4PD","What is the Registered office address of company # ?"
3,What is the business/trading address of the company?,Discovery,google/flan-t5-xxl,20,,,"From the below text, what is the business / trading address of the company #?",,"What is the business / trading address of #?"
4,What is identification number of the organization?,Discovery,google/flan-t5-xxl,20,X,BP P.L.C,From the below text find identification number of the organization #?,102498,
5,Who are the key controllers and authorized signatories?,KYCSummary,meta-llama/llama-2-70b-chat,30,,,"from below text, Who are the key controllers and authorized signatories of the company #?",,
6,Names all the active directors of the company.,KYCSummary,meta-llama/llama-2-70b-chat,30,X,BP P.L.C,"from below text, find the names of all active directors of the company # in sequence ?","LUND, Helge BLANC, Amanda Jayne DALEY, Pamela","LUND, Helge BLANC, Amanda Jayne DALEY, Pamela"
7,"What is the status of the organization ex; active, dissolved?",Discovery,google/flan-t5-xxl,20,X,BP P.L.C,"from below text, what is the status of the organization # ex: Active or Dissolved ?",Active,Active
8,What is the year of incorporation?,Discovery,google/flan-t5-xxl,20,X,BP P.L.C,"from below text, What is the year of incorporation of the company #?",1909,1909
6,Names all the active directors of the company.,KYCSummary,meta-llama/llama-2-70b-chat,30,X,BP P.L.C,"from below text, find the names of all active directors of the company # in sequence ?","LUND, Helge BLANC, Amanda Jayne DALEY, Pamela",
7,"What is the status of the organization ex; active, dissolved?",Discovery,google/flan-t5-xxl,20,X,BP P.L.C,"from below text, what is the status of the organization # ex: Active or Dissolved ?",Active,
8,What is the year of incorporation?,Discovery,google/flan-t5-xxl,20,X,BP P.L.C,"from below text, What is the year of incorporation of the company #?",1909,
9,Who are the shareholders of the company along with the percentage of ownership?,Discovery,google/flan-t5-xxl,20,,,"from below text, Who are the shareholders of the company # along with the percentage of ownership?",,
10,Who is the ultimate owner of the company?,KYCSummary,meta-llama/llama-2-70b-chat,30,,,"from below text, Who is the ultimate owner of the company #?",,
11,Who are the key controllers and authorized signatories?,KYCSummary,meta-llama/llama-2-70b-chat,30,X,,"from below text, Who are the key controllers and authorized signatories of the company #?",,
12,What is the industry type/SIC/NICS code of the company?,KYCSummary,google/flan-t5-xxl,20,,,"from below text, What is the industry type/SIC/NICS code of the company #?",,
13,What are the products utilized by the company?,KYCSummary,google/flan-ul2,20,X,,"from below text, What are the products manufactured by the company #?",,
14,What is/are operation location/s or jurisdiction/s?,Discovery,google/flan-t5-xxl,20,,,"from below text, What is/are operation location/s or jurisdiction/s of the comoany #?",,
14,What is/are operation location/s or jurisdiction/s?,Discovery,google/flan-t5-xxl,20,,,"from below text, What is/are operation location/s or jurisdiction/s of the company #?",,
15,Number of employees of the firm,KYCSummary,meta-llama/llama-2-70b-chat,30,X,,"from below text, find the Number of employees of the company #?",,
16,Name of the subsidiary of the company,Discovery,google/flan-t5-xxl,20,,,"from below text, find the Name of the subsidiary of the company #?",,
17,What is the Legal entity Type of the organization ex; publicly traded/limited liability etc.,Discovery,google/flan-t5-xxl,30,X,,"from below text, What is the Legal entity Type of the organization # ex; publicly traded or limited liability or Private limited? etc.",,
17,What is the Legal entity Type of the organization ex; publicly traded/limited liability etc.,Discovery,google/flan-t5-xxl,30,X,,"from below text, What is the Legal entity Type of the organization # ex; Public limited or publicly traded or limited liability or Private limited? etc.",,
18,What is the turnover or revenue of the organization?,KYCSummary,meta-llama/llama-2-70b-chat,30,X,,"from below text, find the turnover or revenue of the organization #?",,
19,Certificate/licence issued by the government.,Discovery,google/flan-t5-xxl,20,,,"from below text, What is the Certificate/licence issued by the government for company #?",,
20,Whats is the next date of confirmation statement?,Discovery,google/flan-t5-xxl,30,X,BP P.L.C,"from below text, find the next date of confirmation statement for company #?",30/06/24,30/06/24
20,Whats is the next date of confirmation statement?,Discovery,google/flan-t5-xxl,30,X,BP P.L.C,"from below text, find the next date of confirmation statement for company #?",30/06/24,
26 changes: 14 additions & 12 deletions src/services/data-extraction/data-extraction.csv.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,34 +6,35 @@ import {DataExtractionApi} from "./data-extraction.api";
import {DataExtractionQuestionModel, DataExtractionResultModel} from "../../models";
import {first, parseCsv} from "../../utils";

const csvFile: string = `ID,Question,Source,Model,Token,PoCScope,Company,Prompt,Expected Answer,watsonx Response
1,What is Name and trading name of the organization?,Discovery,google/flan-t5-xxl,20,,,"From below text,What is Name and trading name of the organization #?",,
2,What is the registered address of the company?,Discovery,google/flan-t5-xxl,20,X,BP P.L.C,From below text find the registered address of the company #?,"1 St James's Square, London, SW1Y 4PD","1 St James's Square, London, SW1Y 4PD"
3,What is the business/trading address of the company?,Discovery,google/flan-t5-xxl,20,,,"from below text, What is the business / trading Address of the Company #?",,
4,What is identification number of the organization?,Discovery,google/flan-t5-xxl,20,X,BP P.L.C,from below text find identification number of the organization #? ,102498,102498
const csvFile: string = `ID,Question,Source,Model,Token,PoCScope,Company,Prompt,Expected Answer,Cosine Prompt
1,What is name and trading name of the organization?,Discovery,meta-llama/llama-2-70b-chat,20,,,"From the below text, what is name and trading name of the organization #?",,"What is name and trading name of #?"
2,What is the registered address of the company?,Discovery,google/flan-t5-xxl,20,X,BP P.L.C,"From the below text, find the Registered address of the Company # ?","1 St James's Square, London, SW1Y 4PD","What is the Registered office address of company # ?"
3,What is the business/trading address of the company?,Discovery,google/flan-t5-xxl,20,,,"From the below text, what is the business / trading address of the company #?",,"What is the business / trading address of #?"
4,What is identification number of the organization?,Discovery,google/flan-t5-xxl,20,X,BP P.L.C,From the below text find identification number of the organization #?,102498,
5,Who are the key controllers and authorized signatories?,KYCSummary,meta-llama/llama-2-70b-chat,30,,,"from below text, Who are the key controllers and authorized signatories of the company #?",,
6,Names all the active directors of the company.,KYCSummary,meta-llama/llama-2-70b-chat,30,X,BP P.L.C,"from below text, find the names of all active directors of the company # in sequence ?","LUND, Helge BLANC, Amanda Jayne DALEY, Pamela","LUND, Helge BLANC, Amanda Jayne DALEY, Pamela"
7,"What is the status of the organization ex; active, dissolved?",Discovery,google/flan-t5-xxl,20,X,BP P.L.C,"from below text, what is the status of the organization # ex: Active or Dissolved ?",Active,Active
8,What is the year of incorporation?,Discovery,google/flan-t5-xxl,20,X,BP P.L.C,"from below text, What is the year of incorporation of the company #?",1909,1909
6,Names all the active directors of the company.,KYCSummary,meta-llama/llama-2-70b-chat,30,X,BP P.L.C,"from below text, find the names of all active directors of the company # in sequence ?","LUND, Helge BLANC, Amanda Jayne DALEY, Pamela",
7,"What is the status of the organization ex; active, dissolved?",Discovery,google/flan-t5-xxl,20,X,BP P.L.C,"from below text, what is the status of the organization # ex: Active or Dissolved ?",Active,
8,What is the year of incorporation?,Discovery,google/flan-t5-xxl,20,X,BP P.L.C,"from below text, What is the year of incorporation of the company #?",1909,
9,Who are the shareholders of the company along with the percentage of ownership?,Discovery,google/flan-t5-xxl,20,,,"from below text, Who are the shareholders of the company # along with the percentage of ownership?",,
10,Who is the ultimate owner of the company?,KYCSummary,meta-llama/llama-2-70b-chat,30,,,"from below text, Who is the ultimate owner of the company #?",,
11,Who are the key controllers and authorized signatories?,KYCSummary,meta-llama/llama-2-70b-chat,30,X,,"from below text, Who are the key controllers and authorized signatories of the company #?",,
12,What is the industry type/SIC/NICS code of the company?,KYCSummary,google/flan-t5-xxl,20,,,"from below text, What is the industry type/SIC/NICS code of the company #?",,
13,What are the products utilized by the company?,KYCSummary,google/flan-ul2,20,X,,"from below text, What are the products manufactured by the company #?",,
14,What is/are operation location/s or jurisdiction/s?,Discovery,google/flan-t5-xxl,20,,,"from below text, What is/are operation location/s or jurisdiction/s of the comoany #?",,
14,What is/are operation location/s or jurisdiction/s?,Discovery,google/flan-t5-xxl,20,,,"from below text, What is/are operation location/s or jurisdiction/s of the company #?",,
15,Number of employees of the firm,KYCSummary,meta-llama/llama-2-70b-chat,30,X,,"from below text, find the Number of employees of the company #?",,
16,Name of the subsidiary of the company,Discovery,google/flan-t5-xxl,20,,,"from below text, find the Name of the subsidiary of the company #?",,
17,What is the Legal entity Type of the organization ex; publicly traded/limited liability etc.,Discovery,google/flan-t5-xxl,30,X,,"from below text, What is the Legal entity Type of the organization # ex; publicly traded or limited liability or Private limited? etc.",,
17,What is the Legal entity Type of the organization ex; publicly traded/limited liability etc.,Discovery,google/flan-t5-xxl,30,X,,"from below text, What is the Legal entity Type of the organization # ex; Public limited or publicly traded or limited liability or Private limited? etc.",,
18,What is the turnover or revenue of the organization?,KYCSummary,meta-llama/llama-2-70b-chat,30,X,,"from below text, find the turnover or revenue of the organization #?",,
19,Certificate/licence issued by the government.,Discovery,google/flan-t5-xxl,20,,,"from below text, What is the Certificate/licence issued by the government for company #?",,
20,Whats is the next date of confirmation statement?,Discovery,google/flan-t5-xxl,30,X,BP P.L.C,"from below text, find the next date of confirmation statement for company #?",30/06/24,30/06/24`
20,Whats is the next date of confirmation statement?,Discovery,google/flan-t5-xxl,30,X,BP P.L.C,"from below text, find the next date of confirmation statement for company #?",30/06/24,`

/**
 * Configuration for a single data-extraction question, extending the base
 * question model with the settings read from one row of the question CSV.
 *
 * Column references below follow the CSV header
 * `ID,Question,Source,Model,Token,PoCScope,Company,Prompt,Expected Answer,Cosine Prompt`.
 */
export interface DataExtractionConfig extends DataExtractionQuestionModel {
// Presumably the `Source` column (e.g. `Discovery`, `KYCSummary`) — mapping not visible here; confirm.
source: string;
// Presumably the `Model` column (a model id such as `google/flan-t5-xxl`) — mapping not visible here; confirm.
model: string;
// Populated from the `Token` column (fifth CSV value).
// NOTE(review): whether the parser yields a real number or a numeric string is not visible here — confirm.
tokens: number;
// `Expected Answer` column (ninth CSV value), coerced to a string when the row is mapped.
expectedResponse: string;
// `Prompt` column (eighth CSV value); the CSV prompts carry a `#` placeholder for the company name.
prompt: string;
// `Cosine Prompt` column (tenth CSV value), coerced to a string when the row is mapped.
// The implementation replaces `#` with the customer name and uses the result as the
// question for relevant-passage selection, falling back to the natural-language query
// when the result is an empty string.
cosinePrompt: string;
}

let data: Promise<DataExtractionConfig[]>;
Expand Down Expand Up @@ -71,7 +72,8 @@ export abstract class DataExtractionCsv<A, C> extends DataExtractionApi {
tokens: values[4],
inScope: values[5] === 'X',
prompt: values[7],
expectedResponse: '' + values[8]
expectedResponse: '' + values[8],
cosinePrompt: '' + values[9],
}))
.filter(val => val.id !== 'ID');
})
Expand Down
23 changes: 6 additions & 17 deletions src/services/data-extraction/data-extraction.impl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -160,9 +160,11 @@ export class DataExtractionImpl extends DataExtractionCsv<WatsonBackends, Contex

const passages: string[] = this.handleDiscoveryResponse(response.result, customer, passagesPerDocument)

const text: string = await this.findRelevantPassages(naturalLanguageQuery, passages)
const question = config.cosinePrompt.replace('#', customer) || naturalLanguageQuery

console.log('1. Text extracted from Discovery:', {naturalLanguageQuery, text})
const text: string = await this.findRelevantPassages(question, passages)

console.log('1. Text extracted from Discovery:', {naturalLanguageQuery, text, cosineQuestion: question, passages})

console.log(text)

Expand All @@ -174,18 +176,9 @@ export class DataExtractionImpl extends DataExtractionCsv<WatsonBackends, Contex
? this.handleDiscoveryPassages(result)
: this.handleDiscoveryResult(result, subject);

const cleanPassages = passages
return passages
.map(stripTags)
.map(stripUrls)

cleanPassages.forEach((cleanPassage: string, index: number) => {
const originalPassage = passages[index]
if (cleanPassage.length !== originalPassage.length) {
console.log('Passage changed', {originalPassage, cleanPassage})
}
})

return cleanPassages
}

filterDocuments(result: DiscoveryV2.QueryResponse, subject: string): DiscoveryV2.QueryResult[] {
Expand Down Expand Up @@ -220,7 +213,7 @@ export class DataExtractionImpl extends DataExtractionCsv<WatsonBackends, Contex

return await queue
.add(async () => {
const relevantPassage = await axios
return axios
.post<{relevant_passage: string} | string>(url, {question, passages})
.then(response => {
if (typeof response.data === 'string') {
Expand All @@ -234,10 +227,6 @@ export class DataExtractionImpl extends DataExtractionCsv<WatsonBackends, Contex

return passages.join('\n')
})

console.log('0. Found relevant passage: ', {relevantPassage})

return relevantPassage
}) as string
}

Expand Down
7 changes: 1 addition & 6 deletions src/utils/discovery-v2.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,7 @@ export const createDiscoveryV2 = async (options: UserOptions): Promise<Discovery
console.log('Queued discovery query')

return queue.add(async () => {
console.log('Calling query', args)
const result = await originalQuery(...args)

console.log('Query result', {result})

return result;
return originalQuery(...args)
})
}

Expand Down

0 comments on commit 93c0f2a

Please sign in to comment.