feat(misc): add memory to chat completion (#18463)
mandarini authored Aug 8, 2023
1 parent 7de3046 commit 8be2a26
Showing 4 changed files with 280 additions and 66 deletions.
140 changes: 89 additions & 51 deletions nx-dev/data-access-ai/src/lib/data-access-ai.ts
@@ -8,17 +8,31 @@ import {
OpenAIApi,
CreateModerationResponse,
CreateEmbeddingResponse,
ChatCompletionRequestMessageRoleEnum,
CreateCompletionResponseUsage,
} from 'openai';
import {
ApplicationError,
ChatItem,
PageSection,
UserError,
checkEnvVariables,
getListOfSources,
getMessageFromResponse,
initializeChat,
sanitizeLinksInResponse,
toMarkdownList,
} from './utils';

const DEFAULT_MATCH_THRESHOLD = 0.78;
const DEFAULT_MATCH_COUNT = 15;
const MIN_CONTENT_LENGTH = 50;

// This limits history to 30 messages back and forth.
// The value is arbitrary, but generous.
// Ideally, history length would be capped by token count;
// this is a temporary solution.
const MAX_HISTORY_LENGTH = 30;
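// A token-based cap (hypothetical sketch, not part of this commit) could
// reuse the GPT3Tokenizer imported above, for example:
//   const historyTokens = (history: ChatItem[]) =>
//     history.reduce((n, m) => n + tokenizer.encode(m.content).text.length, 0);
//   while (historyTokens(chatFullHistory) > MAX_HISTORY_TOKENS) {
//     chatFullHistory.splice(1, 2); // drop oldest user/assistant pair, keep system prompt
//   }
// where MAX_HISTORY_TOKENS is a hypothetical token budget constant.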

const openAiKey = process.env['NX_OPENAI_KEY'];
const supabaseUrl = process.env['NX_NEXT_PUBLIC_SUPABASE_URL'];
const supabaseServiceKey = process.env['NX_SUPABASE_SERVICE_ROLE_KEY'];
@@ -27,34 +41,34 @@ const config = new Configuration({
});
const openai = new OpenAIApi(config);

export async function nxDevDataAccessAi(query: string): Promise<{
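// Module-level state: the full history and running token count persist
// across calls within a session and act as the chat's "memory".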
let chatFullHistory: ChatItem[] = [];

let totalTokensSoFar = 0;

export async function nxDevDataAccessAi(
query: string,
aiResponse?: string
): Promise<{
textResponse: string;
usage?: CreateCompletionResponseUsage;
sources: { heading: string; url: string }[];
sourcesMarkdown: string;
}> {
try {
if (!openAiKey) {
throw new ApplicationError('Missing environment variable NX_OPENAI_KEY');
}

if (!supabaseUrl) {
throw new ApplicationError(
'Missing environment variable NX_NEXT_PUBLIC_SUPABASE_URL'
);
}
if (chatFullHistory.length > MAX_HISTORY_LENGTH) {
// slice() returns a new array without mutating the original, so the
// result must be assigned back; otherwise this check is a no-op.
chatFullHistory = chatFullHistory.slice(0, MAX_HISTORY_LENGTH - 4);
}

if (!supabaseServiceKey) {
throw new ApplicationError(
'Missing environment variable NX_SUPABASE_SERVICE_ROLE_KEY'
);
}
try {
checkEnvVariables(openAiKey, supabaseUrl, supabaseServiceKey);

if (!query) {
throw new UserError('Missing query in request data');
}

const supabaseClient = createClient(supabaseUrl, supabaseServiceKey);
const supabaseClient = createClient(
supabaseUrl as string,
supabaseServiceKey as string
);

// Moderate the content to comply with OpenAI T&C
const sanitizedQuery = query.trim();
@@ -72,9 +86,27 @@ export async function nxDevDataAccessAi(query: string): Promise<{
}

// Create embedding from query
// NOTE: Here, we may or may not want to include the previous AI response
/**
* Whether to include the previous AI response when retrieving relevant Nx
* documentation sections via embeddings is a design decision.
* Including the prior response might give more contextually relevant sections,
* but just sending the query might suffice for many cases.
*
* We can experiment with this.
*
* What the solution looks like with the previous response:
*
* const embeddingResponse = await openai.createEmbedding({
* model: 'text-embedding-ada-002',
* input: sanitizedQuery + aiResponse,
* });
*
* This costs more tokens, so if we see costs skyrocket we can remove it.
* As noted, it's a design decision, and it may or may not really improve results.
*/
const embeddingResponse = await openai.createEmbedding({
model: 'text-embedding-ada-002',
input: sanitizedQuery,
input: sanitizedQuery + (aiResponse ?? ''), // avoid appending the string "undefined" on the first call
});

if (embeddingResponse.status !== 200) {
@@ -92,16 +124,24 @@ export async function nxDevDataAccessAi(query: string): Promise<{
'match_page_sections_2',
{
embedding,
match_threshold: 0.78,
match_count: 15,
min_content_length: 50,
match_threshold: DEFAULT_MATCH_THRESHOLD,
match_count: DEFAULT_MATCH_COUNT,
min_content_length: MIN_CONTENT_LENGTH,
}
);

if (matchError) {
throw new ApplicationError('Failed to match page sections', matchError);
}

// Note: this is experimental. It should work,
// mainly because we're matching on the previous response + query.
if (!pageSections || pageSections.length === 0) {
throw new UserError(
'Nothing relevant found in the Nx documentation! Please try another query.'
);
}

const tokenizer = new GPT3Tokenizer({ type: 'gpt3' });
let tokenCount = 0;
let contextText = '';
@@ -122,39 +162,33 @@ export async function nxDevDataAccessAi(query: string): Promise<{
const prompt = `
${`
You are a knowledgeable Nx representative.
Your knowledge is based entirely on the official Nx documentation.
You should answer queries using ONLY that information.
Your knowledge is based entirely on the official Nx Documentation.
You can answer queries using ONLY that information.
You cannot answer queries using your own knowledge or experience.
Answer in markdown format. Always give an example, answer as thoroughly as you can, and
always provide a link to relevant documentation
on the https://nx.dev website. All the links you find or post
that look like local or relative links, always prepend with "https://nx.dev".
Your answer should be in the form of a Markdown article, much like the
existing Nx documentation. Include a title, and subsections, if it makes sense.
Mark the titles and the subsections with the appropriate markdown syntax.
If you are unsure and the answer is not explicitly written in the Nx documentation, say
"Sorry, I don't know how to help with that.
You can visit the [Nx documentation](https://nx.dev/getting-started/intro) for more info."
Remember, answer the question using ONLY the information provided in the Nx documentation.
Answer as markdown (including related code snippets if available).
Your answer should be in the form of a Markdown article
(including related code snippets if available), much like the
existing Nx documentation. Mark the titles and the subsections with the appropriate markdown syntax.
If you are unsure and cannot find an answer in the Nx Documentation, say
"Sorry, I don't know how to help with that. You can visit the [Nx documentation](https://nx.dev/getting-started/intro) for more info."
Remember, answer the question using ONLY the information provided in the Nx Documentation.
`
.replace(/\s+/g, ' ')
.trim()}
`;

const chatGptMessages = [
{
role: ChatCompletionRequestMessageRoleEnum.System,
content: prompt,
},
{
role: ChatCompletionRequestMessageRoleEnum.Assistant,
content: contextText,
},
{
role: ChatCompletionRequestMessageRoleEnum.User,
content: sanitizedQuery,
},
];
const { chatMessages: chatGptMessages, chatHistory } = initializeChat(
chatFullHistory,
query,
contextText,
prompt,
aiResponse
);

chatFullHistory = chatHistory;
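// The reassignment above keeps the module-level history pointing at the
// updated array for the next call.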

const response = await openai.createChatCompletion({
model: 'gpt-3.5-turbo-16k',
@@ -174,6 +208,8 @@ export async function nxDevDataAccessAi(query: string): Promise<{

const sources = getListOfSources(pageSections);

totalTokensSoFar += response.data.usage?.total_tokens ?? 0;

return {
textResponse: responseWithoutBadLinks,
usage: response.data.usage,
@@ -196,10 +232,12 @@ export async function nxDevDataAccessAi(query: string): Promise<{
throw err;
}
}
export class ApplicationError extends Error {
constructor(message: string, public data: Record<string, any> = {}) {
super(message);
}

export function resetHistory() {
chatFullHistory = [];
totalTokensSoFar = 0;
}

export class UserError extends ApplicationError {}
export function getHistory(): ChatItem[] {
return chatFullHistory;
}
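
Taken together, the new optional aiResponse parameter and the exported history helpers give the chat session-level memory. A minimal sketch of a consumer (hypothetical code, not part of this commit; only the exports shown above are assumed, and the import path depends on the workspace setup):

import { nxDevDataAccessAi, resetHistory } from './data-access-ai';

let lastAnswer: string | undefined;

async function ask(question: string): Promise<string> {
  // Threading the previous answer back in appends it to the chat history
  // and to the embedding input used for retrieval.
  const { textResponse } = await nxDevDataAccessAi(question, lastAnswer);
  lastAnswer = textResponse;
  return textResponse;
}

// Start a fresh conversation by clearing the module-level state:
// resetHistory();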
115 changes: 108 additions & 7 deletions nx-dev/data-access-ai/src/lib/utils.ts
@@ -1,4 +1,8 @@
import { CreateChatCompletionResponse } from 'openai';
import {
ChatCompletionRequestMessageRoleEnum,
CreateChatCompletionResponse,
} from 'openai';
import { getHistory } from './data-access-ai';
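// Note: importing from data-access-ai creates a circular dependency between
// these two files; it works because getHistory is only called at runtime.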
export interface PageSection {
id: number;
page_id: number;
Expand All @@ -12,11 +16,6 @@ export interface PageSection {
export function getMessageFromResponse(
response: CreateChatCompletionResponse
): string {
/**
*
* This function here will or may be enhanced
* once we add more functionality
*/
return response.choices[0].message?.content ?? '';
}

@@ -33,7 +32,7 @@ export function getListOfSources(
return false;
})
.map((section) => ({
heading: section.heading,
heading: section.heading ?? section.url_partial,
url: `https://nx.dev${section.url_partial}`,
}));

@@ -85,3 +84,105 @@ async function is404(url: string): Promise<boolean> {
}
}
}

export function checkEnvVariables(
openAiKey?: string,
supabaseUrl?: string,
supabaseServiceKey?: string
) {
if (!openAiKey) {
throw new ApplicationError('Missing environment variable NX_OPENAI_KEY');
}
if (!supabaseUrl) {
throw new ApplicationError(
'Missing environment variable NX_NEXT_PUBLIC_SUPABASE_URL'
);
}
if (!supabaseServiceKey) {
throw new ApplicationError(
'Missing environment variable NX_SUPABASE_SERVICE_ROLE_KEY'
);
}
}
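
// Note: checkEnvVariables() throws on missing values, but it is not a
// TypeScript assertion function, so the callers' variables are not narrowed.
// That is why data-access-ai.ts casts supabaseUrl and supabaseServiceKey
// with `as string` after calling it.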

export class ApplicationError extends Error {
constructor(message: string, public data: Record<string, any> = {}) {
super(message);
}
}

export class UserError extends ApplicationError {}

export function initializeChat(
chatFullHistory: ChatItem[],
query: string,
contextText: string,
prompt: string,
aiResponse?: string
): { chatMessages: ChatItem[]; chatHistory: ChatItem[] } {
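// First call: seed the history with the system prompt plus the wrapped query.
// Subsequent calls: append the previous assistant answer plus the new wrapped query.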
const finalQuery = `
You will be provided the Nx Documentation.
Answer my message by following the approach below:
- Step 1: Identify CLUES (keywords, phrases, contextual information, references) in the input that you could use to generate an answer.
- Step 2: Deduce the diagnostic REASONING process from the premises (clues, question), relying ONLY on the information provided in the Nx Documentation. If you recognize vulgar language, answer the question if possible, and educate the user to stay polite.
- Step 3: EVALUATE the reasoning. If the reasoning aligns with the Nx Documentation, accept it. Do not use any external knowledge or make assumptions outside of the provided Nx documentation. If the reasoning doesn't strictly align with the Nx Documentation or relies on external knowledge or inference, reject it and answer with the exact string:
"Sorry, I don't know how to help with that. You can visit the [Nx documentation](https://nx.dev/getting-started/intro) for more info."
- Final Step: You can also rely on the messages we have exchanged so far.
Nx Documentation:
${contextText}
---- My message: ${query}
`;
let chatGptMessages: ChatItem[] = [];
let messages: ChatItem[] = [];

if (chatFullHistory.length > 0) {
messages = [
{
role: ChatCompletionRequestMessageRoleEnum.Assistant,
content: aiResponse ?? '',
},
{ role: ChatCompletionRequestMessageRoleEnum.User, content: finalQuery },
];
chatGptMessages = [...chatFullHistory, ...messages];
} else {
messages = [
{ role: ChatCompletionRequestMessageRoleEnum.System, content: prompt },
{ role: ChatCompletionRequestMessageRoleEnum.User, content: finalQuery },
];
chatGptMessages = [...messages];
}

chatFullHistory.push(...messages);
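// Note: push() mutates the array that was passed in (the module-level
// history in data-access-ai.ts), so the caller's state is updated even
// before the returned chatHistory is assigned back.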

return { chatMessages: chatGptMessages, chatHistory: chatFullHistory };
}

export function extractQuery(text: string) {
const regex = /---- My message: (.+)/;
const match = text.match(regex);
return match ? match[1].trim() : text;
}

export function getProcessedHistory(): ChatItem[] {
let history = getHistory();
history = history
.map((item) => {
if (item.role === ChatCompletionRequestMessageRoleEnum.User) {
item.content = extractQuery(item.content);
}
if (item.role !== ChatCompletionRequestMessageRoleEnum.System) {
return item;
} else {
return undefined;
}
})
.filter((item) => !!item) as ChatItem[];
return history;
}

export interface ChatItem {
role: ChatCompletionRequestMessageRoleEnum;
content: string;
}
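
For display, getProcessedHistory() drops system messages and unwraps user queries via extractQuery(), so a two-turn session would come back roughly as (illustrative values only):

[
  { role: 'user', content: 'How do I generate a library?' },
  { role: 'assistant', content: '...markdown answer...' },
  { role: 'user', content: 'How do I publish it?' },
  { role: 'assistant', content: '...markdown answer...' },
]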
(Diffs for the remaining two changed files are not shown.)

1 comment on commit 8be2a26

@vercel bot commented on 8be2a26 · Aug 8, 2023


Successfully deployed to the following URLs:

nx-dev – ./

nx-dev-git-master-nrwl.vercel.app
nx.dev
nx-dev-nrwl.vercel.app
nx-five.vercel.app
