Skip to content

Commit

Permalink
feat(misc): embeddings script and list sources in ai response (#18455)
Browse files Browse the repository at this point in the history
  • Loading branch information
mandarini authored Aug 3, 2023
1 parent 0c0e61e commit e9d50af
Show file tree
Hide file tree
Showing 15 changed files with 1,185 additions and 319 deletions.
53 changes: 53 additions & 0 deletions .github/workflows/generate-embeddings.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Regenerates the documentation embeddings on a schedule (or on demand) by
# running the `tools-documentation-create-embeddings:run-node` Nx target.
name: Generate embeddings

on:
  schedule:
    # GitHub evaluates cron schedules in UTC.
    - cron: "0 5 * * 0,4" # sunday, thursday 5AM
  workflow_dispatch:

jobs:
  cache-and-install:
    # Only run in the upstream repository, never in forks.
    if: github.repository == 'nrwl/nx'
    runs-on: ubuntu-latest
    strategy:
      matrix:
        node-version: [18]

    steps:
      - name: Checkout
        uses: actions/checkout@v3

      - name: Install Node.js
        uses: actions/setup-node@v3
        with:
          # Read the version from the matrix so it is declared in one place.
          node-version: ${{ matrix.node-version }}

      - name: Install pnpm
        uses: pnpm/action-setup@v2
        id: pnpm-install
        with:
          version: 7
          run_install: false

      - name: Get pnpm store directory
        id: pnpm-cache
        shell: bash
        run: |
          echo "STORE_PATH=$(pnpm store path)" >> $GITHUB_OUTPUT

      - name: Setup pnpm cache
        uses: actions/cache@v3
        with:
          path: ${{ steps.pnpm-cache.outputs.STORE_PATH }}
          key: ${{ runner.os }}-pnpm-store-${{ hashFiles('**/pnpm-lock.yaml') }}
          restore-keys: |
            ${{ runner.os }}-pnpm-store-

      - name: Install dependencies
        # NOTE(review): --no-frozen-lockfile allows the install to update the
        # lockfile instead of failing on a mismatch; confirm this is intended.
        run: pnpm install --no-frozen-lockfile

      - name: Run embeddings script
        run: pnpm exec nx run tools-documentation-create-embeddings:run-node
        env:
          NX_NEXT_PUBLIC_SUPABASE_URL: ${{ secrets.NX_NEXT_PUBLIC_SUPABASE_URL }}
          NX_SUPABASE_SERVICE_ROLE_KEY: ${{ secrets.NX_SUPABASE_SERVICE_ROLE_KEY }}
          NX_OPENAI_KEY: ${{ secrets.NX_OPENAI_KEY }}
31 changes: 22 additions & 9 deletions nx-dev/data-access-ai/src/lib/data-access-ai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,13 @@ import {
ChatCompletionRequestMessageRoleEnum,
CreateCompletionResponseUsage,
} from 'openai';
import { getMessageFromResponse, sanitizeLinksInResponse } from './utils';
import {
PageSection,
getListOfSources,
getMessageFromResponse,
sanitizeLinksInResponse,
toMarkdownList,
} from './utils';

const openAiKey = process.env['NX_OPENAI_KEY'];
const supabaseUrl = process.env['NX_NEXT_PUBLIC_SUPABASE_URL'];
Expand All @@ -21,9 +27,12 @@ const config = new Configuration({
});
const openai = new OpenAIApi(config);

export async function nxDevDataAccessAi(
query: string
): Promise<{ textResponse: string; usage?: CreateCompletionResponseUsage }> {
export async function nxDevDataAccessAi(query: string): Promise<{
textResponse: string;
usage?: CreateCompletionResponseUsage;
sources: { heading: string; url: string }[];
sourcesMarkdown: string;
}> {
try {
if (!openAiKey) {
throw new ApplicationError('Missing environment variable NX_OPENAI_KEY');
Expand Down Expand Up @@ -80,11 +89,11 @@ export async function nxDevDataAccessAi(
}: CreateEmbeddingResponse = embeddingResponse.data;

const { error: matchError, data: pageSections } = await supabaseClient.rpc(
'match_page_sections',
'match_page_sections_2',
{
embedding,
match_threshold: 0.78,
match_count: 10,
match_count: 15,
min_content_length: 50,
}
);
Expand All @@ -97,13 +106,13 @@ export async function nxDevDataAccessAi(
let tokenCount = 0;
let contextText = '';

for (let i = 0; i < pageSections.length; i++) {
const pageSection = pageSections[i];
for (let i = 0; i < (pageSections as PageSection[]).length; i++) {
const pageSection: PageSection = pageSections[i];
const content = pageSection.content;
const encoded = tokenizer.encode(content);
tokenCount += encoded.text.length;

if (tokenCount >= 1500) {
if (tokenCount >= 2500) {
break;
}

Expand Down Expand Up @@ -163,9 +172,13 @@ export async function nxDevDataAccessAi(

const responseWithoutBadLinks = await sanitizeLinksInResponse(message);

const sources = getListOfSources(pageSections);

return {
textResponse: responseWithoutBadLinks,
usage: response.data.usage,
sources,
sourcesMarkdown: toMarkdownList(sources),
};
} catch (err: unknown) {
if (err instanceof UserError) {
Expand Down
37 changes: 37 additions & 0 deletions nx-dev/data-access-ai/src/lib/utils.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
import { CreateChatCompletionResponse } from 'openai';
/**
 * Shape of a documentation page section as consumed by `getListOfSources`.
 *
 * NOTE(review): the AI data-access code treats the rows returned by the
 * Supabase `match_page_sections_2` RPC as this type; nothing validates the
 * shape at runtime, so keep it in sync with that database function.
 */
export interface PageSection {
// NOTE(review): presumably the section row's primary key — confirm against the schema.
id: number;
// NOTE(review): presumably references the page this section belongs to — confirm.
page_id: number;
// Text of the section; the caller tokenizes it when building the prompt context.
content: string;
// Heading of the section; used as the link text in the list of sources.
heading: string;
// NOTE(review): presumably the match score computed by the RPC — confirm.
similarity: number;
// NOTE(review): not read by the code in this file; presumably a heading anchor — confirm.
slug: string;
// Appended to `https://nx.dev` to build the source URL; sections where this
// is null (or empty) are left out of the list of sources.
url_partial: string | null;
}

export function getMessageFromResponse(
response: CreateChatCompletionResponse
Expand All @@ -11,6 +20,34 @@ export function getMessageFromResponse(
return response.choices[0].message?.content ?? '';
}

/**
 * Builds the de-duplicated list of documentation sources for a set of
 * matched page sections.
 *
 * Sections without a `url_partial` (null or empty) are ignored. When several
 * sections share the same `url_partial`, only the first one encountered is
 * kept, so the result preserves the input order.
 *
 * @param pageSections Matched page sections, in the order they were returned.
 * @returns One `{ heading, url }` entry per distinct `url_partial`, where
 *   `url` is the partial prefixed with `https://nx.dev`.
 */
export function getListOfSources(
  pageSections: PageSection[]
): { heading: string; url: string }[] {
  const seenPartials = new Set<string>();
  const sources: { heading: string; url: string }[] = [];

  for (const { heading, url_partial: partial } of pageSections) {
    // Skip sections that cannot be linked, or whose page is already listed.
    if (!partial || seenPartials.has(partial)) {
      continue;
    }
    seenPartials.add(partial);
    sources.push({ heading, url: `https://nx.dev${partial}` });
  }

  return sources;
}

/**
 * Renders a list of sources as a Markdown bullet list of links.
 *
 * @param sections Entries to render; `heading` becomes the link text and
 *   `url` the link target.
 * @returns One `- [heading](url)` line per entry, joined with newlines
 *   (an empty string when there are no entries).
 */
export function toMarkdownList(
  sections: { heading: string; url: string }[]
): string {
  const bullets: string[] = [];
  for (const { heading, url } of sections) {
    bullets.push('- [' + heading + '](' + url + ')');
  }
  return bullets.join('\n');
}

export async function sanitizeLinksInResponse(
response: string
): Promise<string> {
Expand Down
20 changes: 18 additions & 2 deletions nx-dev/feature-ai/src/lib/feature-ai.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ export function FeatureAi(): JSX.Element {
const [query, setSearchTerm] = useState('');
const [loading, setLoading] = useState(false);
const [feedbackSent, setFeedbackSent] = useState<boolean>(false);
const [sources, setSources] = useState('');

const warning = `
{% callout type="warning" title="Always double check!" %}
Expand All @@ -23,19 +24,33 @@ export function FeatureAi(): JSX.Element {
setLoading(true);
let completeText = '';
let usage;
let sourcesMarkdown = '';
try {
const aiResponse = await nxDevDataAccessAi(query);
completeText = aiResponse.textResponse;
usage = aiResponse.usage;
setSources(
JSON.stringify(aiResponse.sources?.map((source) => source.url))
);
sourcesMarkdown = aiResponse.sourcesMarkdown;
setLoading(false);
} catch (error) {
setError(error as any);
setLoading(false);
}
sendCustomEvent('ai_query', 'ai', 'query', undefined, { query, ...usage });
sendCustomEvent('ai_query', 'ai', 'query', undefined, {
query,
...usage,
});
setFeedbackSent(false);

const sourcesMd = `
{% callout type="info" title="Sources" %}
${sourcesMarkdown}
{% /callout %}`;

setFinalResult(
renderMarkdown(warning + completeText, { filePath: '' }).node
renderMarkdown(warning + completeText + sourcesMd, { filePath: '' }).node
);
};

Expand All @@ -44,6 +59,7 @@ export function FeatureAi(): JSX.Element {
sendCustomEvent('ai_feedback', 'ai', type, undefined, {
query,
result: finalResult,
sources,
});
setFeedbackSent(true);
} catch (error) {
Expand Down
7 changes: 6 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@
"flat": "^5.0.2",
"fork-ts-checker-webpack-plugin": "7.2.13",
"fs-extra": "^11.1.0",
"github-slugger": "^2.0.0",
"gpt3-tokenizer": "^1.1.5",
"html-webpack-plugin": "5.5.0",
"http-server": "14.1.0",
Expand All @@ -191,6 +192,7 @@
"jest": "29.4.3",
"jest-config": "^29.4.1",
"jest-environment-jsdom": "29.4.3",
"jest-environment-node": "^29.4.1",
"jest-resolve": "^29.4.1",
"jest-util": "^29.4.1",
"js-tokens": "^4.0.0",
Expand All @@ -206,6 +208,9 @@
"loader-utils": "2.0.3",
"magic-string": "~0.30.2",
"markdown-factory": "^0.0.6",
"mdast-util-from-markdown": "^1.3.1",
"mdast-util-to-markdown": "^1.5.0",
"mdast-util-to-string": "^3.2.0",
"memfs": "^3.0.1",
"metro-config": "0.76.7",
"metro-resolver": "0.76.7",
Expand Down Expand Up @@ -267,6 +272,7 @@
"typedoc": "0.24.8",
"typedoc-plugin-markdown": "3.15.3",
"typescript": "~5.1.3",
"unist-builder": "^4.0.0",
"unzipper": "^0.10.11",
"url-loader": "^4.1.1",
"use-sync-external-store": "^1.2.0",
Expand Down Expand Up @@ -359,4 +365,3 @@
}
}
}

Loading

1 comment on commit e9d50af

@vercel
Copy link

@vercel vercel bot commented on e9d50af Aug 3, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

nx-dev – ./

nx-five.vercel.app
nx-dev-git-master-nrwl.vercel.app
nx-dev-nrwl.vercel.app
nx.dev

Please sign in to comment.