Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: rag new schema, standalone package #448

Draft
wants to merge 26 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
523cbd8
feat: create ingest 'publish' script
mantagen Nov 6, 2024
1c06d78
remove merge markers
mantagen Nov 13, 2024
be9d8d5
add tests and key stage helper
mantagen Nov 19, 2024
20ba83e
Merge branch 'main' into feat/ingest-publish-rag
mantagen Nov 19, 2024
ad665b7
publish script
mantagen Nov 20, 2024
63a3ba0
fix publish step and retrieval
mantagen Nov 20, 2024
05ba3c5
rag lesson plan indexes
mantagen Nov 25, 2024
f33933b
Merge branch 'main' into feat/ingest-publish-rag
mantagen Nov 26, 2024
f0d8469
move rag into own package
mantagen Dec 11, 2024
ce1cd63
split out functions into own files and add tests
mantagen Dec 12, 2024
fe64dbd
fix search tests and add faker zod lib
mantagen Dec 12, 2024
1fd47b2
new rag behind feature flag
mantagen Dec 12, 2024
dba54e4
camel case sql query response
mantagen Dec 12, 2024
7485394
fix publishing step
mantagen Dec 18, 2024
a9f6fb3
Merge branch 'main' into feat/ingest-publish-rag
mantagen Dec 18, 2024
282664c
use posthog feature flag
mantagen Dec 18, 2024
55390d3
Merge branch 'main' into feat/ingest-publish-rag
mantagen Jan 6, 2025
af00731
remove shortenKeyStage helper
mantagen Jan 6, 2025
b9d7297
move block above new rag call
mantagen Jan 6, 2025
0be6d10
remove else block
mantagen Jan 6, 2025
4b4eaa2
Merge branch 'main' into feat/ingest-publish-rag
mantagen Jan 7, 2025
f83a6ae
split out search function
mantagen Jan 9, 2025
2e0c962
rename rag index test file
mantagen Jan 13, 2025
3a1f4fb
code style change
mantagen Jan 13, 2025
12f6233
move utils into appropriate places
mantagen Jan 13, 2025
75a7bbc
use map over for
mantagen Jan 13, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/aila/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"@oakai/db": "*",
"@oakai/exports": "*",
"@oakai/logger": "*",
"@oakai/rag": "*",
"@sentry/nextjs": "^8.35.0",
"@vercel/kv": "^0.2.2",
"ai": "^3.3.26",
Expand Down
94 changes: 68 additions & 26 deletions packages/aila/src/core/prompt/builders/AilaLessonPromptBuilder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import type { TemplateProps } from "@oakai/core/src/prompts/lesson-assistant";
import { template } from "@oakai/core/src/prompts/lesson-assistant";
import { prisma as globalPrisma } from "@oakai/db/client";
import { aiLogger } from "@oakai/logger";
import { getRelevantLessonPlans } from "@oakai/rag";

import { DEFAULT_RAG_LESSON_PLANS } from "../../../constants";
import { tryWithErrorReporting } from "../../../helpers/errorReporting";
Expand All @@ -12,6 +13,8 @@ import { compressedLessonPlanForRag } from "../../../utils/lessonPlan/compressed
import { fetchLessonPlan } from "../../../utils/lessonPlan/fetchLessonPlan";
import type { RagLessonPlan } from "../../../utils/rag/fetchRagContent";
import { fetchRagContent } from "../../../utils/rag/fetchRagContent";
import { parseKeyStage } from "../../../utils/rag/parseKeyStage";
import { parseSubjects } from "../../../utils/rag/parseSubjects";
import type { AilaServices } from "../../AilaServices";
import { AilaPromptBuilder } from "../AilaPromptBuilder";

Expand Down Expand Up @@ -64,36 +67,75 @@ export class AilaLessonPromptBuilder extends AilaPromptBuilder {

const { title, subject, keyStage, topic } = this._aila?.lessonPlan ?? {};

let relevantLessonPlans: RagLessonPlan[] = [];
await tryWithErrorReporting(async () => {
relevantLessonPlans = await fetchRagContent({
title: title ?? "unknown",
subject,
topic,
keyStage,
id: chatId,
k:
this._aila?.options.numberOfLessonPlansInRag ??
DEFAULT_RAG_LESSON_PLANS,
prisma: globalPrisma,
chatId,
userId,
const NEW_RAG_ENABLED = true;

if (NEW_RAG_ENABLED) {
if (!title || !subject || !keyStage) {
log.error(
"Missing title, subject or keyStage, returning empty content",
);
return {
ragLessonPlans: [],
stringifiedRelevantLessonPlans: noRelevantLessonPlans,
};
}

const keyStageSlugs = keyStage ? [parseKeyStage(keyStage)] : null;
const subjectSlugs = subject ? parseSubjects(subject) : null;

const relevantLessonPlans = await getRelevantLessonPlans({
title,
keyStageSlugs,
subjectSlugs,
});
}, "Did not fetch RAG content. Continuing");
const stringifiedRelevantLessonPlans = JSON.stringify(
relevantLessonPlans,
null,
2,
);

log.info("Fetched relevant lesson plans", relevantLessonPlans.length);
const stringifiedRelevantLessonPlans = JSON.stringify(
relevantLessonPlans,
null,
2,
);
return {
ragLessonPlans: relevantLessonPlans.map((l) => ({
...l.lessonPlan,
id: l.ragLessonPlanId,
})),
stringifiedRelevantLessonPlans,
};
} else {
let relevantLessonPlans: RagLessonPlan[] = [];
await tryWithErrorReporting(async () => {
relevantLessonPlans = await fetchRagContent({
title: title ?? "unknown",
subject,
topic,
keyStage,
id: chatId,
k:
this._aila?.options.numberOfLessonPlansInRag ??
DEFAULT_RAG_LESSON_PLANS,
prisma: globalPrisma,
chatId,
userId,
});
}, "Did not fetch RAG content. Continuing");

log.info("Got RAG content, length:", stringifiedRelevantLessonPlans.length);
log.info("Fetched relevant lesson plans", relevantLessonPlans.length);
const stringifiedRelevantLessonPlans = JSON.stringify(
relevantLessonPlans,
null,
2,
);

return {
ragLessonPlans: relevantLessonPlans,
stringifiedRelevantLessonPlans,
};
log.info(
"Got RAG content, length:",
stringifiedRelevantLessonPlans.length,
);

return {
ragLessonPlans: relevantLessonPlans,
stringifiedRelevantLessonPlans,
};
}
}

private systemPrompt(
Expand Down
6 changes: 3 additions & 3 deletions packages/aila/src/features/rag/AilaRag.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ import { minifyLessonPlanForRelevantLessons } from "../../utils/lessonPlan/minif
const log = aiLogger("aila:rag");

export class AilaRag implements AilaRagFeature {
private _aila: AilaServices;
private _rag: RAG;
private _prisma: PrismaClientWithAccelerate;
private readonly _aila: AilaServices;
private readonly _rag: RAG;
private readonly _prisma: PrismaClientWithAccelerate;

constructor({
aila,
Expand Down
3 changes: 1 addition & 2 deletions packages/aila/src/protocol/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -588,8 +588,7 @@ export const LessonPlanJsonSchema = zodToJsonSchema(
);

// Zod object schema describing one relevant lesson reference:
// a nullish numeric oakLessonId plus required string lessonPlanId and title.
const AilaRagRelevantLessonSchema = z.object({
// Accepts a number, null or undefined, so entries without an Oak lesson id still validate.
oakLessonId: z.number().nullish(),
lessonPlanId: z.string(),
title: z.string(),
});
Expand Down
20 changes: 20 additions & 0 deletions packages/aila/src/utils/rag/parseKeyStage.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/**
 * Lookup from a normalised key stage string (lower-case, letters and digits
 * only) to its key stage slug.
 *
 * A Map is used rather than a plain object so that names inherited from
 * Object.prototype (e.g. "constructor") can never be returned as a slug.
 */
const keyStageMap: ReadonlyMap<string, string> = new Map<string, string>([
  ["1", "ks1"],
  ["2", "ks2"],
  ["3", "ks3"],
  ["4", "ks4"],
  ["5", "ks5"],
  ["keystage1", "ks1"],
  ["keystage2", "ks2"],
  ["keystage3", "ks3"],
  ["keystage4", "ks4"],
  ["keystage5", "ks5"],
  ["eyfs", "early-years-foundation-stage"],
]);

/**
 * Converts a free-form key stage string into a key stage slug.
 *
 * The input is lower-cased and stripped of every character that is not a-z
 * or 0-9, so "Key Stage 3", "key-stage-3" and "3" all resolve to "ks3".
 *
 * @param maybeKeyStage - Key stage text in any of the supported spellings.
 * @returns The mapped slug, or the normalised input when no mapping exists.
 */
export function parseKeyStage(maybeKeyStage: string): string {
  // Normalise into a local instead of reassigning the parameter.
  const normalised = maybeKeyStage.toLowerCase().replace(/[^a-z0-9]/g, "");

  return keyStageMap.get(normalised) ?? normalised;
}
17 changes: 17 additions & 0 deletions packages/aila/src/utils/rag/parseSubjects.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/**
 * Subject slugs that should be searched together. Each key is a subject
 * slug; the value is the ordered list of slugs (including the key itself)
 * to use when that subject is requested.
 *
 * A Map is used rather than a plain object so that names inherited from
 * Object.prototype (e.g. "constructor", "toString") are never treated as
 * known subjects.
 */
const subjectMap: ReadonlyMap<string, readonly string[]> = new Map<
  string,
  readonly string[]
>([
  ["science", ["biology", "chemistry", "physics", "science", "combined-science"]],
  ["biology", ["biology", "science", "combined-science"]],
  ["chemistry", ["chemistry", "science", "combined-science"]],
  ["physics", ["physics", "science", "combined-science"]],
  [
    "combined-science",
    ["combined-science", "science", "biology", "chemistry", "physics"],
  ],
]);

/**
 * Expands a subject slug into the list of subject slugs to search.
 *
 * @param subject - Subject slug; matched exactly (case-sensitive).
 * @returns A new array holding the related slugs for a known subject, or a
 *   single-element array containing `subject` when there is no mapping.
 */
export function parseSubjects(subject: string): string[] {
  const relatedSlugs = subjectMap.get(subject);

  // Return a copy so callers cannot mutate the shared lookup table.
  return relatedSlugs ? [...relatedSlugs] : [subject];
}
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ These Oak lessons might be relevant:
1. Introduction to the Periodic Table
2. Chemical Reactions and Equations
3. The Structure of the Atom
4. The Mole Concept
5. Acids, Bases and Salts
\n
To base your lesson on one of these existing Oak lessons, type the lesson number. Tap **Continue** to start from scratch.
END OF EXAMPLE RESPONSE`,
Expand Down
37 changes: 24 additions & 13 deletions packages/core/src/rag/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import { DEFAULT_CATEGORISE_MODEL } from "../../../aila/src/constants";
import type { OpenAICompletionWithLoggingOptions } from "../../../aila/src/lib/openai/OpenAICompletionWithLogging";
import { OpenAICompletionWithLogging } from "../../../aila/src/lib/openai/OpenAICompletionWithLogging";
import type { JsonValue } from "../models/prompts";
import { shortenKeyStage } from "../utils/shortenKeyStage";
import { slugify } from "../utils/slugify";
import { keyStages, subjects } from "../utils/subjects";
import { CategoriseKeyStageAndSubjectResponse } from "./categorisation";
Expand Down Expand Up @@ -552,19 +553,19 @@ Thank you and happy classifying!`;
if (!keyStage) {
return null;
}
let cachedKeyStage: KeyStage | null;
try {
cachedKeyStage = await kv.get<KeyStage>(`keyStage:${keyStage}`);
if (cachedKeyStage) {
return cachedKeyStage;
}
} catch (e) {
log.error(
"Error parsing cached keyStage. Continuing without cached value",
e,
);
await kv.del(`keyStage:${keyStage}`);
}
// let cachedKeyStage: KeyStage | null;
// try {
// cachedKeyStage = await kv.get<KeyStage>(`keyStage:${keyStage}`);
// if (cachedKeyStage) {
// return cachedKeyStage;
// }
// } catch (e) {
// log.error(
// "Error parsing cached keyStage. Continuing without cached value",
// e,
// );
// await kv.del(`keyStage:${keyStage}`);
// }

let foundKeyStage: KeyStage | null = null;
foundKeyStage = await this.prisma.keyStage.findFirst({
Expand All @@ -575,6 +576,12 @@ Thank you and happy classifying!`;
{ slug: slugify(keyStage) },
{ title: { equals: keyStage.toLowerCase(), mode: "insensitive" } },
{ slug: { equals: keyStage.toLowerCase(), mode: "insensitive" } },
{
slug: {
equals: shortenKeyStage(slugify(keyStage)),
mode: "insensitive",
},
},
],
},
cacheStrategy: { ttl: 60 * 5, swr: 60 * 2 },
Expand Down Expand Up @@ -692,6 +699,8 @@ Thank you and happy classifying!`;
};
}

log.info("Filter:", filter);

const vectorStore = PrismaVectorStore.withModel<LessonPlanPart>(
this.prisma,
).create(
Expand Down Expand Up @@ -723,6 +732,8 @@ Thank you and happy classifying!`;
similaritySearchTerm,
k * 5, // search for more records than we need
);

log.info("Initial search result", result);
} catch (e) {
if (e instanceof TypeError && e.message.includes("join([])")) {
log.warn("Caught TypeError with join([]), returning empty array");
Expand Down
10 changes: 10 additions & 0 deletions packages/core/src/utils/shortenKeyStage.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/**
 * Long-form key stage slugs and their short codes.
 *
 * Built once at module load (not on every call) and stored in a Map so that
 * names inherited from Object.prototype, such as "constructor", are never
 * matched.
 */
const shortKeyStageBySlug: ReadonlyMap<string, string> = new Map<
  string,
  string
>([
  ["key-stage-1", "KS1"],
  ["key-stage-2", "KS2"],
  ["key-stage-3", "KS3"],
  ["key-stage-4", "KS4"],
]);

/**
 * Shortens a long-form key stage slug ("key-stage-1" … "key-stage-4") to its
 * short code ("KS1" … "KS4").
 *
 * @param keyStage - Key stage slug; matched exactly.
 * @returns The short code, or `keyStage` unchanged when there is no mapping.
 */
export const shortenKeyStage = (keyStage: string): string =>
  shortKeyStageBySlug.get(keyStage) ?? keyStage;
2 changes: 2 additions & 0 deletions packages/db/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
"db-migrate": "pnpm with-env prisma migrate dev",
"db-migrate:dev": "pnpm with-env prisma migrate dev",
"db-migrate:status": "pnpm with-env prisma migrate status",
"db-migrate:status:stg": "DB_ENV=stg doppler run --config stg -- prisma migrate status",
"db-migrate:status:prd": "DB_ENV=prd doppler run --config prd -- prisma migrate status",
"db-migrate-resolve-applied:prd": "doppler run --config prd -- prisma migrate resolve --applied",
"db-migrate-resolve-applied:stg": "doppler run --config stg -- prisma migrate resolve --applied",
"db-migrate-resolve-rolled-back:prd": "doppler run --config prd -- prisma migrate resolve --rolled-back",
Expand Down
3 changes: 3 additions & 0 deletions packages/db/prisma/additions/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Schema additions

This directory houses raw SQL additions to the database schema, for cases that the Prisma schema language is not expressive enough to capture (for example, partial indexes).
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- Index over the embedding column of rag.rag_lesson_plan_parts, built with the
-- ivfflat access method and the vector_cosine_ops operator class, using 100 lists.
-- IF NOT EXISTS allows this statement to be re-run when the index is already present.
-- NOTE(review): ivfflat / vector_cosine_ops presumably come from the pgvector
-- extension; confirm it is installed before this statement runs.
CREATE INDEX IF NOT EXISTS idx_rag_lesson_plan_parts_embedding_ann
ON rag.rag_lesson_plan_parts
USING ivfflat (embedding vector_cosine_ops)
WITH (lists = 100);
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Partial unique index: among rows with is_published = TRUE, each
-- oak_lesson_slug may appear at most once. Rows that do not satisfy the WHERE
-- clause are not indexed, so they may share a slug.
-- IF NOT EXISTS allows this statement to be re-run when the index is already present.
CREATE UNIQUE INDEX IF NOT EXISTS idx_rag_lesson_plans_unique_published_oak_lesson_slug
ON rag.rag_lesson_plans (oak_lesson_slug)
WHERE is_published = TRUE;
25 changes: 15 additions & 10 deletions packages/db/prisma/schema.prisma
Original file line number Diff line number Diff line change
Expand Up @@ -1117,16 +1117,21 @@ model IngestError {
}

model RagLessonPlan {
id String @id @default(cuid())
oakLessonId Int @map("oak_lesson_id")
ingestLessonId String? @map("ingest_lesson_id")
lessonPlan Json @map("lesson_plan") @db.JsonB
subjectSlug String @map("subject_slug")
keyStageSlug String @map("key_stage_slug")
createdAt DateTime @default(now()) @map("created_at")
updatedAt DateTime @updatedAt @map("updated_at")
ragLessonPlanPart RagLessonPlanPart[]

id String @id @default(cuid())
oakLessonId Int? @map("oak_lesson_id")
oakLessonSlug String @map("oak_lesson_slug")
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adds slug as a mandatory field

ingestLessonId String? @map("ingest_lesson_id")
lessonPlan Json @map("lesson_plan") @db.JsonB
subjectSlug String @map("subject_slug")
keyStageSlug String @map("key_stage_slug")
isPublished Boolean @default(false) @map("is_published")
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adds isPublished field

createdAt DateTime @default(now()) @map("created_at")
updatedAt DateTime @updatedAt @map("updated_at")
ragLessonPlanParts RagLessonPlanPart[]
// The following index is not supported by prisma so is applied manually in ./additions/rag_lesson_plans_unique_slug_index.sql
// @@index([oakLessonSlug], name: "unique_published_oak_lesson_slug", dbIndex: false) @db.PartialIndex("is_published = TRUE")

@@index([isPublished, keyStageSlug, subjectSlug], name: "idx_rag_lesson_plans_published_key_stage_subject")
@@map("rag_lesson_plans")
@@schema("rag")
}
Expand Down
1 change: 1 addition & 0 deletions packages/ingest/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
"@oakai/core": "*",
"@oakai/db": "*",
"@oakai/logger": "*",
"@paralleldrive/cuid2": "^2.2.2",
"csv-parser": "^3.0.0",
"graphql-request": "^6.1.0",
"webvtt-parser": "^2.2.0",
Expand Down
3 changes: 2 additions & 1 deletion packages/ingest/src/_data/.gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
*.jsonl
*.json
*.csv
*.csv
*
2 changes: 2 additions & 0 deletions packages/ingest/src/config/ingestConfig.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,7 @@ export const IngestConfigSchema = z.object({
filePath: z.string(),
}),
]),
title: z.string().optional(),
description: z.string().optional(),
});
export type IngestConfig = z.infer<typeof IngestConfigSchema>;
1 change: 1 addition & 0 deletions packages/ingest/src/db-helpers/step.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export const STEP = [
"lesson_plan_generation",
"chunking",
"embedding",
"publishing",
] as const;

const STEP_STATUS = ["started", "completed", "failed"] as const;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { CompletedLessonPlanSchema } from "@oakai/aila/src/protocol/schema";

import { IngestError } from "../IngestError";
import { CompletionBatchResponseSchema } from "../zod-schema/zodSchema";
import { parseKeyStage } from "./parseKeyStage";

export function parseBatchLessonPlan(line: unknown) {
let result;
Expand Down Expand Up @@ -37,6 +38,8 @@ export function parseBatchLessonPlan(line: unknown) {
lessonPlan = CompletedLessonPlanSchema.parse(
JSON.parse(maybeLessonPlanString),
);

lessonPlan.keyStage = parseKeyStage(lessonPlan.keyStage);
codeincontext marked this conversation as resolved.
Show resolved Hide resolved
} catch (cause) {
throw new IngestError("Failed to parse lesson plan", {
cause,
Expand Down
Loading
Loading