diff --git a/.env b/.env index 367ef82d..c1684d8a 100644 --- a/.env +++ b/.env @@ -19,8 +19,8 @@ NEXTAUTH_URL_INTERNAL="${NEXT_PUBLIC_HOST}" # Clickhouse Variables CLICK_HOUSE_HOST="http://langtrace-clickhouse:8123" -CLICK_HOUSE_USER="default" -CLICK_HOUSE_PASSWORD="" +CLICK_HOUSE_USER="lt_clickhouse_user" +CLICK_HOUSE_PASSWORD="clickhousepw" CLICK_HOUSE_DATABASE_NAME="langtrace_traces" # Admin login diff --git a/app/(protected)/project/[project_id]/evaluate/[test_id]/page.tsx b/app/(protected)/project/[project_id]/evaluate/[test_id]/page.tsx deleted file mode 100644 index 59c7004a..00000000 --- a/app/(protected)/project/[project_id]/evaluate/[test_id]/page.tsx +++ /dev/null @@ -1,568 +0,0 @@ -"use client"; - -import { ScaleType } from "@/components/evaluate/eval-scale-picker"; -import { RangeScale } from "@/components/evaluate/range-scale"; -import UserLogo from "@/components/shared/user-logo"; -import { VendorLogo } from "@/components/shared/vendor-metadata"; -import { Button } from "@/components/ui/button"; -import { Skeleton } from "@/components/ui/skeleton"; -import { correctTimestampFormat } from "@/lib/trace_utils"; -import { - cn, - extractSystemPromptFromLlmInputs, - formatDateTime, - safeStringify, -} from "@/lib/utils"; -import { Cross1Icon, EnterIcon } from "@radix-ui/react-icons"; -import { ProgressCircle } from "@tremor/react"; -import { - ArrowDownSquareIcon, - ArrowUpSquareIcon, - CheckIcon, - ChevronLeft, - ChevronRight, - DeleteIcon, -} from "lucide-react"; -import { useParams, useRouter, useSearchParams } from "next/navigation"; -import { useEffect, useState } from "react"; -import { useQuery, useQueryClient } from "react-query"; -import { toast } from "sonner"; - -export default function Page() { - const router = useRouter(); - const projectId = useParams()?.project_id as string; - const testId = useParams()?.test_id as string; - const page = parseInt(useSearchParams()?.get("page") || "1"); - // const spanId = useSearchParams()?.get("span_id"); - - const { isLoading: isTestLoading, data: testData } = useQuery({ - queryKey: ["fetch-test-query", testId], - queryFn: async () => { - const response = await fetch(`/api/test?id=${testId}`); - if (!response.ok) { - const error = await response.json(); - toast.error("Failed to fetch the test", { - description: error?.message || "Failed to fetch test", - }); - router.push(`/project/${projectId}/evaluate`); - return; - } - const result = await response.json(); - return result; - }, - }); - - const [score, setScore] = useState(testData?.test?.min ?? -1); - const [scorePercent, setScorePercent] = useState(0); - const [color, setColor] = useState("red"); - const [span, setSpan] = useState(null); - const [userScore, setUserScore] = useState(); - const [userScorePercent, setUserScorePercent] = useState(0); - const [userScoreColor, setUserScoreColor] = useState("red"); - const [totalPages, setTotalPages] = useState(1); - const queryClient = useQueryClient(); - - const { isLoading: isSpanLoading } = useQuery({ - queryKey: ["fetch-spans-query", page, testData?.test?.id], - queryFn: async () => { - const filters = [ - { - key: "llm.prompts", - operation: "NOT_EQUALS", - value: "", - }, - ]; - - // convert filterserviceType to a string - const apiEndpoint = "/api/spans"; - const body = { - page, - pageSize: 1, - projectId: projectId, - filters: filters, - filterOperation: "AND", - }; - - const response = await fetch(apiEndpoint, { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify(body), - }); - - if (!response.ok) { - const error = await response.json(); - toast.error("Failed to fetch the span data", { - description: error?.message || "Failed to fetch the span data", - }); - router.push(`/project/${projectId}/evaluate`); - return; - } - - const result = await response.json(); - return result; - }, - onSuccess: (data) => { - // Get the newly fetched data and metadata - const spans = data?.spans?.result || []; - const metadata = data?.spans?.metadata || {}; - - if ( - metadata?.total_pages <= 0 || - page <= 0 || - page > parseInt(metadata?.total_pages) - ) { - router.push(`/project/${projectId}/evaluate`); - } - - // Update the total pages and current page number - setTotalPages(parseInt(metadata?.total_pages) || 1); - - // Update the span state - if (spans.length > 0) { - if (spans[0]?.attributes) { - const attributes = JSON.parse(spans[0]?.attributes); - setUserScore(attributes["user.feedback.rating"] || ""); - if (attributes["user.feedback.rating"] === 1) { - setUserScorePercent(100); - setUserScoreColor("green"); - } - } - setSpan(spans[0]); - } - }, - enabled: !!testData, - }); - - const next = async () => { - // Evaluate the current score - evaluate(); - if (page < totalPages) { - const nextPage = page + 1; - router.push(`/project/${projectId}/evaluate/${testId}?page=${nextPage}`); - } - }; - - const previous = () => { - if (page > 1) { - const previousPage = page - 1; - router.push( - `/project/${projectId}/evaluate/${testId}?page=${previousPage}` - ); - } - }; - - const { isLoading: isEvaluationLoading, data: evaluationsData } = useQuery({ - queryKey: ["fetch-evaluation-query", span?.span_id], - queryFn: async () => { - const response = await fetch(`/api/evaluation?spanId=${span?.span_id}`); - if (!response.ok) { - const error = await response.json(); - toast.error("Failed to fetch the evaluation data", { - description: error?.message || "Failed to fetch the evaluation data", - }); - router.push(`/project/${projectId}/evaluate`); - return; - } - const result = await response.json(); - const sc = - result.evaluations.length > 0 - ? result.evaluations[0].ltUserScore ?? -1 - : -1; - onScoreSelected(sc); - return result; - }, - enabled: !!span, - }); - - useEffect(() => { - const handleKeyPress = (event: any) => { - if (event.key === "Enter") { - next(); - } - if (event.key === "Backspace") { - previous(); - } - - if (event.key === "Escape") { - router.push(`/project/${projectId}/evaluate`); - } - }; - - // Add event listener - window.addEventListener("keydown", handleKeyPress); - - // Remove event listener on cleanup - return () => { - window.removeEventListener("keydown", handleKeyPress); - }; - }, [totalPages, page, evaluationsData, score]); - - const evaluate = async () => { - // setBusy(true); - try { - const attributes = span?.attributes ? JSON.parse(span.attributes) : {}; - if (Object.keys(attributes).length === 0) return; - const model = attributes["llm.model"]; - const prompts = attributes["llm.prompts"]; - const systemPrompt = extractSystemPromptFromLlmInputs(prompts); - - // Check if an evaluation already exists - if (evaluationsData?.evaluations[0]?.id) { - if (evaluationsData.evaluations[0].ltUserScore === score) { - // setBusy(false); - return; - } - // Update the existing evaluation - await fetch("/api/evaluation", { - method: "PUT", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ - id: evaluationsData.evaluations[0].id, - ltUserScore: score, - testId, - }), - }); - queryClient.invalidateQueries({ - queryKey: ["fetch-evaluation-query", span?.span_id], - }); - } else { - // Create a new evaluation - await fetch("/api/evaluation", { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ - projectId: projectId, - spanId: span.span_id, - traceId: span.trace_id, - ltUserScore: score, - testId: testId, - }), - }); - queryClient.invalidateQueries({ - queryKey: ["fetch-evaluation-query", span?.span_id], - }); - } - } catch (error: any) { - toast.error("Error evaluating the span!", { - description: `There was an error evaluating the span: ${error.message}`, - }); - } finally { - // setBusy(false); - } - }; - - const onScoreSelected = (value: number) => { - setScore(value); - - // Calculate the percentage of the score using min, max and step - const range = testData?.test?.max - testData?.test?.min; - const steps = range / testData?.test?.step; - const scorePercent = ((value - testData?.test?.min) / steps) * 100; - setScorePercent(scorePercent); - - if (scorePercent < 33) { - setColor("red"); - } - if (scorePercent >= 33 && scorePercent < 66) { - setColor("yellow"); - } - if (scorePercent >= 66) { - setColor("green"); - } - }; - - if (isTestLoading || isSpanLoading) { - return ; - } else { - return ( -
- -
-
-
-

- {testData?.test?.name || "No name provided"} -

-

- {testData?.test?.description || "No description provided"} -

-
-
-

- {page}/{totalPages} -

- - - {Math.round((page / totalPages) * 100)}% - - -
-
-
-
-

- Evaluation Scale{" "} -

-

- {testData?.test?.min} to {testData?.test?.max} in steps of + - {testData?.test?.step} -

-
- {span?.start_time && ( -
-

- Timestamp -

-

- {formatDateTime(correctTimestampFormat(span?.start_time))} -

-
- )} -
-

- Scale - - {evaluationsData?.evaluations[0]?.id - ? "Evaluated" - : "Not Evaluated"} - -

- {isEvaluationLoading ? ( -
- {[1, 2, 3, 4].map((item) => ( - - ))} -
- ) : ( - - )} -
-
-

- Evaluted Score -

- -

{score}

-
-
-
-

User Score

- {userScore ? ( - -

{userScore}

-
- ) : ( -

- Not evaluated -

- )} -
-
-
-

Hotkeys

-
- - -

Arrow keys to navigate the scale

-
-
- -

- Enter/Return to submit the score and continue to the next - evaluation -

-
-
- -

- Delete/Backspace to go back to the previous evaluation -

-
-
-

Esc

-

Press Esc to exit the evaluation dialog

-
-
-
-
- - - -
-
- ); - } -} - -function ConversationView({ span }: { span: any }) { - const attributes = span?.attributes ? JSON.parse(span.attributes) : {}; - if (!attributes) return

No data found

; - - const prompts = attributes["llm.prompts"]; - const responses = attributes["llm.responses"]; - - if (!prompts && !responses) return

No data found

; - - return ( -
- {prompts?.length > 0 && - JSON.parse(prompts).map((prompt: any, i: number) => { - const role = prompt?.role ? prompt?.role?.toLowerCase() : "User"; - const content = prompt?.content - ? safeStringify(prompt?.content) - : prompt?.function_call - ? safeStringify(prompt?.function_call) - : "No input found"; - return ( -
-
- {role === "user" ? ( - - ) : ( - - )} -

{role}

- {role === "system" && ( -

- Prompt -

- )} -
-
-
- ); - })} - {responses?.length > 0 && - JSON.parse(responses).map((response: any, i: number) => { - const role = - response?.role?.toLowerCase() || - response?.message?.role || - "Assistant"; - const content = - safeStringify(response?.content) || - safeStringify(response?.message?.content) || - safeStringify(response?.text) || - "No output found"; - return ( -
-
- {role === "user" ? ( - - ) : ( - - )} -

{role}

-
-
-
- ); - })} -
- ); -} - -function EvalDialogSkeleton() { - return ( -
-
- - - - - - - -
-
- - - - - -
- - -
-
- - -
-
- - -
-
- - -
-
-
- ); -} diff --git a/app/(protected)/project/[project_id]/evaluate/page-client.tsx b/app/(protected)/project/[project_id]/evaluate/page-client.tsx index 97bfb8ca..adb86b11 100644 --- a/app/(protected)/project/[project_id]/evaluate/page-client.tsx +++ b/app/(protected)/project/[project_id]/evaluate/page-client.tsx @@ -8,14 +8,11 @@ import EvaluationTable, { EvaluationTableSkeleton, } from "@/components/evaluate/evaluation-table"; import { AddtoDataset } from "@/components/shared/add-to-dataset"; -import { Button } from "@/components/ui/button"; import { Separator } from "@/components/ui/separator"; import { Skeleton } from "@/components/ui/skeleton"; -import { cn, getChartColor } from "@/lib/utils"; +import { cn } from "@/lib/utils"; import { Test } from "@prisma/client"; -import { ProgressCircle } from "@tremor/react"; -import { ChevronsRight, RabbitIcon } from "lucide-react"; -import Link from "next/link"; +import { RabbitIcon } from "lucide-react"; import { useParams } from "next/navigation"; import { useState } from "react"; import { useQuery } from "react-query"; @@ -29,36 +26,17 @@ interface CheckedData { export default function PageClient({ email }: { email: string }) { const projectId = useParams()?.project_id as string; - const [selectedTest, setSelectedTest] = useState(); const [selectedData, setSelectedData] = useState([]); const [currentData, setCurrentData] = useState([]); const [page, setPage] = useState(1); const [totalPages, setTotalPages] = useState(1); - const { data: testAverages, isLoading: testAveragesLoading } = useQuery({ - queryKey: [`fetch-test-averages-${projectId}-query`], - queryFn: async () => { - const response = await fetch(`/api/metrics/tests?projectId=${projectId}`); - if (!response.ok) { - const error = await response.json(); - toast.error("Failed to fetch test averages", { - description: error?.message || "Failed to fetch test averages", - }); - return { averages: [] }; - } - const result = await response.json(); - return result; - }, - refetchOnWindowFocus: false, - refetchOnMount: true, - }); - const { data: tests, isLoading: testsLoading, error: testsError, } = useQuery({ - queryKey: [`fetch-tests-${projectId}-query`], + queryKey: ["fetch-tests-query", projectId], queryFn: async () => { const response = await fetch(`/api/test?projectId=${projectId}`); if (!response.ok) { @@ -70,17 +48,12 @@ export default function PageClient({ email }: { email: string }) { // sort tests by created date result.tests.sort( (a: Test, b: Test) => - new Date(a.createdAt).getTime() - new Date(b.createdAt).getTime() + new Date(b.createdAt).getTime() - new Date(a.createdAt).getTime() ); - if (result?.tests?.length > 0) { - setSelectedTest(result?.tests?.[0]); - } - return result; }, refetchOnWindowFocus: false, - enabled: !!testAverages, onError: (error) => { toast.error("Failed to fetch tests", { description: error instanceof Error ? error.message : String(error), @@ -99,143 +72,40 @@ export default function PageClient({ email }: { email: string }) { ); } - const testAverage = - testAverages?.averages?.find((avg: any) => avg.testId === selectedTest?.id) - ?.average || 0; - return ( -
+
-

Manual Evaluations

+

Evaluations

- {selectedTest && ( - - - - )} - {selectedTest && ( - + {tests?.tests?.length > 0 && ( + )}
- {testAveragesLoading || testsLoading || !tests ? ( + {testsLoading || !tests ? ( ) : tests?.tests?.length > 0 ? ( -
-
- {tests?.tests?.map((test: Test, i: number) => { - const average = - testAverages?.averages?.find( - (avg: any) => avg.testId === test?.id - )?.average || 0; - return ( -
-
{ - setSelectedTest(test); - setCurrentData([]); - setPage(1); - setTotalPages(1); - }} - className={cn( - "flex flex-col gap-4 p-4 items-start cursor-pointer", - i === 0 ? "rounded-t-md" : "", - i === tests?.tests?.length - 1 ? "rounded-b-md" : "", - selectedTest?.id === test.id - ? "dark:bg-black bg-white border-l-2 border-primary" - : "" - )} - > -

- {test.name} -

- - - {Math.round(average)}% - - -
- -
- ); - })} -
-
-
-
-
-

- {selectedTest?.name} Evaluation -

- - Test ID: {selectedTest?.id} - -
-
- - Evaluation Scale - - - {selectedTest?.min} to {selectedTest?.max} in steps of + - {selectedTest?.step} - -
- - - {Math.round(testAverage)}% - - -

- {selectedTest?.description} -

-
- {selectedTest && ( - - )} -
-
- - - {selectedTest && ( - - )} -
+
+ +
+ +
) : ( diff --git a/app/(protected)/project/[project_id]/experiments/page-client.tsx b/app/(protected)/project/[project_id]/experiments/page-client.tsx new file mode 100644 index 00000000..28d289d5 --- /dev/null +++ b/app/(protected)/project/[project_id]/experiments/page-client.tsx @@ -0,0 +1,3 @@ +export default function Experiments() { + return <>; +} diff --git a/app/(protected)/project/[project_id]/experiments/page.tsx b/app/(protected)/project/[project_id]/experiments/page.tsx new file mode 100644 index 00000000..91936f72 --- /dev/null +++ b/app/(protected)/project/[project_id]/experiments/page.tsx @@ -0,0 +1,24 @@ +import { authOptions } from "@/lib/auth/options"; +import { Metadata } from "next"; +import { getServerSession } from "next-auth"; +import { redirect } from "next/navigation"; +import Experiments from "./page-client"; + +export const metadata: Metadata = { + title: "Langtrace | Experiments", + description: "View and manage all your experiments in one place.", +}; + +export default async function Page() { + const session = await getServerSession(authOptions); + if (!session || !session.user) { + redirect("/login"); + } + const email = session?.user?.email as string; + + return ( + <> + + + ); +} diff --git a/app/(protected)/project/[project_id]/prompts/page-client.tsx b/app/(protected)/project/[project_id]/prompts/page-client.tsx index 8b4eafcf..b5cc02b8 100644 --- a/app/(protected)/project/[project_id]/prompts/page-client.tsx +++ b/app/(protected)/project/[project_id]/prompts/page-client.tsx @@ -37,7 +37,7 @@ export default function PageClient({ email }: { email: string }) { }; const fetchPrompts = useQuery({ - queryKey: [`fetch-prompts-${projectId}-query`], + queryKey: ["fetch-prompts-query", projectId], queryFn: async () => { const response = await fetch( `/api/span-prompt?projectId=${projectId}&page=${page}&pageSize=${PAGE_SIZE}` @@ -188,7 +188,7 @@ const PromptRow = ({ const promptContent = extractSystemPromptFromLlmInputs(prompts); const fetchEvaluation = useQuery({ - queryKey: [`fetch-evaluation-query-${prompt.span_id}`], + queryKey: ["fetch-evaluation-query", prompt.span_id], queryFn: async () => { const response = await fetch(`/api/evaluation?prompt=${promptContent}`); diff --git a/app/(protected)/projects/page-client.tsx b/app/(protected)/projects/page-client.tsx index a3c2c77b..5c2afd8d 100644 --- a/app/(protected)/projects/page-client.tsx +++ b/app/(protected)/projects/page-client.tsx @@ -143,7 +143,7 @@ function ProjectCard({ teamId: string; }) { const { data: projectStats, isLoading: projectStatsLoading } = useQuery({ - queryKey: [`fetch-project-stats-${project.id}`], + queryKey: ["fetch-project-stats", project.id], queryFn: async () => { const response = await fetch( `/api/stats/project?projectId=${project.id}` diff --git a/app/api/data/route.ts b/app/api/data/route.ts index 89c907cc..fb186916 100644 --- a/app/api/data/route.ts +++ b/app/api/data/route.ts @@ -1,116 +1,186 @@ import { authOptions } from "@/lib/auth/options"; import prisma from "@/lib/prisma"; +import { authApiKey } from "@/lib/utils"; import { Data } from "@prisma/client"; import { getServerSession } from "next-auth"; import { redirect } from "next/navigation"; import { NextRequest, NextResponse } from "next/server"; export async function GET(req: NextRequest) { - const session = await getServerSession(authOptions); - if (!session || !session.user) { - redirect("/login"); - } - - const id = req.nextUrl.searchParams.get("id") as string; - const spanId = req.nextUrl.searchParams.get("spanId") as string; - - if (!id && !spanId) { + try { + const session = await getServerSession(authOptions); + if (!session || !session.user) { + redirect("/login"); + } + + const id = req.nextUrl.searchParams.get("id") as string; + const spanId = req.nextUrl.searchParams.get("spanId") as string; + + if (!id && !spanId) { + return NextResponse.json( + { + error: "No data id or span id provided", + }, + { status: 404 } + ); + } + + if (id) { + const result = await prisma.data.findFirst({ + where: { + id, + }, + }); + + return NextResponse.json({ + data: result, + }); + } + + if (spanId) { + const result = await prisma.data.findMany({ + where: { + spanId, + }, + }); + + return NextResponse.json({ + data: result, + }); + } + } catch (error) { return NextResponse.json( { - error: "No data id or span id provided", + message: "Internal server error", }, - { status: 404 } + { status: 500 } ); } +} - if (id) { - const result = await prisma.data.findFirst({ - where: { - id, - }, +export async function POST(req: NextRequest) { + try { + // check session + let projectId = ""; + const session = await getServerSession(authOptions); + if (!session || !session.user) { + // check api key + const apiKey = req.headers.get("x-api-key"); + if (!apiKey) { + redirect("/login"); + } + + const response = await authApiKey(apiKey!); + if (response.status !== 200) { + return response; + } + + // Get project data + const projectData = await response.json(); + projectId = projectData.data.project.id; + } + + const data = await req.json(); + const { datas, datasetId, runId } = data; + if (!projectId) { + projectId = data.projectId; + } + + const payload = datas.map((data: Data) => { + const d: any = { + input: data.input, + output: data.output, + contexts: data.contexts || [], + expectedOutput: data.expectedOutput || "", + note: data.note || "", + spanId: data.spanId || "", + projectId: projectId || "", + datasetId: datasetId || "", + }; + + if (runId) { + d.runId = runId; + } + return d; + }); + + const result = await prisma.data.createMany({ + data: payload, }); return NextResponse.json({ data: result, }); + } catch (error) { + console.log(error); + return NextResponse.json( + { + message: "Internal server error", + }, + { status: 500 } + ); } +} + +export async function PUT(req: NextRequest) { + try { + const session = await getServerSession(authOptions); + if (!session || !session.user) { + redirect("/login"); + } - if (spanId) { - const result = await prisma.data.findMany({ + const data = await req.json(); + const { id, input, output, expectedOutput, contexts, note } = data; + + const result = await prisma.data.update({ where: { - spanId, + id, + }, + data: { + input, + output, + expectedOutput, + contexts, + note, }, }); return NextResponse.json({ data: result, }); + } catch (error) { + return NextResponse.json( + { + message: "Internal server error", + }, + { status: 500 } + ); } } -export async function POST(req: NextRequest) { - const session = await getServerSession(authOptions); - if (!session || !session.user) { - redirect("/login"); - } - - const data = await req.json(); - const { datas, datasetId } = data; - - const result = await prisma.data.createMany({ - data: datas.map((data: Data) => ({ - input: data.input, - output: data.output, - note: data.note || "", - spanId: data.spanId || "", - datasetId, - })), - }); - - return NextResponse.json({ - data: result, - }); -} - -export async function PUT(req: NextRequest) { - const session = await getServerSession(authOptions); - if (!session || !session.user) { - redirect("/login"); - } - - const data = await req.json(); - const { id, input, output, note } = data; - - const result = await prisma.data.update({ - where: { - id, - }, - data: { - input, - output, - note, - }, - }); - - return NextResponse.json({ - data: result, - }); -} - export async function DELETE(req: NextRequest) { - const session = await getServerSession(authOptions); - if (!session || !session.user) { - redirect("/login"); - } + try { + const session = await getServerSession(authOptions); + if (!session || !session.user) { + redirect("/login"); + } - const data = await req.json(); - const { id } = data; + const data = await req.json(); + const { id } = data; - await prisma.data.delete({ - where: { - id, - }, - }); + await prisma.data.delete({ + where: { + id, + }, + }); - return NextResponse.json({}); + return NextResponse.json({}); + } catch (error) { + return NextResponse.json( + { + message: "Internal server error", + }, + { status: 500 } + ); + } } diff --git a/app/api/evaluation/route.ts b/app/api/evaluation/route.ts index 51b502f4..431631d1 100644 --- a/app/api/evaluation/route.ts +++ b/app/api/evaluation/route.ts @@ -5,111 +5,142 @@ import { getServerSession } from "next-auth"; import { NextRequest, NextResponse } from "next/server"; export async function POST(req: NextRequest) { - const apiKey = req.headers.get("x-api-key"); - if (apiKey !== null) { - const response = await authApiKey(apiKey); - if (response.status !== 200) { - return NextResponse.json({ error: "Unauthorized" }, { status: 401 }); - } - const projectData = await response.json(); - const projectId = projectData.data.project.id; - const data = await req.json(); - const { traceId, spanId, userScore, userId } = data; - // check if an evaluation already exists for the spanId - const existingEvaluation = await prisma.evaluation.findFirst({ - where: { - spanId, - }, - }); - - if (existingEvaluation) { - return NextResponse.json( - { - error: "Evaluation already exists for this span", + try { + const apiKey = req.headers.get("x-api-key"); + if (apiKey !== null) { + const response = await authApiKey(apiKey); + if (response.status !== 200) { + return NextResponse.json({ error: "Unauthorized" }, { status: 401 }); + } + const projectData = await response.json(); + const projectId = projectData.data.project.id; + const data = await req.json(); + const { traceId, spanId, userScore, userId, reason, dataId } = data; + // check if an evaluation already exists for the spanId + const existingEvaluation = await prisma.evaluation.findFirst({ + where: { + spanId, }, - { status: 400 } - ); - } + }); + + if (existingEvaluation) { + return NextResponse.json( + { + error: "Evaluation already exists for this span", + }, + { status: 400 } + ); + } - const evaluation = await prisma.evaluation.create({ - data: { + const payload: any = { spanId, traceId, projectId, userId, userScore, - }, - }); - return NextResponse.json({ - data: evaluation, - }); - } else { - const session = await getServerSession(authOptions); - if (!session || !session.user) { - NextResponse.json({ error: "Unauthorized" }, { status: 401 }); - } - const email = session?.user?.email as string; - if (!email) { - return NextResponse.json( - { - error: "email not found", - }, - { status: 404 } - ); - } + reason: reason || "", + }; - const user = await prisma.user.findUnique({ - where: { - email, - }, - include: { - Team: true, - }, - }); + if (dataId) { + payload["dataId"] = dataId; + } - if (!user) { - return NextResponse.json( - { - error: "user not found", + const evaluation = await prisma.evaluation.create({ + data: payload, + }); + return NextResponse.json({ + data: evaluation, + }); + } else { + const session = await getServerSession(authOptions); + if (!session || !session.user) { + NextResponse.json({ error: "Unauthorized" }, { status: 401 }); + } + const email = session?.user?.email as string; + if (!email) { + return NextResponse.json( + { + error: "email not found", + }, + { status: 404 } + ); + } + + const user = await prisma.user.findUnique({ + where: { + email, }, - { status: 404 } - ); - } + include: { + Team: true, + }, + }); - const data = await req.json(); - const { traceId, spanId, projectId, ltUserScore, testId } = data; + if (!user) { + return NextResponse.json( + { + error: "user not found", + }, + { status: 404 } + ); + } - // check if this user has access to this project - const project = await prisma.project.findFirst({ - where: { - id: projectId, - teamId: user.teamId, - }, - }); + const data = await req.json(); + const { + traceId, + spanId, + projectId, + ltUserScore, + testId, + reason, + dataId, + } = data; - if (!project) { - return NextResponse.json( - { - error: "User does not have access to this project", + // check if this user has access to this project + const project = await prisma.project.findFirst({ + where: { + id: projectId, + teamId: user.teamId, }, - { status: 403 } - ); - } + }); - const evaluation = await prisma.evaluation.create({ - data: { + if (!project) { + return NextResponse.json( + { + error: "User does not have access to this project", + }, + { status: 403 } + ); + } + + const payload: any = { spanId, traceId, ltUserId: user.id, projectId, ltUserScore, testId, - }, - }); + reason: reason || "", + }; - return NextResponse.json({ - data: evaluation, - }); + if (dataId) { + payload["dataId"] = dataId; + } + + const evaluation = await prisma.evaluation.create({ + data: payload, + }); + + return NextResponse.json({ + data: evaluation, + }); + } + } catch (error) { + return NextResponse.json( + { + message: "Internal server error", + }, + { status: 500 } + ); } } @@ -132,6 +163,7 @@ export async function GET(req: NextRequest) { const spanId = req.nextUrl.searchParams.get("spanId") as string; const testId = req.nextUrl.searchParams.get("testId") as string; + const includeTest = req.nextUrl.searchParams.get("includeTest") === "true"; if (!projectId && !spanId) { return NextResponse.json( @@ -142,34 +174,6 @@ export async function GET(req: NextRequest) { ); } - if (spanId) { - let evaluations; - if (testId) { - evaluations = await prisma.evaluation.findMany({ - where: { - spanId, - testId, - }, - }); - } else { - evaluations = await prisma.evaluation.findFirst({ - where: { - spanId, - }, - }); - } - - if (!evaluations) { - return NextResponse.json({ - evaluations: [], - }); - } - - return NextResponse.json({ - evaluations: Array.isArray(evaluations) ? evaluations : [evaluations], - }); - } - // check if this user has access to this project if (session) { const email = session?.user?.email as string; @@ -203,10 +207,47 @@ export async function GET(req: NextRequest) { } } + if (spanId) { + let evaluations; + if (testId) { + evaluations = await prisma.evaluation.findFirst({ + where: { + spanId, + testId, + }, + include: { + Test: includeTest, + }, + }); + } else { + evaluations = await prisma.evaluation.findMany({ + where: { + spanId, + }, + include: { + Test: includeTest, + }, + }); + } + + if (!evaluations) { + return NextResponse.json({ + evaluations: [], + }); + } + + return NextResponse.json({ + evaluations: Array.isArray(evaluations) ? evaluations : [evaluations], + }); + } + const evaluations = await prisma.evaluation.findMany({ where: { projectId, }, + include: { + Test: includeTest, + }, }); if (!evaluations) { @@ -227,123 +268,133 @@ export async function GET(req: NextRequest) { } export async function PUT(req: NextRequest) { - const session = await getServerSession(authOptions); - const apiKey = req.headers.get("x-api-key"); - if (apiKey !== null) { - const response = await authApiKey(apiKey); - if (response.status !== 200) { - return NextResponse.json({ error: "Unauthorized" }, { status: 401 }); - } - const projectData = await response.json(); - const projectId = projectData.data.project.id; + try { + const session = await getServerSession(authOptions); + const apiKey = req.headers.get("x-api-key"); + if (apiKey !== null) { + const response = await authApiKey(apiKey); + if (response.status !== 200) { + return NextResponse.json({ error: "Unauthorized" }, { status: 401 }); + } + const projectData = await response.json(); + const projectId = projectData.data.project.id; - let { spanId, userScore, userId } = await req.json().catch(() => ({})); - if (!spanId || !userScore || !userId) { - return NextResponse.json( - { - error: - "spanId, userId and userScore are required in the request body", + let { spanId, userScore, userId } = await req.json().catch(() => ({})); + if (!spanId || !userScore || !userId) { + return NextResponse.json( + { + error: + "spanId, userId and userScore are required in the request body", + }, + { status: 400 } + ); + } + userScore = Number(userScore); + if (Number.isNaN(userScore)) { + return NextResponse.json( + { error: "userScore must be a number" }, + { status: 400 } + ); + } + if (userScore !== 1 && userScore !== -1) { + return NextResponse.json( + { error: "userScore must be 1 or -1" }, + { status: 400 } + ); + } + if (userId?.length === 0) { + return NextResponse.json( + { error: "userId must be a non-empty string" }, + { status: 400 } + ); + } + const evaluation = await prisma.evaluation.findFirst({ + where: { + projectId, + spanId, }, - { status: 400 } - ); - } - userScore = Number(userScore); - if (Number.isNaN(userScore)) { - return NextResponse.json( - { error: "userScore must be a number" }, - { status: 400 } - ); - } - if (userScore !== 1 && userScore !== -1) { - return NextResponse.json( - { error: "userScore must be 1 or -1" }, - { status: 400 } - ); - } - if (userId?.length === 0) { - return NextResponse.json( - { error: "userId must be a non-empty string" }, - { status: 400 } - ); - } - const evaluation = await prisma.evaluation.findFirst({ - where: { - projectId, - spanId, - }, - }); - if (!evaluation) { - return NextResponse.json( - { error: "Evaluation not found" }, - { status: 404 } - ); - } - const updatedEvaluation = await prisma.evaluation.update({ - where: { - id: evaluation.id, - }, - data: { - userScore, - userId, - }, - }); - return NextResponse.json({ data: updatedEvaluation }); - } else { - if (!session || !session.user) { - NextResponse.json({ error: "Unauthorized" }, { status: 401 }); - } - const email = session?.user?.email as string; - if (!email) { - return NextResponse.json( - { - error: "email not found", + }); + if (!evaluation) { + return NextResponse.json( + { error: "Evaluation not found" }, + { status: 404 } + ); + } + const updatedEvaluation = await prisma.evaluation.update({ + where: { + id: evaluation.id, }, - { status: 404 } - ); - } - - const user = await prisma.user.findUnique({ - where: { - email, - }, - include: { - Team: true, - }, - }); + data: { + userScore, + userId, + }, + }); + return NextResponse.json({ data: updatedEvaluation }); + } else { + if (!session || !session.user) { + NextResponse.json({ error: "Unauthorized" }, { status: 401 }); + } + const email = session?.user?.email as string; + if (!email) { + return NextResponse.json( + { + error: "email not found", + }, + { status: 404 } + ); + } - if (!user) { - return NextResponse.json( - { - error: "user not found", + const user = await prisma.user.findUnique({ + where: { + email, }, - { status: 404 } - ); - } + include: { + Team: true, + }, + }); - const data = await req.json(); - const { id, ltUserScore, testId } = data; - const evaluation = await prisma.evaluation.update({ - where: { - id, - }, - data: { - ltUserId: user.id, - ltUserScore, - testId, - }, - }); + if (!user) { + return NextResponse.json( + { + error: "user not found", + }, + { status: 404 } + ); + } - if (!evaluation) { - return NextResponse.json( - { - error: "No evaluation found", + const data = await req.json(); + const { id, ltUserScore, testId, reason } = data; + const evaluation = await prisma.evaluation.update({ + where: { + id, }, - { status: 404 } - ); - } + data: { + ltUserId: user.id, + ltUserScore, + testId, + reason: reason || "", + }, + }); - return NextResponse.json({ - data: evaluation, - }); + if (!evaluation) { + return NextResponse.json( + { + error: "No evaluation found", + }, + { status: 404 } + ); + } + + return NextResponse.json({ + data: evaluation, + }); + } + } catch (error) { + return NextResponse.json( + { + message: "Internal server error", + }, + { status: 500 } + ); } } diff --git a/app/api/metrics/accuracy/route.ts b/app/api/metrics/accuracy/route.ts deleted file mode 100644 index 257f4e02..00000000 --- a/app/api/metrics/accuracy/route.ts +++ /dev/null @@ -1,130 +0,0 @@ -import { authOptions } from "@/lib/auth/options"; -import prisma from "@/lib/prisma"; -import { TraceService } from "@/lib/services/trace_service"; -import { Evaluation } from "@prisma/client"; -import { getServerSession } from "next-auth"; -import { redirect } from "next/navigation"; -import { NextRequest, NextResponse } from "next/server"; - -export async function GET(req: NextRequest) { - const session = await getServerSession(authOptions); - if (!session || !session.user) { - redirect("/login"); - } - - const projectId = req.nextUrl.searchParams.get("projectId") as string; - const testId = req.nextUrl.searchParams.get("testId") as string; - let lastNDays = Number(req.nextUrl.searchParams.get("lastNDays")); - let overallAccuracy = 0 - - if(Number.isNaN(lastNDays) || lastNDays < 0){ - lastNDays = 7; - } - - if (!projectId) { - return NextResponse.json( - { - error: "Please provide a projectId", - }, - { status: 400 } - ); - } - - // check if this user has access to this project - const email = session?.user?.email as string; - const user = await prisma.user.findUnique({ - where: { - email, - }, - }); - - if (!user) { - return NextResponse.json( - { - error: "user not found", - }, - { status: 404 } - ); - } - - // check if this user has access to this project - const project = await prisma.project.findFirst({ - where: { - id: projectId, - teamId: user.teamId, - }, - }); - - if (!project) { - return NextResponse.json( - { - error: "User does not have access to this project", - }, - { status: 403 } - ); - } - - let evaluations: Evaluation[] = []; - const traceService = new TraceService(); - //Fetch last 7 days of spanIds from clickhouse - const spans = await traceService.GetSpansInProject( - projectId, - lastNDays - ); - - // get evalutaion for the lastNDays - // and all evaluations where score is 1 or -1 - evaluations = await prisma.evaluation.findMany({ - where: { - projectId, - testId, - spanId: { in: [...spans.map((span) => span.span_id)]}, - ltUserScore: { - in: [1, -1], - } - }, - }); - if (!evaluations) { - return NextResponse.json({ accuracyPerDay: [], overallAccuracy: null }, { status: 200 }); - } - const evalsByDate: Record = {}; - evaluations.forEach((evaluation, index) => { - const span = spans[index]; - const date = span.start_time.split("T")[0]; - if(evalsByDate[date]){ - evalsByDate[date].push(evaluation); - } else { - evalsByDate[date] = [evaluation]; - } - }) - let totalPositive = 0; - let totalNegative = 0; - - const accuracyPerDay = Object.entries(evalsByDate).map(([date, scores]) => { - let totalPositivePerDay = 0; - let totalNegativePerDay = 0; - - scores.forEach((score) => { - if (score.ltUserScore === 1) { - totalPositivePerDay += 1; - totalPositive += 1; - } else { - totalNegativePerDay+= 1; - totalNegative += 1; - } - }); - const accuracy = (totalPositive / (totalPositive + totalNegative)) * 100; - return { - date, - accuracy, - }; - }); - accuracyPerDay.sort((a, b) => new Date(a.date).getTime() - new Date(b.date).getTime()); - // calculate average - overallAccuracy = (totalPositive / (totalPositive + totalNegative)) * 100; - - return NextResponse.json({ - overallAccuracy, - accuracyPerDay - }); -} diff --git a/app/api/metrics/score/route.ts b/app/api/metrics/score/route.ts new file mode 100644 index 00000000..b25b1f2d --- /dev/null +++ b/app/api/metrics/score/route.ts @@ -0,0 +1,126 @@ +import { authOptions } from "@/lib/auth/options"; +import prisma from "@/lib/prisma"; +import { TraceService } from "@/lib/services/trace_service"; +import { getServerSession } from "next-auth"; +import { redirect } from "next/navigation"; +import { NextRequest, NextResponse } from "next/server"; + +export async function POST(req: NextRequest) { + try { + const session = await getServerSession(authOptions); + if (!session || !session.user) { + redirect("/login"); + } + + const { projectId, testIds, lastNHours, filters, filterOperation } = + await req.json(); + + if (!projectId) { + return NextResponse.json( + { + error: "Please provide a projectId", + }, + { status: 400 } + ); + } + 1; + const email = session?.user?.email as string; + const user = await prisma.user.findUnique({ + where: { + email, + }, + }); + + if (!user) { + return NextResponse.json( + { + error: "user not found", + }, + { status: 404 } + ); + } + + // check if this user has access to this project + const project = await prisma.project.findFirst({ + where: { + id: projectId, + teamId: user.teamId, + }, + }); + + if (!project) { + return NextResponse.json( + { + error: "User does not have access to this project", + }, + { status: 403 } + ); + } + + const traceService = new TraceService(); + const spans = await traceService.GetSpansInProject( + projectId, + lastNHours, + filters, + filterOperation + ); + + let evaluations = []; + const dateScoreMap: any = {}; + + for (const testId of testIds) { + evaluations = await prisma.evaluation.findMany({ + where: { + projectId, + testId, + spanId: { in: [...spans.map((span) => span.span_id)] }, + }, + include: { + Test: true, + }, + }); + + evaluations.forEach((evaluation) => { + const span = spans.find((span) => span.span_id === evaluation.spanId); + if (!span) return; + const date = span.start_time.split("T")[0]; + + if (!dateScoreMap[date]) { + dateScoreMap[date] = {}; + } + + if (!dateScoreMap[date][`${testId}-${evaluation.Test?.name}`]) { + dateScoreMap[date][`${testId}-${evaluation.Test?.name}`] = 0; + } + + dateScoreMap[date][`${testId}-${evaluation.Test?.name}`] += + evaluation.ltUserScore || 0; + }); + } + + const chartData = Object.entries(dateScoreMap).map( + ([date, scoresByTestId]) => { + const entry: any = { date }; + Object.entries(scoresByTestId as any).forEach(([testId, score]) => { + entry[testId] = score; + }); + return entry; + } + ); + + chartData.sort( + (a, b) => + new Date(a.date as string).getTime() - + new Date(b.date as string).getTime() + ); + + return NextResponse.json(chartData); + } catch (error) { + return NextResponse.json( + { + error: "Internal server error", + }, + { status: 500 } + ); + } +} diff --git a/app/api/test/route.ts b/app/api/test/route.ts index 6d9d4e79..c1c06f0f 100644 --- a/app/api/test/route.ts +++ b/app/api/test/route.ts @@ -1,18 +1,36 @@ import { authOptions } from "@/lib/auth/options"; import prisma from "@/lib/prisma"; +import { authApiKey } from "@/lib/utils"; import { getServerSession } from "next-auth"; import { redirect } from "next/navigation"; import { NextRequest, NextResponse } from "next/server"; export async function GET(req: NextRequest) { try { + // check session + let projectId = ""; const session = await getServerSession(authOptions); if (!session || !session.user) { - redirect("/login"); + // check api key + const apiKey = req.headers.get("x-api-key"); + if (!apiKey) { + redirect("/login"); + } + + const response = await authApiKey(apiKey!); + if (response.status !== 200) { + return response; + } + + // Get project data + const projectData = await response.json(); + projectId = projectData.data.project.id; } const id = req.nextUrl.searchParams.get("id") as string; - const projectId = req.nextUrl.searchParams.get("projectId") as string; + if (!projectId) { + projectId = req.nextUrl.searchParams.get("projectId") as string; + } if (!projectId && !id) { return NextResponse.json( { @@ -72,68 +90,117 @@ export async function GET(req: NextRequest) { } export async function POST(req: NextRequest) { - const session = await getServerSession(authOptions); - if (!session || !session.user) { - redirect("/login"); - } + try { + // check session + let projectId = ""; + const session = await getServerSession(authOptions); + if (!session || !session.user) { + // check api key + const apiKey = req.headers.get("x-api-key"); + if (!apiKey) { + redirect("/login"); + } + + const response = await authApiKey(apiKey!); + if (response.status !== 200) { + return response; + } + + // Get project data + const projectData = await response.json(); + projectId = projectData.data.project.id; + } + + const data = await req.json(); + const { name, description, type, evaluationCriteria, min, max, step } = + data; + + if (!projectId) { + projectId = data.projectId; + } - const data = await req.json(); - const { name, description, projectId, min, max, step } = data; - - const test = await prisma.test.create({ - data: { - name: name, - description: description, - projectId: projectId, - min: min ?? -1, - max: max ?? 1, - step: step ?? 2, - }, - }); - - return NextResponse.json({ - data: test, - }); + const test = await prisma.test.create({ + data: { + name: name, + description: description, + projectId: projectId, + type: type ?? "manual", + evaluationCriteria: evaluationCriteria ?? "", + min: min ?? -1, + max: max ?? 1, + step: step ?? 2, + }, + }); + + return NextResponse.json({ + data: test, + }); + } catch (error) { + return NextResponse.json( + { + message: "Internal server error", + }, + { status: 500 } + ); + } } export async function PUT(req: NextRequest) { - const session = await getServerSession(authOptions); - if (!session || !session.user) { - redirect("/login"); - } + try { + const session = await getServerSession(authOptions); + if (!session || !session.user) { + redirect("/login"); + } + + const data = await req.json(); + const { id, name, description } = data; - const data = await req.json(); - const { id, name, description } = data; - - const test = await prisma.test.update({ - where: { - id, - }, - data: { - name, - description, - }, - }); - - return NextResponse.json({ - data: test, - }); + const test = await prisma.test.update({ + where: { + id, + }, + data: { + name, + description, + }, + }); + + return NextResponse.json({ + data: test, + }); + } catch (error) { + return NextResponse.json( + { + message: "Internal server error", + }, + { status: 500 } + ); + } } export async function DELETE(req: NextRequest) { - const session = await getServerSession(authOptions); - if (!session || !session.user) { - redirect("/login"); - } + try { + const session = await getServerSession(authOptions); + if (!session || !session.user) { + redirect("/login"); + } - const data = await req.json(); - const { id } = data; + const data = await req.json(); + const { id } = data; - await prisma.test.delete({ - where: { - id, - }, - }); + await prisma.test.delete({ + where: { + id, + }, + }); - return NextResponse.json({}); + return NextResponse.json({}); + } catch (error) { + return NextResponse.json( + { + message: "Internal server error", + }, + { status: 500 } + ); + } } diff --git a/components/charts/eval-chart.tsx b/components/charts/eval-chart.tsx index 7bbd8410..51b00bd6 100644 --- a/components/charts/eval-chart.tsx +++ b/components/charts/eval-chart.tsx @@ -2,63 +2,109 @@ import { formatDurationForDisplay } from "@/lib/utils"; import { Test } from "@prisma/client"; +// import { AreaChart } from "@tremor/react"; import { AreaChart } from "@tremor/react"; +import { useState } from "react"; import { useQuery } from "react-query"; +import DayFilter, { timeRanges } from "../shared/day-filter"; import { Info } from "../shared/info"; import LargeChartLoading from "./large-chart-skeleton"; +const data = [ + { + testId: "clx2ua1ag002xpctlwyupmoaz", + overall: 100, + perday: [ + { + date: "2024-05-22", + score: 100, + }, + { + date: "2024-05-21", + score: 100, + }, + ], + }, + { + testId: "clx2ua1ag112xpctlwyupmoaz", + overall: 50, + perday: [ + { + date: "2024-05-22", + score: 50, + }, + { + date: "2024-05-18", + score: 50, + }, + ], + }, +]; + export function EvalChart({ projectId, - test, - lastNHours = 168, - chartDescription = "Evaluated Average(%) over time (last 7 days) for LLM interactions aggregated by day.", - info = "Average is calculated based on the score of evaluated llm interactions in the Evaluation tab of the project. Span's start_time is used for day aggregation.", + tests, + chartDescription = "Trend of test scores over the selected period of time", + info = "Score is the sum total all the evaluated score for a test over the selected period of time.", }: { projectId: string; - test: Test; - lastNHours?: number; + tests: Test[]; chartDescription?: string; info?: string; }) { - const fetchAccuracy = useQuery({ - queryKey: ["fetch-accuracy", projectId, test.id, lastNHours], + const [lastNHours, setLastNHours] = useState(timeRanges[0].value); + + const { data: chartData, isLoading } = useQuery({ + queryKey: ["fetch-score", projectId, lastNHours], queryFn: async () => { - const response = await fetch( - `/api/metrics/accuracy?projectId=${projectId}&testId=${test.id}` - ); + const filters = [ + { + key: "llm.prompts", + operation: "NOT_EQUALS", + value: "", + type: "attribute", + }, + { + key: "status_code", + operation: "EQUALS", + value: "OK", + type: "property", + }, + ]; + const response = await fetch("/api/metrics/score", { + method: "POST", + body: JSON.stringify({ + projectId, + testIds: tests.map((test) => test.id), + lastNHours, + filters, + filterOperation: "AND", + }), + }); const result = await response.json(); return result; }, }); - if (fetchAccuracy.isLoading || !fetchAccuracy.data) { + if (isLoading || !chartData) { return ; } else { - const data: Array> = - fetchAccuracy?.data?.accuracyPerDay; - const overallAccuracy: number = fetchAccuracy?.data?.overallAccuracy; - return ( <>
+

{chartDescription}

-

- Overall Accuracy: {overallAccuracy?.toFixed(2)}% -

({ - date: dat.date, - "Evaluated Accuracy(%)": dat.accuracy, - }))} + data={chartData} index="date" - categories={["Evaluated Accuracy(%)"]} - colors={["purple"]} + categories={tests.map((test) => `${test.id}-${test.name}`)} + colors={["purple", "blue", "red", "green", "orange", "black"]} showAnimation={true} showTooltip={true} yAxisWidth={33} diff --git a/components/evaluate/create-test.tsx b/components/evaluate/create-test.tsx index c0ab8b75..64271c3c 100644 --- a/components/evaluate/create-test.tsx +++ b/components/evaluate/create-test.tsx @@ -94,6 +94,18 @@ export function CreateTest({
{ try { + if (!data.min || !data.max || !data.step) { + throw new Error("Please provide a scale for the test."); + } + if (parseInt(data.min, 10) >= parseInt(data.max, 10)) { + throw new Error("Min score must be less than max score."); + } + if (parseInt(data.step, 10) <= 0) { + throw new Error("Step value must be greater than 0."); + } + if (parseInt(data.min, 10) === parseInt(data.max, 10)) { + throw new Error("Min score cannot be equal to max score."); + } setBusy(true); await fetch("/api/test", { method: "POST", @@ -111,9 +123,9 @@ export function CreateTest({ projectId, }), }); - await queryClient.invalidateQueries( - `fetch-tests-${projectId}-query` - ); + await queryClient.invalidateQueries({ + queryKey: ["fetch-tests-query", projectId], + }); toast("Test created!", { description: "Your test has been created.", }); @@ -278,6 +290,7 @@ export function CreateTest({ max={max} step={step} selectedValue={min} + disableAutoFocus={true} />
)} diff --git a/components/evaluate/edit-test.tsx b/components/evaluate/edit-test.tsx index cf32dd71..5596d14e 100644 --- a/components/evaluate/edit-test.tsx +++ b/components/evaluate/edit-test.tsx @@ -1,4 +1,12 @@ import { Button } from "@/components/ui/button"; +import { + Command, + CommandEmpty, + CommandGroup, + CommandInput, + CommandItem, + CommandList, +} from "@/components/ui/command"; import { Dialog, DialogContent, @@ -23,10 +31,17 @@ import { FormMessage, } from "@/components/ui/form"; import { Input } from "@/components/ui/input"; +import { + Popover, + PopoverContent, + PopoverTrigger, +} from "@/components/ui/popover"; +import { cn } from "@/lib/utils"; import { zodResolver } from "@hookform/resolvers/zod"; import { Test } from "@prisma/client"; import { DotsHorizontalIcon } from "@radix-ui/react-icons"; -import { EditIcon, TrashIcon } from "lucide-react"; +import { Check, ChevronsUpDown, EditIcon, TrashIcon } from "lucide-react"; +import * as React from "react"; import { useState } from "react"; import { useForm } from "react-hook-form"; import { useQueryClient } from "react-query"; @@ -35,15 +50,16 @@ import { z } from "zod"; import { Info } from "../shared/info"; export function EditTest({ - test, + tests, projectId, className = "w-full text-left p-0 text-muted-foreground hover:text-primary flex items-center", }: { - test: Test; + tests: Test[]; projectId: string; variant?: any; className?: string; }) { + const [test, setTest] = useState(); const queryClient = useQueryClient(); const [open, setOpen] = useState(false); const [openEdit, setOpenEdit] = useState(false); @@ -55,13 +71,20 @@ export function EditTest({ }); const EditTestForm = useForm({ resolver: zodResolver(schema), - defaultValues: { - name: test.name || "", - description: test.description || "", - }, }); return ( - + { + if (!value) { + setTest(undefined); + setOpenEdit(false); + setOpenDelete(false); + EditTestForm.reset(); + } + setOpen(value); + }} + > - + {!test && } + {test && ( + <> + ( + + + Name + + + + + + + + )} + /> + ( + + + Description + + + + + + + + )} + /> + + + + + )} @@ -207,8 +242,11 @@ export function EditTest({ Delete Test - Are you sure you want to delete this test? + {test + ? `Are you sure you want to delete ${test?.name} ?` + : "Select a test to delete."} + {!test && } + + + + + + No test found. + + {tests.map((test) => ( + { + setValue(currentValue === value ? "" : currentValue); + setOpen(false); + onSelect(test); + }} + > + + {test.name} + + ))} + + + + + + ); +} diff --git a/components/evaluate/eval-dialog.tsx b/components/evaluate/eval-dialog.tsx deleted file mode 100644 index aea7839d..00000000 --- a/components/evaluate/eval-dialog.tsx +++ /dev/null @@ -1,517 +0,0 @@ -import { - AlertDialog, - AlertDialogContent, - AlertDialogTrigger, -} from "@/components/ui/alert-dialog"; -import { Button } from "@/components/ui/button"; -import { correctTimestampFormat } from "@/lib/trace_utils"; -import { - cn, - extractSystemPromptFromLlmInputs, - formatDateTime, -} from "@/lib/utils"; -import { Test } from "@prisma/client"; -import { Cross1Icon, EnterIcon } from "@radix-ui/react-icons"; -import { ProgressCircle } from "@tremor/react"; -import { - ArrowDownSquareIcon, - ArrowUpSquareIcon, - CheckIcon, - ChevronLeft, - ChevronRight, - ChevronsRight, - DeleteIcon, -} from "lucide-react"; -import { useEffect, useState } from "react"; -import Markdown from "react-markdown"; -import { useQuery, useQueryClient } from "react-query"; -import { toast } from "sonner"; -import { VendorLogo } from "../shared/vendor-metadata"; -import { Skeleton } from "../ui/skeleton"; -import { ScaleType } from "./eval-scale-picker"; -import { RangeScale } from "./range-scale"; - -export function EvalDialog({ - test, - projectId, -}: { - test: Test; - projectId: string; -}) { - const [open, setOpen] = useState(false); - return ( - setOpen(o)}> - - - - - setOpen(false)} - /> - - - ); -} - -function EvalContent({ - test, - projectId, - close, -}: { - test: Test; - projectId: string; - close: () => void; -}) { - const min = test?.min !== undefined && test?.min !== null ? test.min : -1; - const max = test?.max !== undefined && test?.max !== null ? test.max : 1; - const step = test?.step !== undefined && test?.step !== null ? test.step : 2; - const [score, setScore] = useState(min); - const [scorePercent, setScorePercent] = useState(0); - const [color, setColor] = useState("red"); - const [page, setPage] = useState(1); - const [totalPages, setTotalPages] = useState(1); - const [span, setSpan] = useState(null); - const [addedToDataset, setAddedToDataset] = useState(false); - const [busy, setBusy] = useState(false); - const queryClient = useQueryClient(); - - // Reset the score and color when the test changes - useEffect(() => { - setScore(min); - setScorePercent(0); - setColor("red"); - setPage(1); - setTotalPages(1); - setSpan(null); - }, [test?.id]); - - useEffect(() => { - const handleKeyPress = (event: any) => { - if (event.key === "Enter") { - next(); - } - if (event.key === "Backspace") { - previous(); - } - }; - - // Add event listener - window.addEventListener("keydown", handleKeyPress); - - // Remove event listener on cleanup - return () => { - window.removeEventListener("keydown", handleKeyPress); - }; - }, []); - - const { isLoading } = useQuery({ - queryKey: ["fetch-spans-query", page, test?.id], - queryFn: async () => { - const filters = [ - { - key: "llm.prompts", - operation: "NOT_EQUALS", - value: "", - }, - // Accuracy is the default test. So no need to - // send the testId with the spans when using the SDK. - { - key: "langtrace.testId", - operation: "EQUALS", - value: test.name.toLowerCase() !== "factual accuracy" ? test.id : "", - }, - ]; - - // convert filterserviceType to a string - const apiEndpoint = "/api/spans"; - const body = { - page, - pageSize: 1, - projectId: projectId, - filters: filters, - filterOperation: "AND", - }; - - const response = await fetch(apiEndpoint, { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify(body), - }); - const result = await response.json(); - return result; - }, - onSuccess: (data) => { - // Get the newly fetched data and metadata - const spans = data?.spans?.result || []; - const metadata = data?.spans?.metadata || {}; - - // Update the total pages and current page number - setTotalPages(parseInt(metadata?.total_pages) || 1); - - // Update the span state - if (spans.length > 0) { - setSpan(spans[0]); - } - }, - }); - - const next = async () => { - // Evaluate the current score - await evaluate(); - if (page < totalPages) { - setPage((prev) => prev + 1); - } - }; - - const previous = () => { - if (page > 1) { - setPage((prev) => prev - 1); - } - }; - - const { isLoading: isEvaluationLoading, data: evaluationsData } = useQuery({ - queryKey: ["fetch-evaluation-query", span?.span_id], - queryFn: async () => { - const response = await fetch(`/api/evaluation?spanId=${span?.span_id}`); - const result = await response.json(); - const sc = - result.evaluations.length > 0 ? result.evaluations[0].ltUserScore : min; - onScoreSelected(sc); - return result; - }, - enabled: !!span, - }); - - useQuery({ - queryKey: ["fetch-data-query", span?.span_id], - queryFn: async () => { - const response = await fetch(`/api/data?spanId=${span?.span_id}`); - const result = await response.json(); - setAddedToDataset(result.data.length > 0); - return result; - }, - enabled: !!span, - }); - - const evaluate = async () => { - setBusy(true); - try { - const attributes = span?.attributes ? JSON.parse(span.attributes) : {}; - if (Object.keys(attributes).length === 0) return; - const model = attributes["llm.model"]; - const prompts = attributes["llm.prompts"]; - const systemPrompt = extractSystemPromptFromLlmInputs(prompts); - - // Check if an evaluation already exists - if (evaluationsData?.evaluations[0]?.id) { - if (evaluationsData.evaluations[0].ltUserScore === score) { - setBusy(false); - return; - } - // Update the existing evaluation - await fetch("/api/evaluation", { - method: "PUT", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ - id: evaluationsData.evaluations[0].id, - ltUserScore: score, - }), - }); - queryClient.invalidateQueries([ - "fetch-evaluation-query", - span?.span_id, - ]); - } else { - // Create a new evaluation - await fetch("/api/evaluation", { - method: "POST", - headers: { - "Content-Type": "application/json", - }, - body: JSON.stringify({ - projectId: projectId, - spanId: span.span_id, - traceId: span.trace_id, - ltUserScore: score, - testId: test.id, - }), - }); - queryClient.invalidateQueries([ - "fetch-evaluation-query", - span?.span_id, - ]); - } - toast.success("Span evaluated successfully!"); - } catch (error: any) { - toast.error("Error evaluating the span!", { - description: `There was an error evaluating the span: ${error.message}`, - }); - } finally { - setBusy(false); - } - }; - - const onScoreSelected = (value: number) => { - setScore(value); - - // Calculate the percentage of the score using min, max and step - const range = max - min; - const steps = range / step; - const scorePercent = ((value - min) / steps) * 100; - setScorePercent(scorePercent); - - if (scorePercent < 33) { - setColor("red"); - } - if (scorePercent >= 33 && scorePercent < 66) { - setColor("yellow"); - } - if (scorePercent >= 66) { - setColor("green"); - } - }; - - if (isLoading) { - return ; - } else { - return ( -
-
-

- {page}/{totalPages} -

- -
- -
-
-

- {test?.name || "No name provided"} -

-

- {test?.description || "No description provided"} -

-
-
-
-

- Evaluation Scale{" "} -

-

- {min} to {max} in steps of +{step} -

-
- {span?.start_time && ( -
-

- Timestamp -

-

- {formatDateTime(correctTimestampFormat(span?.start_time))} -

-
- )} -
-

- Scale - - {evaluationsData?.evaluations[0]?.id - ? "Evaluated" - : "Not Evaluated"} - -

- {isEvaluationLoading || isLoading || busy ? ( -
- {[1, 2, 3, 4].map((item) => ( - - ))} -
- ) : ( - - )} -

Score

- -

{score}

-
-
-

Hotkeys

-
- - -

Arrow keys to navigate the scale

-
-
- -

- Enter/Return to submit the score and continue to the next - evaluation -

-
-
- -

- Delete/Backspace to go back to the previous evaluation -

-
-
-

Esc

-

Press Esc to exit the evaluation dialog

-
-
-
-
- - - -
-
- ); - } -} - -function ConversationView({ span }: { span: any }) { - const attributes = span?.attributes ? JSON.parse(span.attributes) : {}; - if (!attributes) return

No data found

; - - const prompts = attributes["llm.prompts"]; - const responses = attributes["llm.responses"]; - - if (!prompts && !responses) return

No data found

; - - return ( -
- {prompts?.length > 0 && - JSON.parse(prompts).map((prompt: any, i: number) => ( -
-

- {prompt?.role - ? prompt?.role === "function" - ? `${prompt?.role} - ${prompt?.name}` - : prompt?.role - : "Input"} - : - {prompt?.content - ? " (content)" - : prompt?.function_call - ? " (function call)" - : ""} -

{" "} - - {prompt?.content - ? prompt?.content - : prompt?.function_call - ? JSON.stringify(prompt?.function_call) - : "No input found"} - -
- ))} - {responses?.length > 0 && - JSON.parse(responses).map((response: any, i: number) => ( -
-
- -

- {response?.message?.role || "Output"}: -

{" "} -
- - {response?.message?.content || - response?.text || - response?.content || - "No output found"} - -
- ))} -
- ); -} - -function EvalDialogSkeleton() { - return ( -
-
- - - - - - - -
-
- - - - - -
- - -
-
- - -
-
- - -
-
- - -
-
-
- ); -} diff --git a/components/evaluate/evaluation-row.tsx b/components/evaluate/evaluation-row.tsx index 99a612c0..28d70d98 100644 --- a/components/evaluate/evaluation-row.tsx +++ b/components/evaluate/evaluation-row.tsx @@ -1,21 +1,28 @@ import { HoverCell } from "@/components/shared/hover-cell"; -import { LLMView } from "@/components/shared/llm-view"; -import { Button } from "@/components/ui/button"; import { Checkbox } from "@/components/ui/checkbox"; +import { + Sheet, + SheetContent, + SheetDescription, + SheetHeader, + SheetTitle, +} from "@/components/ui/sheet"; import detectPII from "@/lib/pii"; import { correctTimestampFormat } from "@/lib/trace_utils"; import { calculatePriceFromUsage, cn, formatDateTime } from "@/lib/utils"; -import { Evaluation } from "@prisma/client"; +import { Evaluation, Test } from "@prisma/client"; import { - ArrowTopRightIcon, CheckCircledIcon, CrossCircledIcon, DotFilledIcon, } from "@radix-ui/react-icons"; -import { ChevronDown, ChevronRight } from "lucide-react"; -import Link from "next/link"; -import { useState } from "react"; -import { useQuery } from "react-query"; +import { ProgressCircle } from "@tremor/react"; +import { useEffect, useState } from "react"; +import { useQuery, useQueryClient } from "react-query"; +import { toast } from "sonner"; +import ConversationView from "../shared/conversation-view"; +import { ScaleType } from "./eval-scale-picker"; +import { RangeScale } from "./range-scale"; interface CheckedData { input: string; @@ -27,35 +34,33 @@ export default function EvaluationRow({ key, span, projectId, - testId, - page, + tests, onCheckedChange, selectedData, + selectedSpan, + setSelectedSpan, }: { key: number; span: any; projectId: string; - testId: string; - page: number; + tests: Test[]; onCheckedChange: (data: CheckedData, checked: boolean) => void; selectedData: CheckedData[]; + setSelectedSpan: (spanId: string) => void; + selectedSpan?: string; }) { - const [score, setScore] = useState(-100); // 0: neutral, 1: thumbs up, -1: thumbs down - const [collapsed, setCollapsed] = useState(true); - const [evaluation, setEvaluation] = useState(); + const [evaluations, setEvaluations] = useState(); const [addedToDataset, setAddedToDataset] = useState(false); + const [open, setOpen] = useState(false); useQuery({ - queryKey: ["fetch-evaluation-query", span.span_id, testId], + queryKey: ["fetch-evaluation-query", span?.span_id], queryFn: async () => { const response = await fetch( - `/api/evaluation?spanId=${span.span_id}&testId=${testId}` + `/api/evaluation?spanId=${span?.span_id}&projectId=${projectId}&includeTest=true` ); const result = await response.json(); - setEvaluation(result.evaluations.length > 0 ? result.evaluations[0] : {}); - setScore( - result.evaluations.length > 0 ? result.evaluations[0].ltUserScore : -100 - ); + setEvaluations(result.evaluations.length > 0 ? result.evaluations : []); return result; }, }); @@ -75,8 +80,8 @@ export default function EvaluationRow({ if (!attributes) return null; // extract the metrics - const userScore = evaluation?.userScore || ""; - const userId = evaluation?.userId || ""; + const userScore = evaluations ? evaluations[0]?.userScore || "" : ""; + const userId = evaluations ? evaluations[0]?.userId || "" : ""; const startTimeMs = new Date( correctTimestampFormat(span.start_time) ).getTime(); @@ -88,8 +93,8 @@ export default function EvaluationRow({ let vendor = ""; let tokenCounts: any = {}; let cost = { total: 0, input: 0, output: 0 }; + model = attributes["llm.model"] || ""; if (attributes["llm.token.counts"]) { - model = attributes["llm.model"]; vendor = attributes["langtrace.service.name"]; tokenCounts = JSON.parse(attributes["llm.token.counts"]); cost = calculatePriceFromUsage(vendor.toLowerCase(), model, tokenCounts); @@ -114,20 +119,25 @@ export default function EvaluationRow({ ).length > 0); return ( -
-
setCollapsed(!collapsed)} - > + { + setOpen(true); + setSelectedSpan(span.span_id); + }} + className={cn( + "rounded-md hover:bg-muted w-full hover:cursor-pointer", + selectedSpan === span.span_id ? "bg-muted" : "" + )} + > +
e.stopPropagation()} + className="flex flex-row items-center gap-2" + // onClick={(e) => e.stopPropagation()} > e.stopPropagation()} onCheckedChange={(state: boolean) => { const input = JSON.parse(prompts).find( (prompt: any) => prompt.role === "user" @@ -154,39 +164,25 @@ export default function EvaluationRow({ }} checked={selectedData.some((d) => d.spanId === span.span_id)} /> - -

setCollapsed(!collapsed)} - > +

{formatDateTime(correctTimestampFormat(span.start_time))}

-

{model}

+ + {model} + 0 ? JSON.parse(prompts) : []} /> + + 0 ? JSON.parse(responses) : []} /> -

- {cost.total.toFixed(6) !== "0.000000" - ? `\$${cost.total.toFixed(6)}` - : ""} -

+ +
{piiDetected ? ( @@ -195,40 +191,198 @@ export default function EvaluationRow({ )}

{piiDetected ? "Yes" : "No"}

-

- {durationMs}ms -

-

- {score !== -100 ? score : "Not evaluated"} -

-

- {userScore ? userScore : "Not evaluated"} -

-

{userId || "Not Available"}

-
- {addedToDataset ? ( - - ) : ( - - )} - - - + + {tests.map((test: Test, i) => { + const evaluation = evaluations?.find((e) => e.testId === test.id); + return ( + + {evaluation ? evaluation.ltUserScore : "Not evaluated"} + + ); + })} + + {userScore ? userScore : "Not evaluated"} + + {userId || "Not available"} + + {addedToDataset ? ( + + ) : ( + + )} + +
+ + e.preventDefault()} + onInteractOutside={(e) => { + e.preventDefault(); + }} + className={cn("w-[1500px] overflow-y-scroll")} + onClick={(e) => e.stopPropagation()} + > + + Evaluate + + Evaluate the input and output of this LLM request. + + +
+
+ +
+
+ {tests.map((test: Test, i) => { + const evaluation = evaluations?.find( + (e) => e.testId === test.id + ); + return ( + + ); + })} +
+
+
+
+
+ + ); +} + +function EvaluateTest({ + test, + span, + projectId, + evaluation, +}: { + test: Test; + projectId: string; + span: any; + evaluation?: Evaluation; +}) { + const [score, setScore] = useState(0); + const [color, setColor] = useState("red"); + const [scorePercent, setScorePercent] = useState(0); + const queryClient = useQueryClient(); + + useEffect(() => { + if (evaluation && evaluation.ltUserScore) { + setScore(evaluation.ltUserScore); + onScoreSelected(test, evaluation.ltUserScore); + } + }, []); + + const onScoreSelected = (test: Test, value: number, submit = false) => { + setScore(value); + + // Calculate the percentage of the score using min, max and step + if (!test) return; + const max = test?.max || 0; + const min = test?.min || 0; + const range = max - min; + const scorePercent = ((value - min) / range) * 100; + setScorePercent(scorePercent); + + if (scorePercent < 33) { + setColor("red"); + } + if (scorePercent >= 33 && scorePercent < 66) { + setColor("yellow"); + } + if (scorePercent >= 66) { + setColor("green"); + } + if (submit) { + evaluate(value); + } + }; + + const evaluate = async (value: number) => { + try { + const attributes = span?.attributes ? JSON.parse(span.attributes) : {}; + if (Object.keys(attributes).length === 0) return; + + // Check if an evaluation already exists + if (evaluation) { + if (evaluation.ltUserScore === value) { + return; + } + // Update the existing evaluation + await fetch("/api/evaluation", { + method: "PUT", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + id: evaluation.id, + ltUserScore: value, + testId: test.id, + }), + }); + await queryClient.invalidateQueries({ + queryKey: ["fetch-evaluation-query", span?.span_id], + }); + } else { + // Create a new evaluation + await fetch("/api/evaluation", { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + projectId: projectId, + spanId: span.span_id, + traceId: span.trace_id, + ltUserScore: value, + testId: test.id, + }), + }); + await queryClient.invalidateQueries({ + queryKey: ["fetch-evaluation-query", span?.span_id], + }); + } + } catch (error: any) { + toast.error("Error evaluating the span!", { + description: `There was an error evaluating the span: ${error.message}`, + }); + } + }; + + return ( +
+
+
+

+ {test?.name || "No name provided"} +

+ +

{score}

+
+

+ {test?.description || "No description provided"} +

- {!collapsed && ( - - )} + onScoreSelected(test, value, true)} + />
); } diff --git a/components/evaluate/evaluation-table.tsx b/components/evaluate/evaluation-table.tsx index 46af7b3f..49442d10 100644 --- a/components/evaluate/evaluation-table.tsx +++ b/components/evaluate/evaluation-table.tsx @@ -16,8 +16,8 @@ interface CheckedData { } export default function EvaluationTable({ + tests, projectId, - test, selectedData, setSelectedData, currentData, @@ -27,8 +27,8 @@ export default function EvaluationTable({ totalPages, setTotalPages, }: { + tests: Test[]; projectId: string; - test: Test; selectedData: CheckedData[]; setSelectedData: (data: CheckedData[]) => void; currentData: any; @@ -39,6 +39,7 @@ export default function EvaluationTable({ setTotalPages: (totalPages: number) => void; }) { const [showLoader, setShowLoader] = useState(false); + const [selectedSpan, setSelectedSpan] = useState(); const onCheckedChange = (data: CheckedData, checked: boolean) => { if (checked) { @@ -49,13 +50,20 @@ export default function EvaluationTable({ }; const fetchLlmPromptSpans = useQuery({ - queryKey: [`fetch-llm-prompt-spans-${test.id}-query`], + queryKey: ["fetch-llm-prompt-spans-query"], queryFn: async () => { const filters = [ { key: "llm.prompts", operation: "NOT_EQUALS", value: "", + type: "attribute", + }, + { + key: "status_code", + operation: "EQUALS", + value: "OK", + type: "property", }, ]; @@ -141,44 +149,61 @@ export default function EvaluationTable({ }); return ( -
- {currentData.length > 0 && ( -
-

- Timestamp (UTC) -

-

Model

-

Input

-

Output

-

Cost

-

PII Detected

-

Duration

-

Evaluated Score

-

User Score

-

User Id

-

Added to Dataset

-
- )} +
{fetchLlmPromptSpans.isLoading || !fetchLlmPromptSpans.data || !currentData ? ( ) : ( - currentData.map((span: any, i: number) => { - if (span.status_code !== "ERROR") { - return ( - - ); - } - }) + + + + + + + + + {tests.map((test: Test, i) => ( + + ))} + + + + + + + {currentData.length > 0 && + currentData.map((span: any, i: number) => { + if (span.status_code !== "ERROR") { + return ( + + ); + } + })} + +
+ Timestamp (UTC) + + Model + InputOutput + PII Detected + + {test.name} + User ScoreUser Id + Added to Dataset +
)} {showLoader && (
diff --git a/components/evaluate/range-scale.tsx b/components/evaluate/range-scale.tsx index 477302f7..6383f6b9 100644 --- a/components/evaluate/range-scale.tsx +++ b/components/evaluate/range-scale.tsx @@ -16,6 +16,7 @@ interface RangeScaleProps { type?: ScaleType | "range"; variant?: string | "default"; disabled?: boolean; + disableAutoFocus?: boolean; } export function RangeScale({ @@ -27,11 +28,13 @@ export function RangeScale({ type = ScaleType.Range, variant = "default", disabled = false, + disableAutoFocus = false, }: RangeScaleProps) { const radioRef = React.createRef(); const buttonRef = React.createRef(); useEffect(() => { + if (disableAutoFocus) return; if (radioRef.current) { radioRef.current.focus(); } diff --git a/components/playground/common.tsx b/components/playground/common.tsx index 6d31b5f8..0cd64a4e 100644 --- a/components/playground/common.tsx +++ b/components/playground/common.tsx @@ -10,6 +10,7 @@ import { OpenAIRole, } from "@/lib/types/playground_types"; import { cn } from "@/lib/utils"; +import { ArrowTopRightIcon } from "@radix-ui/react-icons"; import { MinusCircleIcon, PlusIcon } from "lucide-react"; import { useEffect, useRef, useState } from "react"; import { useQuery } from "react-query"; @@ -79,20 +80,22 @@ export function ExpandingTextArea({ return (