Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CH] Migrate failure annotation page #5803

Merged
merged 3 commits into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions torchci/clickhouse_queries/failed_workflow_jobs/params.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
{
"branch": "String",
"count": "Int64",
"repo": "String",
"startTime": "DateTime64(3)",
"stopTime": "DateTime64(3)"
}
}
82 changes: 24 additions & 58 deletions torchci/clickhouse_queries/failed_workflow_jobs/query.sql
Original file line number Diff line number Diff line change
@@ -1,60 +1,26 @@
-- !!! Query is not converted to CH syntax yet. Delete this line when it gets converted
WITH repeats AS (
SELECT
array_agg(j.id) AS ids
FROM
workflow_run w
JOIN workflow_job j ON w.id = j.run_id HINT(join_strategy = lookup)
WHERE
j._event_time >= PARSE_DATETIME_ISO8601(: startTime)
AND j._event_time < PARSE_DATETIME_ISO8601(: stopTime)
AND w.head_repository.full_name = : repo
AND w.head_branch = : branch
AND w.event != 'workflow_run'
AND w.event != 'repository_dispatch'
GROUP BY
j.head_sha,
j.name,
w.name
HAVING
count(*) > : count
AND bool_or(
j.conclusion IN (
'failure', 'cancelled', 'time_out'
)
)
),
ids AS (
SELECT
ids.id
FROM
repeats,
UNNEST(repeats.ids AS id) AS ids
)
SELECT
job.head_sha AS sha,
CONCAT(w.name, ' / ', job.name) AS jobName,
job.id,
job.conclusion,
job.html_url AS htmlUrl,
CONCAT(
'https://ossci-raw-job-status.s3.amazonaws.com/log/',
CAST(job.id AS string)
) AS logUrl,
DATE_DIFF(
'SECOND',
PARSE_TIMESTAMP_ISO8601(job.started_at),
PARSE_TIMESTAMP_ISO8601(job.completed_at)
) AS durationS,
w.repository.full_name AS repo,
ARRAY_CREATE(job.torchci_classification.line) AS failureLines,
job.torchci_classification.captures AS failureCaptures,
ARRAY_CREATE(job.torchci_classification.line_num) AS failureLineNumbers,
-- This query is used to annotate jobs on HUD
SELECT DISTINCT
j.head_sha AS sha,
CONCAT(w.name, ' / ', j.name) AS jobName,
j.id,
j.conclusion,
j.html_url AS htmlUrl,
CONCAT(
'https://ossci-raw-job-status.s3.amazonaws.com/log/',
j.id
) AS logUrl,
DATE_DIFF('SECOND', j.started_at, j.completed_at) AS durationS,
array(j.torchci_classification. 'line') AS failureLines,
j.torchci_classification. 'captures' AS failureCaptures,
array(j.torchci_classification. 'line_num') AS failureLineNumbers
FROM
ids
JOIN workflow_job job on job.id = ids.id
INNER JOIN workflow_run w on w.id = job.run_id
workflow_job j FINAL
JOIN workflow_run w FINAL on w.id = j.run_id
WHERE
job.conclusion IN (
'failure', 'cancelled', 'time_out'
)
j.created_at >= {startTime: DateTime64(3) }
AND j.created_at < {stopTime: DateTime64(3) }
AND w.head_repository. 'full_name' = {repo: String }
AND w.head_branch = {branch: String }
AND w.event != 'workflow_run'
AND w.event != 'repository_dispatch'
AND j.conclusion IN ('failure', 'cancelled', 'time_out')
6 changes: 5 additions & 1 deletion torchci/components/JobAnnotationToggle.tsx
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { ToggleButton, ToggleButtonGroup } from "@mui/material";
import _ from "lodash";
import { useSession } from "next-auth/react";
import React from "react";
import { JobAnnotation, JobData } from "../lib/types";
Expand All @@ -15,7 +16,10 @@ export default function JobAnnotationToggle({
repo?: string | null;
}) {
const allJobs = similarJobs ?? [];
allJobs.push(job);
// Double check if the job exists before adding it
if (!_.find(allJobs, (j: JobData) => j.id === job.id)) {
allJobs.push(job);
}

const [state, setState] = React.useState<JobAnnotation>(
(annotation ?? "null") as JobAnnotation
Expand Down
Original file line number Diff line number Diff line change
@@ -1,22 +1,12 @@
import { queryClickhouseSaved } from "lib/clickhouse";
import { getDynamoClient } from "lib/dynamo";
import getRocksetClient, { RocksetParam } from "lib/rockset";
import { JobData } from "lib/types";
import { NextApiRequest, NextApiResponse } from "next";
import rocksetVersions from "rockset/prodVersions.json";

async function fetchFailureJobs(
queryParams: RocksetParam[]
): Promise<JobData[]> {
const rocksetClient = getRocksetClient();
const failedJobs = await rocksetClient.queryLambdas.executeQueryLambda(
"commons",
"failed_workflow_jobs",
rocksetVersions.commons.failed_workflow_jobs,
{
parameters: queryParams,
}
);
return failedJobs.results ?? [];
async function fetchFailureJobs(queryParams: {
[key: string]: any;
}): Promise<JobData[]> {
return await queryClickhouseSaved("failed_workflow_jobs", queryParams);
}

export default async function handler(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import LogViewer from "components/LogViewer";
import dayjs from "dayjs";
import { fetcher } from "lib/GeneralUtils";
import { isRerunDisabledTestsJob, isUnstableJob } from "lib/jobUtils";
import { RocksetParam } from "lib/rockset";
import { JobAnnotation, JobData } from "lib/types";
import _ from "lodash";
import { useRouter } from "next/router";
Expand Down Expand Up @@ -38,14 +37,17 @@ function SimilarFailedJobs({
{showDetail ? "▼ " : "▶ "}
<code>Failing {similarJobs.length} times</code>
</button>
{showDetail &&
_.map(similarJobs, (job) => (
<FailedJob
job={job}
similarJobs={[]}
classification={classification}
/>
))}
<ul>
{showDetail &&
_.map(similarJobs, (job) => (
<FailedJob
job={job}
similarJobs={[]}
classification={classification}
key={job.id}
/>
))}
</ul>
</div>
);
}
Expand Down Expand Up @@ -97,12 +99,13 @@ function FailedJob({
}

function FailedJobsByFailure({
jobs,
jobsBySha,
annotations,
}: {
jobs: JobData[];
jobsBySha: { [sha: string]: JobData };
annotations: { [id: string]: { [key: string]: any } };
}) {
const jobs: JobData[] = _.map(jobsBySha);
// Select a random representative job in the group of similar jobs. Once
// this job is classified, the rest will be put into the same category
const job: JobData | undefined = _.sample(jobs);
Expand All @@ -125,15 +128,15 @@ function FailedJobs({
repoName,
repoOwner,
}: {
queryParams: RocksetParam[];
queryParams: { [key: string]: any };
repoName: string;
repoOwner: string;
}) {
// Note: querying the list of failed jobs here and sending their IDs over to get
// their annotations is not a scalable solution because the list of failures
// could be longer than the browser-dependent URL-length limit. The workaround
// here is to send the query params over to another annotation API that will then
// make a query to Rockset to get the list of failed jobs itself and return the
// make a query to the db to get the list of failed jobs itself and return the
// list to the caller here
const { data: failedJobsWithAnnotations } = useSWR(
`/api/job_annotation/${repoOwner}/${repoName}/failures/${encodeURIComponent(
Expand All @@ -156,7 +159,7 @@ function FailedJobs({
// Grouped by annotation then by job name
const groupedJobs: {
[annotation: string]: {
[name: string]: JobData[];
[name: string]: { [sha: string]: JobData };
};
} = {};

Expand All @@ -183,10 +186,13 @@ function FailedJobs({

const failure = jobName + workflowName + failureCaptures;
if (!(failure in groupedJobs[annotation])) {
groupedJobs[annotation][failure] = [];
groupedJobs[annotation][failure] = {};
}

groupedJobs[annotation][failure].push(job);
const sha = job.sha;
if (!(sha in groupedJobs[annotation][failure])) {
groupedJobs[annotation][failure][sha] = job;
}
});

return (
Expand All @@ -205,17 +211,17 @@ function FailedJobs({
{_.reduce(
groupedJobsByFailure,
(s, v) => {
return s + v.length;
return s + Object.keys(v).length;
},
0
)}
)
</summary>
<ul>
{_.map(groupedJobsByFailure, (jobs, failure) => (
{_.map(groupedJobsByFailure, (jobsBySha, failure) => (
<FailedJobsByFailure
key={failure}
jobs={jobs}
jobsBySha={jobsBySha}
annotations={annotations}
/>
))}
Expand All @@ -233,33 +239,12 @@ export default function Page() {
const [stopTime, setStopTime] = useState(dayjs());
const [timeRange, setTimeRange] = useState<number>(7);

const queryParams: RocksetParam[] = [
{
name: "startTime",
type: "string",
value: startTime,
},
{
name: "stopTime",
type: "string",
value: stopTime,
},
{
name: "repo",
type: "string",
value: `${repoOwner}/${repoName}`,
},
{
name: "branch",
type: "string",
value: `${branch}`,
},
{
name: "count",
type: "int",
value: "0", // Set the count to 0 to query all failures
},
];
const queryParams: { [key: string]: any } = {
branch: branch,
repo: `${repoOwner}/${repoName}`,
startTime: dayjs(startTime).utc().format("YYYY-MM-DDTHH:mm:ss.SSS"),
stopTime: dayjs(stopTime).utc().format("YYYY-MM-DDTHH:mm:ss.SSS"),
};

return (
<div>
Expand Down
Loading