diff --git a/.github/actions/update-viablestrict/action.yml b/.github/actions/update-viablestrict/action.yml index 162058caae..c3c320b888 100644 --- a/.github/actions/update-viablestrict/action.yml +++ b/.github/actions/update-viablestrict/action.yml @@ -25,8 +25,16 @@ inputs: description: The token to use to push to the stable protected branch required: true type: string - rockset-api-key: - description: The API key to query Rockset, read-only + clickhouse-url: + description: ClickHouse URL + required: true + type: string + clickhouse-username: + description: ClickHouse username + required: true + type: string + clickhouse-password: + description: ClickHouse password required: true type: string test-infra-repository: @@ -67,13 +75,15 @@ runs: - name: Install Python Packages shell: bash run: | - pip install rockset==1.0.3 boto3==1.19.12 + pip install clickhouse-connect==0.7.16 boto3==1.19.12 - name: Get latest viable commit id: get-latest-commit working-directory: ${{ inputs.repository }} env: - ROCKSET_API_KEY: ${{ inputs.rockset-api-key }} + CLICKHOUSE_ENDPOINT: ${{ inputs.clickhouse-url }} + CLICKHOUSE_USERNAME: ${{ inputs.clickhouse-username }} + CLICKHOUSE_PASSWORD: ${{ inputs.clickhouse-password }} shell: bash run: | set -ex diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 27830721af..87cfc41132 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -33,7 +33,7 @@ jobs: echo ::group::setup Python environment python -m venv .venv/ source .venv/bin/activate - pip install pip==23.0.1 pytest==7.2.0 rockset==1.0.3 jsonschema==4.17.3 + pip install pip==23.0.1 pytest==7.2.0 rockset==1.0.3 jsonschema==4.17.3 clickhouse-connect==0.7.16 echo ::endgroup:: # Test tools @@ -52,7 +52,8 @@ jobs: python -m venv .venv/ source .venv/bin/activate pip install pip==23.0.1 pytest==7.2.0 rockset==1.0.3 \ - jsonschema==4.17.3 numpy==1.24.1 pandas==2.1.4 boto3==1.19.12 + jsonschema==4.17.3 numpy==1.24.1 pandas==2.1.4 boto3==1.19.12 \ + clickhouse-connect==0.7.16 echo ::endgroup:: # Test tools diff --git a/tools/scripts/fetch_latest_green_commit.py b/tools/scripts/fetch_latest_green_commit.py index d29915e213..43bf9445f4 100644 --- a/tools/scripts/fetch_latest_green_commit.py +++ b/tools/scripts/fetch_latest_green_commit.py @@ -1,14 +1,14 @@ import json -import os from pathlib import Path import re import sys from typing import Any, cast, Dict, List, NamedTuple, Optional, Tuple -import rockset # type: ignore[import] REPO_ROOT = Path(__file__).resolve().parents[2] -sys.path.insert(0, str(REPO_ROOT)) -from tools.scripts.gitutils import _check_output +sys.path.insert(0, str(REPO_ROOT / "tools")) +from torchci.clickhouse import query_clickhouse_saved +from scripts.gitutils import _check_output + sys.path.pop(0) @@ -49,19 +49,9 @@ def get_latest_commits(viable_strict_branch: str, main_branch: str) -> List[str] def query_commits(commits: List[str]) -> List[Dict[str, Any]]: - rs = rockset.RocksetClient( - host="api.usw2a1.rockset.com", api_key=os.environ["ROCKSET_API_KEY"] - ) - params = [{"name": "shas", "type": "string", "value": ",".join(commits)}] - res = rs.QueryLambdas.execute_query_lambda( - # https://console.rockset.com/lambdas/details/commons.commit_jobs_batch_query - query_lambda="commit_jobs_batch_query", - version="19c74e10819104f9", - workspace="commons", - parameters=params, - ) + res = query_clickhouse_saved("commit_jobs_batch_query", {"shas": commits}) - return cast(List[Dict[str, Any]], res.results) + return cast(List[Dict[str, Any]], res) def print_commit_status(commit: str, results: Dict[str, Any]) -> None: diff --git a/torchci/clickhouse_queries/commit_jobs_batch_query/params.json b/torchci/clickhouse_queries/commit_jobs_batch_query/params.json new file mode 100644 index 0000000000..580234f78b --- /dev/null +++ b/torchci/clickhouse_queries/commit_jobs_batch_query/params.json @@ -0,0 +1,3 @@ +{ + "shas": "Array(String)" +} diff --git a/torchci/clickhouse_queries/commit_jobs_batch_query/query.sql b/torchci/clickhouse_queries/commit_jobs_batch_query/query.sql new file mode 100644 index 0000000000..2ead210220 --- /dev/null +++ b/torchci/clickhouse_queries/commit_jobs_batch_query/query.sql @@ -0,0 +1,87 @@ +-- This query is used by pytorch .github/scripts/fetch_latest_green_commit.py script to upgrade +-- viable/strict commit. The latest green commit is the latest commit without any failures that +-- block viable/strict upgrade. ATM, these failures are: lint, pull, and trunk +WITH job AS ( + SELECT + job.run_attempt as _run_attempt, + workflow.head_sha AS sha, + job.name AS job_name, + workflow.name AS workflow_name, + job.id as id, + workflow.id AS workflow_id, + workflow.artifacts_url AS github_artifact_url, + job.conclusion as conclusion, + job.html_url as html_url, + CONCAT( + 'https://ossci-raw-job-status.s3.amazonaws.com/log/', + job.id + ) AS log_url, + DATE_DIFF( + 'SECOND', + job.started_at, + job.completed_at + ) AS duration_s, + IF(job.torchci_classification.'line' = '', [], [job.torchci_classification.'line']) AS failure_line, + job.torchci_classification.'context' AS failure_context, + job.torchci_classification.'captures' AS failure_captures, + job.torchci_classification.'line_num' AS failure_line_number + FROM + default.workflow_job job final + INNER JOIN default.workflow_run workflow final ON workflow.id = job.run_id + WHERE + job.name != 'ciflow_should_run' + AND job.name != 'generate-test-matrix' + AND workflow.event != 'workflow_run' -- Filter out workflow_run-triggered jobs, which have nothing to do with the SHA + AND workflow.event != 'repository_dispatch' -- Filter out repository_dispatch-triggered jobs, which have nothing to do with the SHA + and job.id in (select id from materialized_views.workflow_job_by_head_sha where head_sha in {shas: Array(String)}) + and workflow.id in (select id from materialized_views.workflow_run_by_head_sha where head_sha in {shas: Array(String)}) +), +latest_jobs AS ( + SELECT + sha, + MAX(_run_attempt) AS run_attempt, + workflow_name, + job_name, + argMax(id, _run_attempt) AS id, + workflow_id, + argMax(github_artifact_url, _run_attempt) AS github_artifact_url, + argMax(conclusion, _run_attempt) AS conclusion, + argMax(html_url, _run_attempt) AS html_url, + argMax(log_url, _run_attempt) AS log_url, + argMax(duration_s, _run_attempt) AS duration_s, + argMax(failure_line, _run_attempt) AS failure_line, + argMax(failure_line_number, _run_attempt) AS failure_line_number, + argMax(failure_context, _run_attempt) AS failure_context, + argMax(failure_captures, _run_attempt) AS failure_captures + FROM + job + GROUP BY + sha, + workflow_id, + workflow_name, + job_name +) +SELECT + sha, + run_attempt, + workflow_name AS workflowName, + job_name AS jobName, + CONCAT(workflow_name, ' / ', job_name) AS name, + id, + workflow_id AS workflowId, + github_artifact_url AS githubArtifactUrl, + CASE + WHEN conclusion is NULL THEN 'pending' + ELSE conclusion + END AS conclusion, + html_url AS htmlUrl, + log_url AS logUrl, + duration_s AS durationS, + failure_line AS failureLine, + failure_line_number AS failureLineNumber, + failure_context AS failureContext, + failure_captures AS failureCaptures +FROM + latest_jobs +ORDER BY + name