diff --git a/.github/workflows/tag_teams.yml b/.github/workflows/tag_teams.yml new file mode 100644 index 0000000000000..9711f718b1e16 --- /dev/null +++ b/.github/workflows/tag_teams.yml @@ -0,0 +1,47 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# GH actions. 
def assert_in(needle: str, haystack: str):
    """
    Check that ``needle`` occurs somewhere inside ``haystack``.

    Returns nothing on success. On failure an AssertionError is raised whose
    message shows both the text that was searched for and the text that was
    searched, so the mismatch can be read straight from the test output.
    """
    if needle in haystack:
        return

    message = f"item not found:\n{needle}\nin:\n{haystack}"
    raise AssertionError(message)
teams = { + "data": { + "repository": { + "issue": { + "body": issue_body, + "comments": {"nodes": [{"body": comment1}, {"body": comment2}]}, + } + } + } + } + env = { + type: json.dumps(data), + } + proc = subprocess.run( + [ + str(tag_script), + "--dry-run", + "--team-issue-json", + json.dumps(teams), + ], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + encoding="utf-8", + cwd=git.cwd, + env=env, + ) + if proc.returncode != 0: + raise RuntimeError(f"Process failed:\nstdout:\n{proc.stdout}\n\nstderr:\n{proc.stderr}") + + assert_in(check, proc.stdout) + + run( + "ISSUE", + { + "title": "A title", + "number": 1234, + "user": { + "login": "person5", + }, + "labels": [{"name": "abc"}], + "body": textwrap.dedent( + """ + hello + """.strip() + ), + }, + "No one to cc, exiting", + ) + + run( + "ISSUE", + { + "title": "A title", + "number": 1234, + "user": { + "login": "person5", + }, + "labels": [{"name": "abc"}], + "body": textwrap.dedent( + """ + hello + + cc @test + """.strip() + ), + }, + "No one to cc, exiting", + ) + + run( + type="ISSUE", + data={ + "title": "A title", + "number": 1234, + "user": { + "login": "person5", + }, + "labels": [{"name": "something"}], + "body": textwrap.dedent( + """ + hello + + something""" + ), + }, + check="would have updated issues/1234 with {'body': '\\nhello\\n\\nsomething\\n\\ncc @person1 @person2 @person4'}", + ) + + run( + type="ISSUE", + data={ + "title": "A title", + "number": 1234, + "user": { + "login": "person6", + }, + "labels": [{"name": "something"}], + "body": textwrap.dedent( + """ + hello + + something""" + ), + }, + check="Author person6 is not opted in, quitting", + ) + + run( + type="ISSUE", + data={ + "title": "A title", + "number": 1234, + "user": { + "login": "person5", + }, + "labels": [{"name": "something"}], + "body": textwrap.dedent( + """ + hello + + cc @person1 @person2 @person4""" + ), + }, + check="Everyone to cc is already cc'ed, no update needed", + ) + + run( + type="ISSUE", + data={ + "title": 
"[something] A title", + "number": 1234, + "user": { + "login": "person5", + }, + "labels": [{"name": "something2"}], + "body": textwrap.dedent( + """ + hello + + something""" + ), + }, + check="would have updated issues/1234 with {'body': '\\nhello\\n\\nsomething\\n\\ncc @person1 @person2 @person4'}", + ) + + run( + type="ISSUE", + data={ + "title": "[something] A title", + "number": 1234, + "user": { + "login": "person5", + }, + "labels": [{"name": "something2"}], + "body": textwrap.dedent( + """ + hello + + cc @person1 @person2 @person4""" + ), + }, + check="Everyone to cc is already cc'ed, no update needed", + ) + + run( + type="PR", + data={ + "title": "[something] A title", + "number": 1234, + "draft": False, + "user": { + "login": "person5", + }, + "labels": [{"name": "something2"}], + "body": textwrap.dedent( + """ + hello + + cc @person1 @person2 @person4""" + ), + }, + check="Everyone to cc is already cc'ed, no update needed", + ) + + run( + type="PR", + data={ + "title": "[something] A title", + "number": 1234, + "draft": True, + "user": { + "login": "person5", + }, + "labels": [{"name": "something2"}], + "body": textwrap.dedent( + """ + hello + + cc @person1 @person2 @person4""" + ), + }, + check="Terminating since 1234 is a draft", + ) + + if __name__ == "__main__": sys.exit(pytest.main([__file__] + sys.argv[1:])) diff --git a/tests/scripts/git_utils.py b/tests/scripts/git_utils.py index 0885907130013..8e8cbfb1e2616 100644 --- a/tests/scripts/git_utils.py +++ b/tests/scripts/git_utils.py @@ -20,7 +20,7 @@ import subprocess import re from urllib import request -from typing import Dict, Tuple, Any +from typing import Dict, Tuple, Any, Optional, List class GitHubRepo: @@ -35,8 +35,16 @@ def headers(self): "Authorization": f"Bearer {self.token}", } - def graphql(self, query: str) -> Dict[str, Any]: - return self._post("https://api.github.com/graphql", {"query": query}) + def graphql(self, query: str, variables: Optional[Dict[str, str]] = None) -> Dict[str, 
def find_ccs(body: str) -> List[str]:
    """
    Collect the GitHub handles mentioned in "cc @user1 @user2 ..." runs.

    A run is the literal text "cc" followed by one or more " @handle" items,
    where a handle consists of ASCII letters, digits and hyphens. Every run
    found anywhere in ``body`` contributes its handles.

    Parameters
    ----------
    body : str
        Text to scan (for example a PR / issue description or a single line).

    Returns
    -------
    List[str]
        The distinct handles, without the leading "@", sorted alphabetically
        so the result is deterministic. Empty when ``body`` has no cc run.
    """
    handles = set()

    # Group 1 holds only the mentions (" @a @b"), so the "cc" keyword never has
    # to be stripped afterwards. Stripping it with str.replace("cc ", "") would
    # also eat the tail of any handle that ends in "cc" and glue it to the next
    # mention (e.g. "cc @abcc @def" would yield "ab" instead of "abcc").
    for run in re.finditer(r"cc((?: @[-A-Za-z0-9]+)+)", body):
        mentions = run.group(1)
        # Splitting on " @" leaves an empty first item, which is filtered out
        handles.update(name for name in mentions.split(" @") if name)

    return sorted(handles)
def parse_line(line: str) -> Tuple[str, List[str]]:
    """
    Split one team line such as "- some tag: @user1 @user2" into its parts.

    Leading spaces and dashes are dropped and the rest is split on whitespace.
    Every word before the first "@"-prefixed word forms the tag name (joined
    with single spaces, trailing colons removed). From that first mention
    onwards, each word starting with "@" is taken as a user and other words
    are ignored.

    Returns a ``(tag, users)`` tuple where ``users`` is sorted and has the
    leading "@" characters removed. A line without any mention yields the
    whole line as the tag and an empty user list.
    """
    words = line.lstrip(" -").split()

    # Index of the first mention, or one past the end when there is none
    first_mention = next(
        (idx for idx, word in enumerate(words) if word.startswith("@")),
        len(words),
    )

    tag = " ".join(words[:first_mention]).rstrip(":")
    users = sorted(
        word.lstrip("@") for word in words[first_mention:] if word.startswith("@")
    )

    return (tag, users)
def add_ccs_to_body(body: str, to_cc: List[str]) -> str:
    """
    Return ``body`` with the users in ``to_cc`` mentioned on a trailing cc line.

    The body is scanned from the bottom up, skipping blank lines, for lines
    that start with "cc @". If such lines trail the body, the topmost line of
    that trailing run is rewritten in place to hold the union of its existing
    mentions and ``to_cc``. Otherwise a blank line and a new
    "cc @user1 @user2 ..." line are appended. Users on a generated cc line are
    sorted alphabetically.

    Parameters
    ----------
    body : str
        Current PR / issue description. ``None`` is treated as an empty
        description.
    to_cc : List[str]
        Handles (without "@") that should end up cc'ed.

    Returns
    -------
    str
        The updated body, or ``None`` when every user in ``to_cc`` is already
        on the existing cc line and no update is needed.
    """
    # A description-less PR/issue arrives as JSON null (None after json.loads);
    # treat it as empty text instead of failing on None.split
    lines = (body or "").split("\n")

    # Walk upwards from the end: blank lines are skipped, cc lines are
    # remembered, and the first other line ends the search
    cc_line_idx = None
    for i, line in enumerate(reversed(lines)):
        if line.strip() == "":
            continue
        if line.startswith("cc @"):
            cc_line_idx = len(lines) - i - 1
        else:
            break

    def gen_cc_line(users):
        users = sorted(users)
        return "cc " + " ".join([f"@{user}" for user in users])

    if cc_line_idx is None:
        print("Did not find existing cc line")
        lines.append("")
        lines.append(gen_cc_line(to_cc))
    else:
        # Edit cc line in place
        line = lines[cc_line_idx]
        print(f"Found existing cc line at {cc_line_idx}: {line}")
        existing_ccs = find_ccs(line)
        print(f"Found cc's: {existing_ccs}")

        if set(to_cc).issubset(set(existing_ccs)):
            # Don't do anything if there is no update needed
            return None

        line = gen_cc_line(set(existing_ccs + to_cc))

        lines[cc_line_idx] = line

    return "\n".join(lines)
parser = argparse.ArgumentParser(description=help) + parser.add_argument("--remote", default="origin", help="ssh remote to parse") + parser.add_argument("--team-issue", default="10317", help="issue number to look at for ccs") + parser.add_argument( + "--team-issue-json", help="(testing only) issue JSON to parse rather than fetch from GitHub" + ) + parser.add_argument( + "--dry-run", + action="store_true", + default=False, + help="run but don't send any request to GitHub", + ) + args = parser.parse_args() + + remote = git(["config", "--get", f"remote.{args.remote}.url"]) + user, repo = parse_remote(remote) + + if args.team_issue_json: + issue_data = json.loads(args.team_issue_json) + else: + github = GitHubRepo(token=os.environ["GITHUB_TOKEN"], user=user, repo=repo) + issue_data = fetch_issue(github, issue_number=int(args.team_issue)) + + # Fetch the list of teams + teams = parse_teams(issue_data, issue_number=int(args.team_issue)) + # When rolling out this tool it is limited to certain users, so find that list + rollout_users = find_rollout_users(issue_data) + print(f"[slow rollout] Limiting to opted-in users: {rollout_users}") + + print(f"Found these teams in issue #{args.team_issue}\n{json.dumps(teams, indent=2)}") + + # Extract the payload from GitHub Actions + issue = json.loads(os.getenv("ISSUE", "null")) + pr = json.loads(os.getenv("PR", "null")) + if (issue is None and pr is None) or (issue is not None and pr is not None): + raise RuntimeError("Exactly one of $PR or $ISSUE must be set in the environment") + + if pr is not None: + if pr["draft"]: + print(f"Terminating since {pr['number']} is a draft") + exit(0) + + # PRs/issues have the same structure for the fields needed here + item = issue if issue is not None else pr + title = item["title"] + body = item["body"] + author = item["user"]["login"] + tags = tags_from_title(item["title"]) + tags_from_labels(item["labels"]) + + tags = [t.lower() for t in tags] + print(f"Found tags: {tags}") + + if author not in 
rollout_users: + print(f"Author {author} is not opted in, quitting") + exit(0) + + # Update the PR or issue based on tags in the title and GitHub tags + to_cc = [teams.get(t, []) for t in tags] + to_cc = list(set(item for sublist in to_cc for item in sublist)) + to_cc = [user for user in to_cc if user != author] + print("Users to cc based on labels", to_cc) + + # Create the new PR/issue body + if len(to_cc) == 0: + print("No one to cc, exiting") + exit(0) + + new_body = add_ccs_to_body(body, to_cc) + if new_body is None: + print(f"Everyone to cc is already cc'ed, no update needed") + exit(0) + + print(f"Changing body from:\n----\n{body}\n----\nto:\n----\n{new_body}\n----") + + # Set the PR/issue body on GitHub + data = {"body": new_body} + if issue is not None: + issue_number = issue["number"] + url = f"issues/{issue_number}" + elif pr is not None: + pr_number = pr["number"] + url = f"pulls/{pr_number}" + else: + raise RuntimeError("Unreachable, please report a bug with a link to the failed workflow") + + if not args.dry_run: + github.post(url, data=data) + else: + print(f"Dry run, would have updated {url} with {data}")