diff --git a/.github/workflows/tag_teams.yml b/.github/workflows/tag_teams.yml new file mode 100644 index 0000000000000..2570da7fca864 --- /dev/null +++ b/.github/workflows/tag_teams.yml @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# GH actions. 
+# We use it to tag people from the relevant teams on pull requests and issues +# Jenkins is still the primary CI + +name: Teams + +on: + # See https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#pull_request_target + pull_request_target: + types: [opened, reopened, edited, ready_for_review, labeled] + issues: + types: [opened, edited, reopened, labeled] + +concurrency: + group: Teams-${{ github.event.pull_request.number }}-${{ github.event.issue.number }} + cancel-in-progress: true + +jobs: + tag-teams: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + submodules: "recursive" + - name: Tag people from relevant teams + env: + PR: ${{ toJson(github.event.pull_request) }} + ISSUE: ${{ toJson(github.event.issue) }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -eux + python tests/scripts/github_tag_teams.py diff --git a/tests/scripts/git_utils.py b/tests/scripts/git_utils.py index 530abe8029a6e..5868e05bb0570 100644 --- a/tests/scripts/git_utils.py +++ b/tests/scripts/git_utils.py @@ -20,7 +20,7 @@ import subprocess import re from urllib import request -from typing import Dict, Tuple, Any +from typing import Dict, Tuple, Any, Optional class GitHubRepo: @@ -35,8 +35,16 @@ def headers(self): "Authorization": f"Bearer {self.token}", } - def graphql(self, query: str) -> Dict[str, Any]: - return self._post("https://api.github.com/graphql", {"query": query}) + def graphql(self, query: str, variables: Optional[Dict[str, str]] = None) -> Dict[str, Any]: + if variables is None: + variables = {} + response = self._post( + "https://api.github.com/graphql", {"query": query, "variables": variables} + ) + if "data" not in response: + msg = f"Error fetching data with query:\n{query}\n\nvariables:\n{variables}\n\nerror:\n{json.dumps(response, indent=2)}" + raise RuntimeError(msg) + return response def _post(self, full_url: str, body: Dict[str, Any]) -> Dict[str, Any]: print("Requesting POST to", full_url, "with", body) diff --git
a/tests/scripts/github_tag_teams.py b/tests/scripts/github_tag_teams.py new file mode 100644 index 0000000000000..0a6f72fd44a43 --- /dev/null +++ b/tests/scripts/github_tag_teams.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python3 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
import os
import json
import argparse
import re
from urllib import error
from typing import Dict, Any, List, Tuple


from git_utils import git, GitHubRepo, parse_remote


def find_reviewers(body: str) -> List[str]:
    """Return the usernames mentioned in ``cc @user ...`` runs of ``body``.

    A run is the literal ``cc`` followed by one or more space-separated
    ``@handle`` mentions. The result is de-duplicated and sorted. The body
    and the matched runs are printed for debugging.
    """
    print(f"Parsing body:\n{body}")
    matches = re.findall(r"(cc( @[-A-Za-z0-9]+)+)", body, flags=re.MULTILINE)
    # findall returns (whole run, last repeated group); only the run is needed
    matches = [full for full, _ in matches]

    print("Found matches:", matches)
    reviewers = set()
    for match in matches:
        # Pull the handles out directly instead of deleting every "cc "
        # substring, which would also truncate handles that end in "cc"
        # (e.g. "cc @abcc @def" must yield "abcc", not "ab").
        reviewers.update(re.findall(r"@([-A-Za-z0-9]+)", match))

    return sorted(reviewers)


def parse_line(line: str) -> Tuple[str, List[str]]:
    """Split one line into a tag name and the users tagged on it.

    Leading spaces and dashes are stripped first. The tag is every
    whitespace-separated word before the first word starting with ``@``
    (joined with single spaces); the users are all words starting with ``@``
    with the ``@`` removed. A bare ``@`` is ignored since it names nobody.

    Returns:
        A ``(tag, users)`` tuple; ``tag`` may be empty and ``users`` may be
        an empty list.
    """
    pieces = line.lstrip(" -").split()

    # The tag is anything up until the first tagged person
    tag_items = []
    for piece in pieces:
        if piece.startswith("@"):
            break
        tag_items.append(piece)
    tag = " ".join(tag_items)

    # No word before the first '@' word can be a mention, so scanning the
    # whole line is the same as scanning from the end of the tag
    users = []
    for piece in pieces:
        if piece.startswith("@"):
            handle = piece.lstrip("@")
            if handle:
                users.append(handle)

    return (tag, users)


def parse_teams(github: GitHubRepo, issue: int) -> Dict[str, List[str]]:
    """
    Fetch an issue and parse out series of tagged people from the issue body
    and comments

    In the issue body only bullet lines (``- ...``) containing ``@`` are
    considered; in comments every line containing ``@`` is considered. Lines
    that yield no tag name or no users are skipped.

    Args:
        github: client used to run the GraphQL query; its ``user`` and
            ``repo`` attributes select the repository.
        issue: number of the issue that lists the teams.

    Returns:
        A mapping of tag name to the sorted, de-duplicated list of usernames
        tagged for it.

    Raises:
        RuntimeError: if the response contains no issue with that number.
    """

    # NOTE: only the first 100 comments are requested; later ones are not seen
    query = """query($owner: String!, $name: String!, $number: Int!){
    repository(owner: $owner, name: $name) {
      issue(number: $number) {
        body
        comments(first:100) {
          nodes {
            body
          }
        }
      }
    }
  }"""
    response = github.graphql(
        query,
        variables={
            "owner": github.user,
            "name": github.repo,
            "number": issue,
        },
    )
    issue_data = response["data"]["repository"]["issue"]
    if issue_data is None:
        # Fail with a readable message instead of a TypeError further down
        raise RuntimeError(
            f"Issue #{issue} was not found in {github.user}/{github.repo}"
        )

    result: Dict[str, List[str]] = {}

    def add_tag(tag: str, users: List[str]) -> None:
        # A line that starts with a mention has no tag name, and a line whose
        # only '@' sits inside a word (e.g. an e-mail address) has no users;
        # neither describes a team.
        if not tag or not users:
            return
        result.setdefault(tag, []).extend(users)

    # Parse the issue body (only bullets are looked at)
    for line in issue_data["body"].split("\n"):
        line = line.strip()
        if not line.startswith("- "):
            continue
        if "@" not in line:
            continue

        tag, users = parse_line(line)
        add_tag(tag, users)

    # Parse comment bodies
    for comment in issue_data["comments"]["nodes"]:
        for line in comment["body"].split("\n"):
            if "@" not in line:
                continue

            tag, users = parse_line(line)
            add_tag(tag, users)

    # De-duplicate users listed twice for the same tag; sort so the output is
    # the same from run to run
    return {tag: sorted(set(users)) for tag, users in result.items()}


if __name__ == "__main__":
    description = "Automatically tag people based on PR / issue labels"
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument("--remote", default="origin", help="ssh remote to parse")
    parser.add_argument(
        "--team-issue",
        default="10317",
        help="number of the issue that lists the teams and their members",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        default=False,
        help="run but don't send any request to GitHub",
    )
    args = parser.parse_args()

    remote = git(["config", "--get", f"remote.{args.remote}.url"])
    user, repo = parse_remote(remote)

    github = GitHubRepo(token=os.environ["GITHUB_TOKEN"], user=user, repo=repo)

    # Fetch the list of teams
    teams = parse_teams(github, issue=int(args.team_issue))
    print(f"Found these teams in issue #{args.team_issue}\n{json.dumps(teams, indent=2)}")

    # TODO: update the PR or issue based on tags in the title and GitHub
    # labels: read the event payload from the PR / ISSUE environment
    # variables, look up the matching entries in `teams`, and cc those users
    # (skipping the request when --dry-run is given). Until then this script
    # only prints the parsed teams.