From 21f721fa7ad6e2f8fa2d4fc8231d84f46be9fbbe Mon Sep 17 00:00:00 2001
From: emily
Date: Wed, 19 Jun 2019 11:43:23 -0700
Subject: [PATCH] Refactor CI python scripts (#1869)

Merged PR #1869.
---
 .../ensure_downstreams_merged.py              | 42 +++------
 .ci/magic-modules/get_downstream_prs.py       | 25 ++----
 .ci/magic-modules/get_merged_patches.py       | 58 ++++++++----
 .ci/magic-modules/pyutils/README.md           | 50 +++++++++++
 .ci/magic-modules/pyutils/__init__.py         |  0
 .ci/magic-modules/pyutils/downstreams.py      | 89 +++++++++++++++++++
 .ci/magic-modules/pyutils/downstreams_test.py | 75 ++++++++++++++++
 .ci/magic-modules/pyutils/strutils.py         | 38 ++++++++
 .ci/magic-modules/pyutils/strutils_test.py    | 37 ++++++++
 .gitignore                                    |  2 +
 10 files changed, 352 insertions(+), 64 deletions(-)
 mode change 100644 => 100755 .ci/magic-modules/get_downstream_prs.py
 create mode 100644 .ci/magic-modules/pyutils/README.md
 create mode 100644 .ci/magic-modules/pyutils/__init__.py
 create mode 100644 .ci/magic-modules/pyutils/downstreams.py
 create mode 100644 .ci/magic-modules/pyutils/downstreams_test.py
 create mode 100644 .ci/magic-modules/pyutils/strutils.py
 create mode 100644 .ci/magic-modules/pyutils/strutils_test.py
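Before the per-file diffs, a minimal sketch of how the new `pyutils.downstreams` helpers introduced in this patch are meant to be called from the CI scripts. The PR number (1869) and the token handling are illustrative only, not part of the patch:

```
import os

from github import Github
from pyutils import downstreams

# A Github client authenticated with the same token the CI jobs use.
client = Github(os.environ.get('GH_TOKEN'))

# All downstream PR URLs referenced in comments on an upstream PR.
urls = downstreams.get_downstream_urls(client, 1869)

# Only the downstreams that are still open and unmerged.
unmerged = downstreams.find_unmerged_downstreams(client, 1869)
```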
diff --git a/.ci/magic-modules/ensure_downstreams_merged.py b/.ci/magic-modules/ensure_downstreams_merged.py
index 68bfd55ed90e..25867f22c6cd 100755
--- a/.ci/magic-modules/ensure_downstreams_merged.py
+++ b/.ci/magic-modules/ensure_downstreams_merged.py
@@ -1,36 +1,22 @@
 #!/usr/bin/env python
+"""
+This script takes the name of a file containing an upstream PR number
+and returns an error if not all of its downstreams have been merged.
-
-import get_downstream_prs
-from github import Github
+Required env vars:
+  GH_TOKEN: Github token
+"""
 import os
-import re
-import operator
-import itertools
 import sys
-
-
-def get_unmerged_prs(g, dependencies):
-  parsed_dependencies = [re.match(r'https://github.com/([\w-]+/[\w-]+)/pull/(\d+)', d).groups()
-                         for d in dependencies]
-  parsed_dependencies.sort(key=operator.itemgetter(0))
-  unmerged_dependencies = []
-  # group those dependencies by repo - e.g. [("terraform-provider-google", ["123", "456"]), ...]
-  for r, pulls in itertools.groupby(parsed_dependencies, key=operator.itemgetter(0)):
-    repo = g.get_repo(r)
-    for pull in pulls:
-      # check whether the PR is merged - if it is, add it to the list.
-      pr = repo.get_pull(int(pull[1]))
-      if not pr.is_merged() and not pr.state == "closed":
-        unmerged_dependencies.append(pull)
-  return unmerged_dependencies
-
+from github import Github
+from pyutils import downstreams
 
 if __name__ == '__main__':
-  g = Github(os.environ.get('GH_TOKEN'))
-  assert len(sys.argv) == 2
-  id_filename = sys.argv[1]
-  unmerged = get_unmerged_prs(
-      g, get_downstream_prs.get_github_dependencies(
-          g, int(open(id_filename).read())))
+  assert len(sys.argv) == 2, "expected id filename as argument"
+  with open(sys.argv[1]) as f:
+    pr_num = int(f.read())
+
+  client = Github(os.environ.get('GH_TOKEN'))
+  unmerged = downstreams.find_unmerged_downstreams(client, pr_num)
   if unmerged:
     raise ValueError("some PRs are unmerged", unmerged)
diff --git a/.ci/magic-modules/get_downstream_prs.py b/.ci/magic-modules/get_downstream_prs.py
old mode 100644
new mode 100755
index d183323efd3f..ce4c7c739da0
--- a/.ci/magic-modules/get_downstream_prs.py
+++ b/.ci/magic-modules/get_downstream_prs.py
@@ -1,23 +1,14 @@
 #!/usr/bin/env python
-import functools
 import os
-import re
 import sys
 from github import Github
-
-def append_github_dependencies_to_list(lst, comment_body):
-  list_of_urls = re.findall(r'^depends: (https://github.com/.*)', comment_body, re.MULTILINE)
-  return lst + list_of_urls
-
-def get_github_dependencies(g, pr_number):
-  pull_request = g.get_repo('GoogleCloudPlatform/magic-modules').get_pull(pr_number)
-  comment_bodies = [c.body for c in pull_request.get_issue_comments()]
-  # "reduce" is "foldl" - apply this function to the result of the previous function and
-  # the next value in the iterable.
-  return functools.reduce(append_github_dependencies_to_list, comment_bodies, [])
+from pyutils import downstreams
 
 if __name__ == '__main__':
-  g = Github(os.environ.get('GH_TOKEN'))
-  assert len(sys.argv) == 2
-  for downstream_pr in get_github_dependencies(g, int(sys.argv[1])):
-    print downstream_pr
+  assert len(sys.argv) == 2, "expected a Github PR ID as argument"
+  upstream_pr = int(sys.argv[1])
+
+  downstream_urls = downstreams.get_downstream_urls(
+      Github(os.environ.get('GH_TOKEN')), upstream_pr)
+  for url in downstream_urls:
+    print url
diff --git a/.ci/magic-modules/get_merged_patches.py b/.ci/magic-modules/get_merged_patches.py
index 38f30749ad53..dd4ba73a152a 100755
--- a/.ci/magic-modules/get_merged_patches.py
+++ b/.ci/magic-modules/get_merged_patches.py
@@ -1,25 +1,45 @@
 #!/usr/bin/env python
-import get_downstream_prs
-import itertools
-import re
-import operator
 import os
 import urllib
-
 from github import Github
+from pyutils import downstreams
+
+def get_merged_patches(gh):
+  """Download all merged patches for open upstream PRs.
+
+  Args:
+    gh: Github client to make calls to Github with.
+  """
+  open_pulls = gh.get_repo('GoogleCloudPlatform/magic-modules')\
+               .get_pulls(state='open')
+  for open_pr in open_pulls:
+    print 'Downloading patches for upstream PR %d...' % open_pr.number
+    parsed_urls = downstreams.get_parsed_downstream_urls(gh, open_pr.number)
+    for repo_name, pulls in parsed_urls:
+      repo = gh.get_repo(repo_name)
+      for r, pr_num in pulls:
+        print 'Checking if %s/%s is merged and should be downloaded' % (
+            r, pr_num)
+        downstream_pr = repo.get_pull(int(pr_num))
+        if downstream_pr.is_merged():
+          download_patch(r, downstream_pr)
+
+def download_patch(repo, pr):
+  """Download the patch for a merged downstream PR.
+
+  Args:
+    repo: Name of the downstream repo the PR belongs to.
+    pr: Github pull request to download the patch for.
+  """
+  download_location = os.path.join('./patches', repo, '%d.patch' % pr.number)
+  print download_location
+  # Skip already downloaded patches.
+  if os.path.exists(download_location):
+    return
+
+  if not os.path.exists(os.path.dirname(download_location)):
+    os.makedirs(os.path.dirname(download_location))
+  urllib.urlretrieve(pr.patch_url, download_location)
 
 if __name__ == '__main__':
-  g = Github(os.environ.get('GH_TOKEN'))
-  open_pulls = g.get_repo('GoogleCloudPlatform/magic-modules').get_pulls(state='open')
-  depends = [item for sublist in [get_downstream_prs.get_github_dependencies(g, open_pull.number) for open_pull in open_pulls] for item in sublist]
-  parsed_dependencies = [re.match(r'https://github.com/([\w-]+/[\w-]+)/pull/(\d+)', d).groups() for d in depends]
-  for r, pulls in itertools.groupby(parsed_dependencies, key=operator.itemgetter(0)):
-    repo = g.get_repo(r)
-    for pull in pulls:
-      pr = repo.get_pull(int(pull[1]))
-      print 'Checking %s to see if it should be downloaded.' % (pr,)
-      if pr.is_merged():
-        download_location = os.path.join('./patches', pull[0], pull[1] + '.patch')
-        if not os.path.exists(os.path.dirname(download_location)):
-          os.makedirs(os.path.dirname(download_location))
-        urllib.urlretrieve(pr.patch_url, download_location)
+  gh = Github(os.environ.get('GH_TOKEN'))
+  get_merged_patches(gh)
diff --git a/.ci/magic-modules/pyutils/README.md b/.ci/magic-modules/pyutils/README.md
new file mode 100644
index 000000000000..8570ed3be9e7
--- /dev/null
+++ b/.ci/magic-modules/pyutils/README.md
@@ -0,0 +1,50 @@
+# Magic Modules CI Utils
+
+This directory contains the Python utils that the Magician uses to take upstream Magic Modules PRs and to generate and manage PRs in the various downstream repos.
+
+What this shouldn't contain:
+
+- Python scripts called directly by Concourse jobs.
+- Non-Python code.
+
+## Tests
+
+Currently we use the standard [unittest](https://docs.python.org/3/library/unittest.html) library. Because CI development is mostly done locally on your developer machine and then deployed directly, these tests are run manually.
+
+This section reviews running and writing tests for someone fairly new to Python/unittest, so some of it just restates the unittest docs.
+
+### Running tests
+
+Set a test environment variable so the tests can make calls to Github:
+```
+export TEST_GITHUB_TOKEN=...
+```
+
+If it isn't set, tests that call Github are skipped (unauthenticated calls would likely be rate-limited anyway).
+```
+cd pyutils
+
+python -m unittest discover -p "*_test.py"
+python ./downstreams_test.py
+```
+
+Read the [unittest](https://docs.python.org/3/library/unittest.html#command-line-interface) docs to see how to run tests at a finer granularity.
+
+*NOTE*: Don't forget to delete .pyc files if you feel like tests aren't reflecting your changes!
+
+### Writing tests
+
+This is only a very shallow review of unittest: your test class should inherit from `unittest.TestCase` (we haven't needed to write our own TestCase subclass yet, but feel free to add one in the future if needed).
+
+```
+class MyModuleTest(unittest.TestCase):
+```
+
+Make sure to include the following at the bottom of your test file, so the tests in the file run when it is executed as a normal Python script.
+```
+if __name__ == '__main__':
+  unittest.main()
+```
+
+
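Putting the README's two snippets together, a minimal test module for this directory might look like the sketch below. The module under test (`strutils`) is real, but the test class name and assertions are illustrative only:

```
import unittest

from strutils import parse_github_url


class StrutilsSmokeTest(unittest.TestCase):
  """Illustrative example following the README conventions."""

  def test_parse_github_url_returns_repo_and_number(self):
    repo, num = parse_github_url('https://github.com/foo/bar/pull/7')
    self.assertEqual(repo, 'foo/bar')
    self.assertEqual(num, 7)


if __name__ == '__main__':
  unittest.main()
```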
diff --git a/.ci/magic-modules/pyutils/__init__.py b/.ci/magic-modules/pyutils/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/.ci/magic-modules/pyutils/downstreams.py b/.ci/magic-modules/pyutils/downstreams.py
new file mode 100644
index 000000000000..7f1a2be941d4
--- /dev/null
+++ b/.ci/magic-modules/pyutils/downstreams.py
@@ -0,0 +1,89 @@
+"""Helpers for obtaining information about an upstream PR and its downstreams.
+
+  Typical usage example:
+
+  from pyutils import downstreams
+
+  client = github.Github(github_token)
+  unmerged = downstreams.find_unmerged_downstreams(client, 100)
+
+"""
+
+import os
+import re
+import sys
+import itertools
+import operator
+from strutils import *
+
+UPSTREAM_REPO = 'GoogleCloudPlatform/magic-modules'
+
+def find_unmerged_downstreams(client, pr_num):
+  """Returns list of urls for unmerged, open downstreams.
+
+  For each downstream PR URL found from get_parsed_downstream_urls(),
+  fetches the status of each downstream PR to determine which PRs are still
+  unmerged (i.e. not closed and not merged).
+
+  Args:
+    client: github.Github client
+    pr_num: PR number for upstream PR
+
+  Returns:
+    All unmerged downstreams found for a PR.
+  """
+  unmerged_dependencies = []
+  for r, pulls in get_parsed_downstream_urls(client, pr_num):
+    repo = client.get_repo(r)
+    for _repo, downstream_num in pulls:
+      pr = repo.get_pull(int(downstream_num))
+      # Disregard merged or closed PRs.
+      if not pr.is_merged() and not pr.state == "closed":
+        unmerged_dependencies.append(pr.html_url)
+
+  return unmerged_dependencies
+
+def get_parsed_downstream_urls(client, pr_num):
+  """Get parsed URLs for downstream PRs grouped by repo.
+
+  For each downstream PR URL referenced by the upstream PR, this method
+  parses the downstream repo name
+  (e.g. "terraform-providers/terraform-provider-google") and PR number
+  (e.g. 100) and groups them by repo name so calling code only needs to fetch
+  each repo once.
+
+  Example:
+    parsed = get_parsed_downstream_urls(client, pr_num)
+    for repo, repo_pulls in parsed:
+      for _repo, pr in repo_pulls:
+        print "Downstream is https://github.com/%s/pull/%d" % (repo, pr)
+
+  Args:
+    client: github.Github client
+    pr_num: PR number for upstream PR
+
+  Returns:
+    Iterator over $repo and sub-iterators of ($repo, $pr_num) parsed tuples.
+  """
+  parsed = [parse_github_url(u) for u in get_downstream_urls(client, pr_num)]
+  # groupby only groups consecutive items, so sort by repo name first.
+  parsed.sort(key=operator.itemgetter(0))
+  return itertools.groupby(parsed, key=operator.itemgetter(0))
+
+def get_downstream_urls(client, pr_num):
+  """Get list of URLs for downstream PRs.
+
+  This fetches the upstream PR and finds its downstream PR URLs by
+  searching for references in its comments.
+
+  Args:
+    client: github.Github client
+    pr_num: PR number for upstream PR
+
+  Returns:
+    List of downstream PR URLs.
+  """
+  urls = []
+  pr = client.get_repo(UPSTREAM_REPO).get_pull(pr_num)
+  for comment in pr.get_issue_comments():
+    urls = urls + find_dependency_urls_in_comment(comment.body)
+  return urls
+
+
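Because get_parsed_downstream_urls returns a one-pass itertools.groupby iterator, a caller typically walks it once, fetching each repo a single time. A sketch of that consumption pattern (the upstream PR number 100 and the printed format are illustrative):

```
import os

from github import Github
from pyutils import downstreams

client = Github(os.environ.get('GH_TOKEN'))

# Each group is (repo_name, iterator of (repo_name, pr_number) tuples).
for repo_name, pulls in downstreams.get_parsed_downstream_urls(client, 100):
  repo = client.get_repo(repo_name)  # one repo fetch per group
  for _, pr_number in pulls:
    pr = repo.get_pull(int(pr_number))
    print('%s#%d merged: %s' % (repo_name, int(pr_number), pr.is_merged()))
```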
diff --git a/.ci/magic-modules/pyutils/downstreams_test.py b/.ci/magic-modules/pyutils/downstreams_test.py
new file mode 100644
index 000000000000..be70c9b0b0d0
--- /dev/null
+++ b/.ci/magic-modules/pyutils/downstreams_test.py
@@ -0,0 +1,75 @@
+from downstreams import *
+import unittest
+import os
+from github import Github
+
+TOKEN_ENV_VAR = "TEST_GITHUB_TOKEN"
+
+class TestUpstreamPullRequests(unittest.TestCase):
+  """Tests against real data scraped from
+  https://github.com/GoogleCloudPlatform/magic-modules/pull/1000
+
+  TODO: If this test becomes load-bearing, mock out the Github client instead
+  of using this live data.
+  """
+  TEST_PR_NUM = 1000
+  EXPECTED_DOWNSTREAM_URLS = [
+    "https://github.com/terraform-providers/terraform-provider-google-beta/pull/186",
+    "https://github.com/terraform-providers/terraform-provider-google/pull/2591",
+    "https://github.com/modular-magician/ansible/pull/142",
+  ]
+  EXPECTED_PARSED_DOWNSTREAMS = {
+    "terraform-providers/terraform-provider-google-beta": [186],
+    "terraform-providers/terraform-provider-google": [2591],
+    "modular-magician/ansible": [142],
+  }
+
+  def setUp(self):
+    gh_token = os.environ.get(TOKEN_ENV_VAR)
+    if not gh_token:
+      self.skipTest(
+        "test env var %s not set, skip tests calling Github" % TOKEN_ENV_VAR)
+    self.test_client = Github(gh_token)
+
+  def test_find_unmerged_downstreams(self):
+    self.assertFalse(find_unmerged_downstreams(self.test_client, self.TEST_PR_NUM))
+
+  def test_parsed_downstream_urls(self):
+    result = get_parsed_downstream_urls(self.test_client, self.TEST_PR_NUM)
+    repo_cnt = 0
+    for repo, pulls in result:
+      # Verify each repo in result.
+      self.assertIn(repo, self.EXPECTED_PARSED_DOWNSTREAMS,
+        "unexpected repo %s in result" % repo)
+      repo_cnt += 1
+
+      # Verify each pull request in result.
+      expected_pulls = self.EXPECTED_PARSED_DOWNSTREAMS[repo]
+      pull_cnt = 0
+      for _repo, prid in pulls:
+        self.assertIn(int(prid), expected_pulls)
+        pull_cnt += 1
+      # Verify exact count of pulls (here because iterator).
+      self.assertEquals(pull_cnt, len(expected_pulls),
+        "expected %d pull requests in result[%s]" % (len(expected_pulls), repo))
+
+    # Verify exact count of repos (here because iterator).
+    self.assertEquals(repo_cnt, len(self.EXPECTED_PARSED_DOWNSTREAMS),
+      "expected %d repos in result" % len(self.EXPECTED_PARSED_DOWNSTREAMS))
+
+  def test_downstream_urls(self):
+    result = get_downstream_urls(self.test_client, self.TEST_PR_NUM)
+
+    expected_cnt = len(self.EXPECTED_DOWNSTREAM_URLS)
+    self.assertEquals(len(result), expected_cnt,
+      "expected %d downstream urls, got %d" % (expected_cnt, len(result)))
+    for url in result:
+      self.assertIn(str(url), self.EXPECTED_DOWNSTREAM_URLS)
+
+
+if __name__ == '__main__':
+  unittest.main()
+
+
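The next file, pyutils/strutils.py, holds the pure string helpers that downstreams.py and these tests rely on. As a quick illustration of how its two functions compose, here is a runnable sketch; the comment text, owner names, and URLs are made up:

```
from strutils import find_dependency_urls_in_comment, parse_github_url

comment = """
Tracking downstreams for this change.

depends: https://github.com/example-org/example-repo/pull/12
depends: https://github.com/example-org/other-repo/pull/34
"""

urls = find_dependency_urls_in_comment(comment)
# ['https://github.com/example-org/example-repo/pull/12',
#  'https://github.com/example-org/other-repo/pull/34']

parsed = [parse_github_url(u) for u in urls]
# [('example-org/example-repo', 12), ('example-org/other-repo', 34)]
```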
diff --git a/.ci/magic-modules/pyutils/strutils.py b/.ci/magic-modules/pyutils/strutils.py
new file mode 100644
index 000000000000..aac424339a88
--- /dev/null
+++ b/.ci/magic-modules/pyutils/strutils.py
@@ -0,0 +1,38 @@
+import re
+
+def find_dependency_urls_in_comment(body):
+  """Util to parse downstream dependencies from a given comment body.
+
+  Example:
+    find_dependency_urls_in_comment(\"""
+      This is a comment on an MM PR.
+
+      depends: https://github.com/ownerFoo/repoFoo/pull/100
+      depends: https://github.com/ownerBar/repoBar/pull/10
+      \""")
+    ["https://github.com/ownerFoo/repoFoo/pull/100",
+     "https://github.com/ownerBar/repoBar/pull/10"]
+
+  Args:
+    body (string): Text of comment in upstream PR
+
+  Returns:
+    List of PR URLs found.
+  """
+  return re.findall(
+    r'^depends: (https://github.com/[^\s]*)', body, re.MULTILINE)
+
+def parse_github_url(gh_url):
+  """Util to parse Github repo/PR id from a Github PR URL.
+
+  Args:
+    gh_url (string): URL of Github pull request.
+
+  Returns:
+    Tuple of (repo name, pr number)
+  """
+  matches = re.match(r'https://github.com/([\w-]+/[\w-]+)/pull/(\d+)', gh_url)
+  if matches:
+    repo, prnum = matches.groups()
+    return (repo, int(prnum))
+  return None
\ No newline at end of file
diff --git a/.ci/magic-modules/pyutils/strutils_test.py b/.ci/magic-modules/pyutils/strutils_test.py
new file mode 100644
index 000000000000..86e542da2266
--- /dev/null
+++ b/.ci/magic-modules/pyutils/strutils_test.py
@@ -0,0 +1,37 @@
+from strutils import *
+import unittest
+import os
+from github import Github
+
+class TestStringUtils(unittest.TestCase):
+  def test_find_dependency_urls(self):
+    test_urls = [
+      "https://github.com/repo-owner/repo-A/pull/1",
+      "https://github.com/repo-owner/repo-A/pull/2",
+      "https://github.com/repo-owner/repo-B/pull/3",
+    ]
+    test_body = "".join(["\ndepends: %s\n" % u for u in test_urls])
+    result = find_dependency_urls_in_comment(test_body)
+    self.assertEquals(len(result), len(test_urls),
+      "expected %d urls to be parsed from comment" % len(test_urls))
+    for test_url in test_urls:
+      self.assertIn(test_url, result)
+
+  def test_parse_github_url(self):
+    test_cases = {
+      "https://github.com/repoowner/reponame/pull/1234": ("repoowner/reponame", 1234),
+      "not a real url": None,
+    }
+    for k in test_cases:
+      result = parse_github_url(k)
+      expected = test_cases[k]
+      if not expected:
+        self.assertIsNone(result, "expected None, got %s" % result)
+      else:
+        self.assertEquals(result[0], expected[0])
+        self.assertEquals(int(result[1]), expected[1])
+
+if __name__ == '__main__':
+  unittest.main()
+
+
diff --git a/.gitignore b/.gitignore
index 8fd03bda00d5..bee13e6aafac 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,7 +16,9 @@
 *#
 *.swp
 
+# Python
 *.pyc
+*.python-version
 
 # IDEA files
 .idea/*