Skip to content

Commit

Permalink
Refactor CI python scripts (#1869)
Browse files Browse the repository at this point in the history
Merged PR #1869.
  • Loading branch information
emilymye authored and modular-magician committed Jun 19, 2019
1 parent 5832e6e commit 21f721f
Show file tree
Hide file tree
Showing 10 changed files with 352 additions and 64 deletions.
42 changes: 14 additions & 28 deletions .ci/magic-modules/ensure_downstreams_merged.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,22 @@
#!/usr/bin/env python
"""
This script takes the name of a file containing an upstream PR number
and returns an error if not all of its downstreams have been merged.
import get_downstream_prs
from github import Github
Required env vars:
GH_TOKEN: Github token
"""
import os
import re
import operator
import itertools
import sys


def get_unmerged_prs(g, dependencies):
  """Return parsed (repo, number) tuples for downstream PRs not yet merged.

  Args:
    g: github.Github client.
    dependencies: iterable of downstream PR URLs of the form
      https://github.com/<owner>/<repo>/pull/<number>.

  Returns:
    List of (repo_name, pr_number_str) tuples for dependencies that are
    neither merged nor closed.
  """
  pr_url_pattern = re.compile(r'https://github.com/([\w-]+/[\w-]+)/pull/(\d+)')
  parsed = sorted((pr_url_pattern.match(url).groups() for url in dependencies),
                  key=operator.itemgetter(0))
  still_open = []
  # Group parsed dependencies by repo so each repo is fetched only once -
  # e.g. [("terraform-provider-google", [...tuples...]), ...]
  for repo_name, repo_pulls in itertools.groupby(parsed, key=operator.itemgetter(0)):
    repo = g.get_repo(repo_name)
    for parsed_pull in repo_pulls:
      # Keep the dependency only if its PR is neither merged nor closed.
      pull = repo.get_pull(int(parsed_pull[1]))
      if not pull.is_merged() and pull.state != "closed":
        still_open.append(parsed_pull)
  return still_open

from github import Github
from pyutils import downstreams

if __name__ == '__main__':
g = Github(os.environ.get('GH_TOKEN'))
assert len(sys.argv) == 2
id_filename = sys.argv[1]
unmerged = get_unmerged_prs(
g, get_downstream_prs.get_github_dependencies(
g, int(open(id_filename).read())))
assert len(sys.argv) == 2, "expected id filename as argument"
with open(sys.argv[1]) as f:
pr_num = int(f.read())

client = Github(os.environ.get('GH_TOKEN'))
unmerged = downstreams.find_unmerged_downstreams(client, pr_num)
if unmerged:
raise ValueError("some PRs are unmerged", unmerged)
25 changes: 8 additions & 17 deletions .ci/magic-modules/get_downstream_prs.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,23 +1,14 @@
#!/usr/bin/env python
import functools
import os
import re
import sys
from github import Github

def append_github_dependencies_to_list(lst, comment_body):
  """Return a new list: *lst* plus any downstream-PR URLs in *comment_body*.

  A downstream reference is a comment line of the form
  "depends: https://github.com/...".
  """
  found_urls = re.findall(r'^depends: (https://github.com/.*)', comment_body, re.MULTILINE)
  return lst + found_urls

def get_github_dependencies(g, pr_number):
  """Collect downstream PR URLs referenced in comments on an upstream PR.

  Args:
    g: github.Github client.
    pr_number: number of the PR in GoogleCloudPlatform/magic-modules.

  Returns:
    List of downstream PR URLs, in comment order.
  """
  upstream = g.get_repo('GoogleCloudPlatform/magic-modules').get_pull(pr_number)
  # Accumulate URLs comment by comment; equivalent to folding
  # append_github_dependencies_to_list over the comment bodies.
  urls = []
  for comment in upstream.get_issue_comments():
    urls = append_github_dependencies_to_list(urls, comment.body)
  return urls
from pyutils import downstreams

if __name__ == '__main__':
g = Github(os.environ.get('GH_TOKEN'))
assert len(sys.argv) == 2
for downstream_pr in get_github_dependencies(g, int(sys.argv[1])):
print downstream_pr
assert len(sys.argv) == 2, "expected a Github PR ID as argument"
upstream_pr = int(sys.argv[1])

downstream_urls = downstreams.get_downstream_urls(
Github(os.environ.get('GH_TOKEN')), upstream_pr)
for url in downstream_urls:
print url
58 changes: 39 additions & 19 deletions .ci/magic-modules/get_merged_patches.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,45 @@
#!/usr/bin/env python
import get_downstream_prs
import itertools
import re
import operator
import os
import urllib

from github import Github
from pyutils import downstreams

def get_merged_patches(gh):
"""Download all merged patches for open upstream PRs.
Args:
gh: Github client to make calls to Github with.
"""
open_pulls = gh.get_repo('GoogleCloudPlatform/magic-modules')\
.get_pulls(state='open')
for open_pr in open_pulls:
print 'Downloading patches for upstream PR %d...' % open_pr.number
parsed_urls = downstreams.get_parsed_downstream_urls(gh, open_pr.number)
for repo_name, pulls in parsed_urls:
repo = gh.get_repo(repo_name)
for r, pr_num in pulls:
print 'Check to see if %s/%s is merged and should be downloaded\n' % (
r, pr_num)
downstream_pr = repo.get_pull(int(pr_num))
if downstream_pr.is_merged():
download_patch(r, downstream_pr)

def download_patch(repo, pr):
"""Download merged downstream PR patch.
Args:
pr: Github Pull request to download patch for
"""
download_location = os.path.join('./patches', repo_name, '%d.patch' % pr.id)
print download_location
# Skip already downloaded patches
if os.path.exists(download_location):
return

if not os.path.exists(os.path.dirname(download_location)):
os.makedirs(os.path.dirname(download_location))
urllib.urlretrieve(pr.patch_url, download_location)

if __name__ == '__main__':
g = Github(os.environ.get('GH_TOKEN'))
open_pulls = g.get_repo('GoogleCloudPlatform/magic-modules').get_pulls(state='open')
depends = [item for sublist in [get_downstream_prs.get_github_dependencies(g, open_pull.number) for open_pull in open_pulls] for item in sublist]
parsed_dependencies = [re.match(r'https://github.com/([\w-]+/[\w-]+)/pull/(\d+)', d).groups() for d in depends]
for r, pulls in itertools.groupby(parsed_dependencies, key=operator.itemgetter(0)):
repo = g.get_repo(r)
for pull in pulls:
pr = repo.get_pull(int(pull[1]))
print 'Checking %s to see if it should be downloaded.' % (pr,)
if pr.is_merged():
download_location = os.path.join('./patches', pull[0], pull[1] + '.patch')
if not os.path.exists(os.path.dirname(download_location)):
os.makedirs(os.path.dirname(download_location))
urllib.urlretrieve(pr.patch_url, download_location)
gh = Github(os.environ.get('GH_TOKEN'))
get_merged_patches(gh)
50 changes: 50 additions & 0 deletions .ci/magic-modules/pyutils/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Magic Modules CI Utils

This directory manages all Python utils that the Magician uses to take upstream Magic Module PRs and generate and manage PRs in various downstream repos.

What this shouldn't contain:

- Python scripts called directly by Concourse jobs.
- Non-Python code

## Tests

Currently we use the standard [unittest](https://docs.python.org/3/library/unittest.html) library. Because CI development is mostly done locally on your developer machine before being directly deployed, these tests are run manually.

This section reviews running/writing tests for someone fairly new to Python/unittest, so some of this information is just from unittest docs.

### Running tests

Set a test environment variable to make calls to Github:
```
export TEST_GITHUB_TOKEN=...
```

If this variable is not set, tests that call Github will be skipped (unauthenticated Github calls would likely be rate-limited anyway).
```
cd pyutils
python -m unittest discover -p "*_test.py"
python ./changelog_utils_test.py
```

Read [unittest](https://docs.python.org/3/library/unittest.html#command-line-interface) docs to see how to run tests at finer granularity.

*NOTE*: Don't forget to delete .pyc files if you feel like tests aren't reflecting your changes!

### Writing Tests:

This is mostly a very shallow review of unittest, but your test should inherit from the `unittest.TestCase` class in some way (i.e. we haven't had the need to write our own TestCase-inheriting Test class but feel free to in the future if needed).

```
class MyModuleTest(unittest.TestCase):
```

Make sure to include the following at the bottom of your test file, so it defaults to running the tests in this file if run as a normal Python script.
```
if __name__ == '__main__':
unittest.main()
```



Empty file.
89 changes: 89 additions & 0 deletions .ci/magic-modules/pyutils/downstreams.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
"""Helper module for obtaining information about an upstream PR and its downstreams.
Typical usage example:
  from pyutils import downstreams
  client = github.Github(github_token)
  urls = downstreams.get_downstream_urls(client, 100)
"""

import os
import re
import sys
import itertools
import operator
from strutils import *

UPSTREAM_REPO = 'GoogleCloudPlatform/magic-modules'

def find_unmerged_downstreams(client, pr_num):
  """Returns list of urls for unmerged, open downstreams.

  For each downstream PR URL found from get_parsed_downstream_urls(),
  fetches the status of each downstream PR to determine which PRs are still
  unmerged (i.e. not closed and not merged).

  Args:
    client: github.Github client
    pr_num: PR Number for upstream PR

  Returns:
    List of html URLs of all unmerged downstreams found for the PR.
  """
  unmerged_dependencies = []
  for repo_name, pulls in get_parsed_downstream_urls(client, pr_num):
    repo = client.get_repo(repo_name)
    # FIX: the original loop rebound `pr_num`, shadowing the upstream PR
    # number parameter; use a distinct name for the downstream number.
    for _repo, downstream_num in pulls:
      pr = repo.get_pull(int(downstream_num))
      # Disregard merged or closed PRs.
      if not pr.is_merged() and pr.state != "closed":
        unmerged_dependencies.append(pr.html_url)

  return unmerged_dependencies

def get_parsed_downstream_urls(client, pr_num):
  """Get parsed URLs for downstream PRs grouped by repo.

  For each downstream PR URL referenced by the upstream PR, this method
  parses the downstream repo name
  (e.g. "terraform-providers/terraform-provider-google") and PR number
  (e.g. 100) and groups them by repo name so calling code only needs to fetch
  each repo once.

  Example:
    for repo, repo_pulls in get_parsed_downstream_urls(client, pr_num):
      for _repo, pr in repo_pulls:
        print "Downstream is https://github.com/%s/pull/%s" % (repo, pr)

  Args:
    client: github.Github client
    pr_num: PR Number for upstream PR

  Returns:
    Iterator over $repo and sub-iterators of ($repo, $pr_num) parsed tuples
  """
  parsed = [parse_github_url(u) for u in get_downstream_urls(client, pr_num)]
  # BUG FIX: itertools.groupby only groups *consecutive* items, so the
  # parsed tuples must be sorted by repo name first; otherwise the same
  # repo can appear in several separate groups.
  parsed.sort(key=operator.itemgetter(0))
  return itertools.groupby(parsed, key=operator.itemgetter(0))

def get_downstream_urls(client, pr_num):
  """Get list of URLs for downstream PRs.

  Fetches the upstream PR and finds its downstream PR URLs by searching
  for references in its issue comments.

  Args:
    client: github.Github client
    pr_num: PR Number for upstream PR

  Returns:
    List of downstream PR URLs.
  """
  upstream_pr = client.get_repo(UPSTREAM_REPO).get_pull(pr_num)
  found = []
  for comment in upstream_pr.get_issue_comments():
    found.extend(find_dependency_urls_in_comment(comment.body))
  return found


75 changes: 75 additions & 0 deletions .ci/magic-modules/pyutils/downstreams_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from downstreams import *
import unittest
import os
from github import Github

TOKEN_ENV_VAR = "TEST_GITHUB_TOKEN"

class TestUpstreamPullRequests(unittest.TestCase):
  """Tests for downstreams helpers against a real upstream PR.

  Terrible test data from scraping
  https://github.com/GoogleCloudPlatform/magic-modules/pull/1000
  TODO: If this test becomes load-bearing, mock out the Github client instead
  of using this.
  """
  TEST_PR_NUM = 1000
  EXPECTED_DOWNSTREAM_URLS = [
    "https://github.com/terraform-providers/terraform-provider-google-beta/pull/186",
    "https://github.com/terraform-providers/terraform-provider-google/pull/2591",
    "https://github.com/modular-magician/ansible/pull/142",
  ]
  EXPECTED_PARSED_DOWNSTREAMS = {
    "terraform-providers/terraform-provider-google-beta": [186],
    "terraform-providers/terraform-provider-google": [2591],
    "modular-magician/ansible": [142],
  }

  def setUp(self):
    # Skip (rather than fail) every Github-calling test when no token is set.
    gh_token = os.environ.get(TOKEN_ENV_VAR)
    if not gh_token:
      self.skipTest(
        "test env var %s not set, skip tests calling Github" % TOKEN_ENV_VAR)
    self.test_client = Github(gh_token)

  def test_find_unmerged_downstreams(self):
    # PR 1000's downstreams are all merged/closed, so expect an empty result.
    self.assertFalse(find_unmerged_downstreams(self.test_client, self.TEST_PR_NUM))

  def test_parsed_downstream_urls(self):
    result = get_parsed_downstream_urls(self.test_client, self.TEST_PR_NUM)
    repo_cnt = 0
    for repo, pulls in result:
      # Verify each repo in result.
      self.assertIn(repo, self.EXPECTED_PARSED_DOWNSTREAMS,
                    "unexpected repo %s in result" % repo)
      repo_cnt += 1

      # Verify each pull request in result.
      expected_pulls = self.EXPECTED_PARSED_DOWNSTREAMS[repo]
      pull_cnt = 0
      for repo, prid in pulls:
        self.assertIn(int(prid), expected_pulls)
        pull_cnt += 1
      # Verify exact count of pulls (here because iterator).
      # FIX: assertEquals is a deprecated alias; use assertEqual.
      self.assertEqual(pull_cnt, len(expected_pulls),
                       "expected %d pull requests in result[%s]" % (len(expected_pulls), repo))

    # Verify exact count of repos (here because iterator).
    # FIX: failure message previously said "pull requests" for the repo count.
    self.assertEqual(repo_cnt, len(self.EXPECTED_PARSED_DOWNSTREAMS),
                     "expected %d repos in result" % len(self.EXPECTED_PARSED_DOWNSTREAMS))

  def test_downstream_urls(self):
    # FIX: removed unused local that built a second Github client (with a
    # possibly-unset token); setUp already provides self.test_client.
    result = get_downstream_urls(self.test_client, self.TEST_PR_NUM)

    expected_cnt = len(self.EXPECTED_DOWNSTREAM_URLS)
    self.assertEqual(len(result), expected_cnt,
                     "expected %d downstream urls, got %d" % (expected_cnt, len(result)))
    for url in result:
      self.assertIn(str(url), self.EXPECTED_DOWNSTREAM_URLS)


Loading

0 comments on commit 21f721f

Please sign in to comment.