Skip to content

Commit

Permalink
Refactor CI python scripts (#1869)
Browse files Browse the repository at this point in the history
Merged PR #1869.
  • Loading branch information
emilymye authored and modular-magician committed Jun 19, 2019
1 parent 5832e6e commit 21f721f
Show file tree
Hide file tree
Showing 10 changed files with 352 additions and 64 deletions.
42 changes: 14 additions & 28 deletions .ci/magic-modules/ensure_downstreams_merged.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,22 @@
#!/usr/bin/env python
"""
This script takes the name of a file containing an upstream PR number
and returns an error if not all of its downstreams have been merged.
import get_downstream_prs
from github import Github
Required env vars:
GH_TOKEN: Github token
"""
import os
import re
import operator
import itertools
import sys


def get_unmerged_prs(g, dependencies):
  """Return parsed (repo, number) tuples for downstream PRs not yet merged.

  Args:
    g: github.Github client.
    dependencies: iterable of downstream PR URLs of the form
      https://github.com/<owner>/<repo>/pull/<number>.

  Returns:
    List of (repo_name, pr_number_str) tuples for dependencies that are
    neither merged nor closed.
  """
  pr_url_pattern = re.compile(r'https://github.com/([\w-]+/[\w-]+)/pull/(\d+)')
  parsed = sorted((pr_url_pattern.match(url).groups() for url in dependencies),
                  key=operator.itemgetter(0))
  still_open = []
  # Group parsed dependencies by repo so each repo is fetched only once -
  # e.g. [("terraform-provider-google", [...tuples...]), ...]
  for repo_name, repo_pulls in itertools.groupby(parsed, key=operator.itemgetter(0)):
    repo = g.get_repo(repo_name)
    for parsed_pull in repo_pulls:
      # Keep the dependency only if its PR is neither merged nor closed.
      pull = repo.get_pull(int(parsed_pull[1]))
      if not pull.is_merged() and pull.state != "closed":
        still_open.append(parsed_pull)
  return still_open

from github import Github
from pyutils import downstreams

if __name__ == '__main__':
g = Github(os.environ.get('GH_TOKEN'))
assert len(sys.argv) == 2
id_filename = sys.argv[1]
unmerged = get_unmerged_prs(
g, get_downstream_prs.get_github_dependencies(
g, int(open(id_filename).read())))
assert len(sys.argv) == 2, "expected id filename as argument"
with open(sys.argv[1]) as f:
pr_num = int(f.read())

client = Github(os.environ.get('GH_TOKEN'))
unmerged = downstreams.find_unmerged_downstreams(client, pr_num)
if unmerged:
raise ValueError("some PRs are unmerged", unmerged)
25 changes: 8 additions & 17 deletions .ci/magic-modules/get_downstream_prs.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,23 +1,14 @@
#!/usr/bin/env python
import functools
import os
import re
import sys
from github import Github

def append_github_dependencies_to_list(lst, comment_body):
  """Return a new list: *lst* plus any downstream-PR URLs in *comment_body*.

  A downstream reference is a comment line of the form
  "depends: https://github.com/...".
  """
  found_urls = re.findall(r'^depends: (https://github.com/.*)', comment_body, re.MULTILINE)
  return lst + found_urls

def get_github_dependencies(g, pr_number):
  """Collect downstream PR URLs referenced in comments on an upstream PR.

  Args:
    g: github.Github client.
    pr_number: number of the PR in GoogleCloudPlatform/magic-modules.

  Returns:
    List of downstream PR URLs, in comment order.
  """
  upstream = g.get_repo('GoogleCloudPlatform/magic-modules').get_pull(pr_number)
  # Accumulate URLs comment by comment; equivalent to folding
  # append_github_dependencies_to_list over the comment bodies.
  urls = []
  for comment in upstream.get_issue_comments():
    urls = append_github_dependencies_to_list(urls, comment.body)
  return urls
from pyutils import downstreams

if __name__ == '__main__':
g = Github(os.environ.get('GH_TOKEN'))
assert len(sys.argv) == 2
for downstream_pr in get_github_dependencies(g, int(sys.argv[1])):
print downstream_pr
assert len(sys.argv) == 2, "expected a Github PR ID as argument"
upstream_pr = int(sys.argv[1])

downstream_urls = downstreams.get_downstream_urls(
Github(os.environ.get('GH_TOKEN')), upstream_pr)
for url in downstream_urls:
print url
58 changes: 39 additions & 19 deletions .ci/magic-modules/get_merged_patches.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,45 @@
#!/usr/bin/env python
import get_downstream_prs
import itertools
import re
import operator
import os
import urllib

from github import Github
from pyutils import downstreams

def get_merged_patches(gh):
"""Download all merged patches for open upstream PRs.
Args:
gh: Github client to make calls to Github with.
"""
open_pulls = gh.get_repo('GoogleCloudPlatform/magic-modules')\
.get_pulls(state='open')
for open_pr in open_pulls:
print 'Downloading patches for upstream PR %d...' % open_pr.number
parsed_urls = downstreams.get_parsed_downstream_urls(gh, open_pr.number)
for repo_name, pulls in parsed_urls:
repo = gh.get_repo(repo_name)
for r, pr_num in pulls:
print 'Check to see if %s/%s is merged and should be downloaded\n' % (
r, pr_num)
downstream_pr = repo.get_pull(int(pr_num))
if downstream_pr.is_merged():
download_patch(r, downstream_pr)

def download_patch(repo, pr):
"""Download merged downstream PR patch.
Args:
pr: Github Pull request to download patch for
"""
download_location = os.path.join('./patches', repo_name, '%d.patch' % pr.id)
print download_location
# Skip already downloaded patches
if os.path.exists(download_location):
return

if not os.path.exists(os.path.dirname(download_location)):
os.makedirs(os.path.dirname(download_location))
urllib.urlretrieve(pr.patch_url, download_location)

if __name__ == '__main__':
g = Github(os.environ.get('GH_TOKEN'))
open_pulls = g.get_repo('GoogleCloudPlatform/magic-modules').get_pulls(state='open')
depends = [item for sublist in [get_downstream_prs.get_github_dependencies(g, open_pull.number) for open_pull in open_pulls] for item in sublist]
parsed_dependencies = [re.match(r'https://github.com/([\w-]+/[\w-]+)/pull/(\d+)', d).groups() for d in depends]
for r, pulls in itertools.groupby(parsed_dependencies, key=operator.itemgetter(0)):
repo = g.get_repo(r)
for pull in pulls:
pr = repo.get_pull(int(pull[1]))
print 'Checking %s to see if it should be downloaded.' % (pr,)
if pr.is_merged():
download_location = os.path.join('./patches', pull[0], pull[1] + '.patch')
if not os.path.exists(os.path.dirname(download_location)):
os.makedirs(os.path.dirname(download_location))
urllib.urlretrieve(pr.patch_url, download_location)
gh = Github(os.environ.get('GH_TOKEN'))
get_merged_patches(gh)
50 changes: 50 additions & 0 deletions .ci/magic-modules/pyutils/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Magic Modules CI Utils

This directory manages all Python utils that the Magician uses to take upstream Magic Module PRs and generate and manage PRs in various downstream repos.

What this shouldn't contain:

- Python scripts called directly by Concourse jobs.
- Non-Python code

## Tests

Currently we use the standard [unittest](https://docs.python.org/3/library/unittest.html) library. Because CI development is mostly done locally on your developer machine before being directly deployed, these tests are run manually.

This section reviews running/writing tests for someone fairly new to Python/unittest, so some of this information is just from unittest docs.

### Running tests

Set a test environment variable to make calls to Github:
```
export TEST_GITHUB_TOKEN=...
```

If this variable is not set, tests that call Github will be skipped (unauthenticated Github calls would likely be rate-limited anyway).
```
cd pyutils
python -m unittest discover -p "*_test.py"
python ./changelog_utils_test.py
```

Read [unittest](https://docs.python.org/3/library/unittest.html#command-line-interface) docs to see how to run tests at finer granularity.

*NOTE*: Don't forget to delete .pyc files if you feel like tests aren't reflecting your changes!

### Writing Tests:

This is mostly a very shallow review of unittest, but your test should inherit from the `unittest.TestCase` class in some way (i.e. we haven't had the need to write our own TestCase-inheriting Test class but feel free to in the future if needed).

```
class MyModuleTest(unittest.TestCase):
```

Make sure to include the following at the bottom of your test file, so it defaults to running the tests in this file if run as a normal Python script.
```
if __name__ == '__main__':
unittest.main()
```



Empty file.
89 changes: 89 additions & 0 deletions .ci/magic-modules/pyutils/downstreams.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
"""Helper module for obtaining information about an upstream PR and its downstreams.
Typical usage example:
  from pyutils import downstreams
  client = github.Github(github_token)
  urls = downstreams.get_downstream_urls(client, 100)
"""

import os
import re
import sys
import itertools
import operator
from strutils import *

UPSTREAM_REPO = 'GoogleCloudPlatform/magic-modules'

def find_unmerged_downstreams(client, pr_num):
  """Returns list of urls for unmerged, open downstreams.

  For each downstream PR URL found from get_parsed_downstream_urls(),
  fetches the status of each downstream PR to determine which PRs are still
  unmerged (i.e. not closed and not merged).

  Args:
    client: github.Github client
    pr_num: PR Number for upstream PR

  Returns:
    List of html URLs of all unmerged downstreams found for the PR.
  """
  unmerged_dependencies = []
  for repo_name, pulls in get_parsed_downstream_urls(client, pr_num):
    repo = client.get_repo(repo_name)
    # FIX: the original loop rebound `pr_num`, shadowing the upstream PR
    # number parameter; use a distinct name for the downstream number.
    for _repo, downstream_num in pulls:
      pr = repo.get_pull(int(downstream_num))
      # Disregard merged or closed PRs.
      if not pr.is_merged() and pr.state != "closed":
        unmerged_dependencies.append(pr.html_url)

  return unmerged_dependencies

def get_parsed_downstream_urls(client, pr_num):
  """Get parsed URLs for downstream PRs grouped by repo.

  For each downstream PR URL referenced by the upstream PR, this method
  parses the downstream repo name
  (e.g. "terraform-providers/terraform-provider-google") and PR number
  (e.g. 100) and groups them by repo name so calling code only needs to fetch
  each repo once.

  Example:
    for repo, repo_pulls in get_parsed_downstream_urls(client, pr_num):
      for _repo, pr in repo_pulls:
        print "Downstream is https://github.com/%s/pull/%s" % (repo, pr)

  Args:
    client: github.Github client
    pr_num: PR Number for upstream PR

  Returns:
    Iterator over $repo and sub-iterators of ($repo, $pr_num) parsed tuples
  """
  parsed = [parse_github_url(u) for u in get_downstream_urls(client, pr_num)]
  # BUG FIX: itertools.groupby only groups *consecutive* items, so the
  # parsed tuples must be sorted by repo name first; otherwise the same
  # repo can appear in several separate groups.
  parsed.sort(key=operator.itemgetter(0))
  return itertools.groupby(parsed, key=operator.itemgetter(0))

def get_downstream_urls(client, pr_num):
  """Get list of URLs for downstream PRs.

  Fetches the upstream PR and finds its downstream PR URLs by searching
  for references in its issue comments.

  Args:
    client: github.Github client
    pr_num: PR Number for upstream PR

  Returns:
    List of downstream PR URLs.
  """
  upstream_pr = client.get_repo(UPSTREAM_REPO).get_pull(pr_num)
  found = []
  for comment in upstream_pr.get_issue_comments():
    found.extend(find_dependency_urls_in_comment(comment.body))
  return found


75 changes: 75 additions & 0 deletions .ci/magic-modules/pyutils/downstreams_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from downstreams import *
import unittest
import os
from github import Github

TOKEN_ENV_VAR = "TEST_GITHUB_TOKEN"

class TestUpstreamPullRequests(unittest.TestCase):
  """Tests for downstreams helpers against a real upstream PR.

  Terrible test data from scraping
  https://github.com/GoogleCloudPlatform/magic-modules/pull/1000
  TODO: If this test becomes load-bearing, mock out the Github client instead
  of using this.
  """
  TEST_PR_NUM = 1000
  EXPECTED_DOWNSTREAM_URLS = [
    "https://github.com/terraform-providers/terraform-provider-google-beta/pull/186",
    "https://github.com/terraform-providers/terraform-provider-google/pull/2591",
    "https://github.com/modular-magician/ansible/pull/142",
  ]
  EXPECTED_PARSED_DOWNSTREAMS = {
    "terraform-providers/terraform-provider-google-beta": [186],
    "terraform-providers/terraform-provider-google": [2591],
    "modular-magician/ansible": [142],
  }

  def setUp(self):
    # Skip (rather than fail) every Github-calling test when no token is set.
    gh_token = os.environ.get(TOKEN_ENV_VAR)
    if not gh_token:
      self.skipTest(
        "test env var %s not set, skip tests calling Github" % TOKEN_ENV_VAR)
    self.test_client = Github(gh_token)

  def test_find_unmerged_downstreams(self):
    # PR 1000's downstreams are all merged/closed, so expect an empty result.
    self.assertFalse(find_unmerged_downstreams(self.test_client, self.TEST_PR_NUM))

  def test_parsed_downstream_urls(self):
    result = get_parsed_downstream_urls(self.test_client, self.TEST_PR_NUM)
    repo_cnt = 0
    for repo, pulls in result:
      # Verify each repo in result.
      self.assertIn(repo, self.EXPECTED_PARSED_DOWNSTREAMS,
                    "unexpected repo %s in result" % repo)
      repo_cnt += 1

      # Verify each pull request in result.
      expected_pulls = self.EXPECTED_PARSED_DOWNSTREAMS[repo]
      pull_cnt = 0
      for repo, prid in pulls:
        self.assertIn(int(prid), expected_pulls)
        pull_cnt += 1
      # Verify exact count of pulls (here because iterator).
      # FIX: assertEquals is a deprecated alias; use assertEqual.
      self.assertEqual(pull_cnt, len(expected_pulls),
                       "expected %d pull requests in result[%s]" % (len(expected_pulls), repo))

    # Verify exact count of repos (here because iterator).
    # FIX: failure message previously said "pull requests" for the repo count.
    self.assertEqual(repo_cnt, len(self.EXPECTED_PARSED_DOWNSTREAMS),
                     "expected %d repos in result" % len(self.EXPECTED_PARSED_DOWNSTREAMS))

  def test_downstream_urls(self):
    # FIX: removed unused local that built a second Github client (with a
    # possibly-unset token); setUp already provides self.test_client.
    result = get_downstream_urls(self.test_client, self.TEST_PR_NUM)

    expected_cnt = len(self.EXPECTED_DOWNSTREAM_URLS)
    self.assertEqual(len(result), expected_cnt,
                     "expected %d downstream urls, got %d" % (expected_cnt, len(result)))
    for url in result:
      self.assertIn(str(url), self.EXPECTED_DOWNSTREAM_URLS)


Loading

0 comments on commit 21f721f

Please sign in to comment.