Added migrator for aarch64 and ppc64le #444

Merged · 4 commits · Jan 29, 2019 · showing changes from 1 commit
conda_forge_tick/auto_tick.xsh (35 additions, 0 deletions)
@@ -26,6 +26,7 @@ $MIGRATORS = [
    # Noarch(pr_limit=10),
    # Pinning(pr_limit=1, removals={'perl'}),
    # Compiler(pr_limit=7),
    ArchRebuild(pr_limit=7),
]

def run(attrs, migrator, feedstock=None, protocol='ssh',
@@ -206,6 +207,39 @@ def add_rebuild(migrators, gx):
                cycles=cycles))


def add_arch_migrate(migrators, gx):
    """Adds aarch64 and ppc64le rebuild migrators.

    Parameters
    ----------
    migrators : list of Migrator
        The list of migrators to run.

    """
    total_graph = copy.deepcopy(gx)

    for node, attrs in gx.node.items():
        meta_yaml = attrs.get("meta_yaml", {}) or {}
        # no need to consider noarch packages for this rebuild
        noarch = meta_yaml.get('build', {}).get('noarch')
        if noarch:
            pluck(total_graph, node)

    # post plucking we can have several strange cases; let's remove all self-loops
    total_graph.remove_edges_from(total_graph.selfloop_edges())

    top_level = {node for node in total_graph
                 if not set(total_graph.predecessors(node))}
    cycles = list(nx.simple_cycles(total_graph))
    print('cycles are here:', cycles)

    migrators.append(
        ArchRebuild(graph=total_graph,
                    pr_limit=5,
                    name='aarch64 and ppc64le addition',
Member: If azure doesn't support [skip ci] / [ci skip], then we can add it here so that the other CIs don't run.

Contributor Author: Do the other platforms support something like [skip appveyor], [skip travisci], or [skip circle]? It feels a tad dangerous to rely on a relatively common feature not existing :P

Member: Appveyor does. Travis-CI skipping can be implemented using build conditions, and for Circle we can modify the fast-finish script.
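
For illustration, a minimal Python sketch of the commit-message markers being discussed (the helper and marker names are hypothetical, not part of this PR):

import re

def should_skip(commit_message, provider='appveyor'):
    # True when the commit message carries a "[skip <provider>]" marker.
    return bool(re.search(r'\[skip {}\]'.format(re.escape(provider)),
                          commit_message))

print(should_skip('Rebuild for aarch64 and ppc64le [skip appveyor]'))  # True
print(should_skip('Rebuild for aarch64 and ppc64le'))                  # False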

Member: Can we at least do [skip appveyor] before we do this?

Member: Would you mind putting in a PR?

Member: I opened an issue at #452.
                    top_level=top_level,
                    cycles=cycles))
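
# For illustration, a self-contained sketch of the pruning performed above;
# this pluck() is a hypothetical stand-in for the real helper, assumed to
# bridge a node's predecessors to its successors before removing the node.
def _pluck_sketch():
    import networkx as nx

    def pluck(g, node):
        preds, succs = list(g.predecessors(node)), list(g.successors(node))
        g.add_edges_from((p, s) for p in preds for s in succs)
        g.remove_node(node)

    g = nx.DiGraph([('x', 'noarch-pkg'), ('noarch-pkg', 'y'), ('noarch-pkg', 'x')])
    pluck(g, 'noarch-pkg')  # bridging introduces the self-loop ('x', 'x')
    g.remove_edges_from(list(nx.selfloop_edges(g)))
    print(sorted(g.edges()))  # -> [('x', 'y')]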


def initialize_migrators(do_rebuild=False):
    setup_logger(logger)
    temp = g`/tmp/*`
@@ -221,6 +255,7 @@ def initialize_migrators(do_rebuild=False):
    # TODO: reenable once graph order is correct
    if do_rebuild:
        add_rebuild($MIGRATORS, gx)
        add_arch_migrate($MIGRATORS, gx)

    return gx, smithy_version, pinning_version, temp, $MIGRATORS

conda_forge_tick/make_graph.py (95 additions, 51 deletions)
@@ -7,13 +7,15 @@
import time
import random
import builtins
import contextlib
from copy import deepcopy

from concurrent.futures import ProcessPoolExecutor, as_completed, ThreadPoolExecutor

import github3
import networkx as nx
import requests
import yaml

from xonsh.lib.collections import ChainDB, _convert_to_dict
from .all_feedstocks import get_all_feedstocks
@@ -36,20 +38,26 @@ def get_attrs(name, i):
    }

    logger.info((i, name))
    def fetch_file(filepath):
        r = requests.get(
            "https://raw.githubusercontent.com/"
            "conda-forge/{}-feedstock/master/{}".format(name, filepath)
        )
        failed = False
        if r.status_code != 200:
            logger.warn(
                "Something odd happened when fetching recipe "
                "{}: {}".format(name, r.status_code)
            )
            sub_graph["bad"] = "make_graph: {}".format(r.status_code)
            failed = True

        text = r.content.decode("utf-8")
        return text, failed

    text, failed = fetch_file('recipe/meta.yaml')
    if failed:
        return sub_graph

    sub_graph["raw_meta_yaml"] = text
    yaml_dict = ChainDB(
        *[parse_meta_yaml(text, platform=plat) for plat in ["win", "osx", "linux"]]
@@ -60,6 +68,13 @@ def get_attrs(name, i):
        return sub_graph
    sub_graph["meta_yaml"] = _convert_to_dict(yaml_dict)

    # Get the conda-forge.yml
    text, failed = fetch_file('conda-forge.yml')
    if failed:
        return sub_graph
    sub_graph["conda_forge.yml"] = {k: v for k, v in yaml.safe_load(text).items()
                                    if k in {'provider', 'max_py_ver',
                                             'max_r_ver', 'compiler_stack'}}
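
    # For illustration, the whitelist filter above applied to a hypothetical
    # conda-forge.yml:
    #   yaml.safe_load("provider: {win: azure}\nmax_py_ver: '3.7'\nchannels: {}")
    #     -> {'provider': {'win': 'azure'}, 'max_py_ver': '3.7', 'channels': {}}
    #   after the comprehension
    #     -> {'provider': {'win': 'azure'}, 'max_py_ver': '3.7'}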

    # TODO: Write schema for dict
    req = get_requirements(yaml_dict)
    sub_graph["req"] = req
@@ -88,7 +103,7 @@


def _build_graph_process_pool(gx, names, new_names):
    with executor('dask', max_workers=20) as (pool, as_completed):
        futures = {
            pool.submit(get_attrs, name, i): name for i, name in enumerate(names)
        }
@@ -148,13 +163,20 @@ def make_graph(names, gx=None):
    return gx


def github_client():
    if os.environ.get('GITHUB_TOKEN'):
        return github3.login(token=os.environ['GITHUB_TOKEN'])
    else:
        return github3.login(os.environ["USERNAME"], os.environ["PASSWORD"])
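
# A minimal usage sketch (the token value is a hypothetical placeholder):
# token auth is preferred whenever GITHUB_TOKEN is set; otherwise the
# USERNAME/PASSWORD pair is used for basic auth.
#
#     os.environ['GITHUB_TOKEN'] = '<personal-access-token>'
#     gh = github_client()  # an authenticated github3.GitHub session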


def update_graph_pr_status(gx: nx.DiGraph) -> nx.DiGraph:
    gh = github_client()
    futures = {}
    node_ids = list(gx.nodes)
    # this makes sure that github rate limits are dispersed
    random.shuffle(node_ids)
    with executor('thread', NUM_GITHUB_THREADS) as (pool, as_completed):
        for node_id in node_ids:
            node = gx.nodes[node_id]
            prs = node.get("PRed_json", {})
@@ -164,30 +186,30 @@ def update_graph_pr_status(gx: nx.DiGraph) -> nx.DiGraph:
                future = pool.submit(refresh_pr, pr_json, gh)
                futures[future] = (node_id, migrator)

        for f in as_completed(futures):
            name, muid = futures[f]
            try:
                res = f.result()
                if res:
                    gx.nodes[name]["PRed_json"][muid].update(**res)
                    logger.info("Updated json for {}: {}".format(name, res["id"]))
            except github3.GitHubError as e:
                logger.critical("GITHUB ERROR ON FEEDSTOCK: {}".format(name))
                if is_github_api_limit_reached(e, gh):
                    break
            except Exception as e:
                logger.critical("ERROR ON FEEDSTOCK: {}: {}".format(name, muid))
                raise
    return gx


def close_labels(gx: nx.DiGraph) -> nx.DiGraph:
    gh = github_client()
    futures = {}
    node_ids = list(gx.nodes)
    # this makes sure that github rate limits are dispersed
    random.shuffle(node_ids)
    with executor('thread', NUM_GITHUB_THREADS) as (pool, as_completed):
        for node_id in node_ids:
            node = gx.nodes[node_id]
            prs = node.get("PRed_json", {})
@@ -197,34 +219,56 @@ def close_labels(gx: nx.DiGraph) -> nx.DiGraph:
                future = pool.submit(close_out_labels, pr_json, gh)
                futures[future] = (node_id, migrator)

        for f in as_completed(futures):
            name, muid = futures[f]
            try:
                res = f.result()
                if res:
                    gx.node[name]["PRed"].remove(muid)
                    del gx.nodes[name]["PRed_json"][muid]
                    logger.info(
                        "Closed and removed PR and branch for "
                        "{}: {}".format(name, res["id"])
                    )
            except github3.GitHubError as e:
                logger.critical("GITHUB ERROR ON FEEDSTOCK: {}".format(name))
                if is_github_api_limit_reached(e, gh):
                    break
            except Exception as e:
                logger.critical("ERROR ON FEEDSTOCK: {}: {}".format(name, muid))
                raise
    return gx


@contextlib.contextmanager
def executor(kind, max_workers):
    """General purpose utility to get an executor with its as_completed handler.

    This allows us to easily use other executors as needed.
    """
    if kind == 'thread':
        with ThreadPoolExecutor(max_workers=max_workers) as pool:
            yield pool, as_completed
    elif kind == 'process':
        with ProcessPoolExecutor(max_workers=max_workers) as pool:
            yield pool, as_completed
    elif kind == 'dask':
        import distributed
        with distributed.LocalCluster(n_workers=max_workers) as cluster:
            with distributed.Client(cluster) as client:
                yield client, distributed.as_completed
    else:
        raise ValueError('Unknown executor kind: {}'.format(kind))
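
# A minimal usage sketch of executor() (hypothetical work function and
# inputs); the same loop works for 'thread', 'process', and 'dask'
# because every branch yields a (pool, as_completed) pair.
def _executor_usage_sketch():
    with executor('thread', max_workers=4) as (pool, as_completed):
        futures = {pool.submit(len, s): s for s in ('a', 'bb', 'ccc')}
        for fut in as_completed(futures):
            print(futures[fut], fut.result())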


def main(args=None):
    setup_logger(logger)
    names = get_all_feedstocks(cached=True)
    gx = nx.read_gpickle("graph.pkl")
    gx = make_graph(names, gx)
    # Utility flag for testing -- we don't need to always update GH
    no_github_fetch = os.environ.get('CONDA_FORGE_TICK_NO_GITHUB_REQUESTS')
    if not no_github_fetch:
        gx = update_graph_pr_status(gx)
        gx = close_labels(gx)

    logger.info("writing out file")
    nx.write_gpickle(gx, "graph.pkl")
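
# For local testing, a sketch of skipping the GitHub passes entirely
# (the flag just needs to be non-empty):
#
#     os.environ['CONDA_FORGE_TICK_NO_GITHUB_REQUESTS'] = '1'
#     main()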