Auto-detect components based on changed files.

Display issues under multiple components and add extra components. Closes #10.
databricks · Sep 2, 2014 · 8c79463 · 8c79463
1 parent b5b44b8
commit 8c79463
Show file tree

Hide file tree

Showing 3 changed files with 46 additions and 24 deletions.
diff --git a/main.py b/main.py
@@ -1,4 +1,5 @@
 import json
+from collections import defaultdict
 from dateutil.parser import parse as parse_datetime
 from dateutil import tz
 from datetime import datetime
@@ -59,6 +60,13 @@ def main():
     homepage = memcache.get("homepage", namespace=VERSION)
     if IS_DEV_APPSERVER or homepage is None:
         issues = Issue.query(Issue.state == "open").order(-Issue.updated_at).fetch()
-        homepage = render_template('index.html', session=session, issues=issues)
+        issues_by_component = defaultdict(list)
+        for issue in issues:
+            for component in issue.components:
+                issues_by_component[component].append(issue)
+        # Display the groups in the order listed in Issue._components
+        grouped_issues = [(c[0], issues_by_component[c[0]]) for c in Issue._components]
+        homepage = render_template('index.html', session=session,
+                                   grouped_issues=grouped_issues)
         memcache.set("homepage", value=homepage, time=60, namespace=VERSION)
     return homepage
diff --git a/sparkprs/models.py b/sparkprs/models.py
@@ -4,6 +4,7 @@
 from github_api import raw_request, PULLS_BASE, ISSUES_BASE
 import json
 import logging
+import re
 from sparkprs.utils import parse_pr_title, is_jenkins_command, contains_jenkins_command
 
 
@@ -43,25 +44,38 @@ class Issue(ndb.Model):
 
     TAG_REGEX = r"\[[^\]]*\]"
 
+    _components = [
+        # (name, pr_title_regex, filename_regex)
+        ("Core", "core", "^core/"),
+        ("Python", "python|pyspark", "python"),
+        ("YARN", "yarn", "yarn"),
+        ("Mesos", "mesos", "mesos"),
+        ("Web UI", "webui|(web ui)", "spark/ui/"),
+        ("Build", "build", "(pom\.xml)|project"),
+        ("Docs", "docs", "docs|README"),
+        ("EC2", "ec2", "ec2"),
+        ("SQL", "sql", "sql"),
+        ("MLlib", "mllib", "mllib"),
+        ("GraphX", "graphx|pregel", "graphx"),
+        ("Streaming", "stream|flume|kafka|twitter|zeromq", "streaming"),
+    ]
+
     @property
-    def component(self):
-        # TODO: support multiple components
-        title = ((self.pr_json and self.pr_json["title"]) or self.title).lower()
-        if "sql" in title:
-            return "SQL"
-        elif "mllib" in title:
-            return "MLlib"
-        elif "graphx" in title or "pregel" in title:
-            return "GraphX"
-        elif "yarn" in title:
-            return "YARN"
-        elif ("stream" in title or "flume" in title or "kafka" in title
-              or "twitter" in title or "zeromq" in title):
-            return "Streaming"
-        elif "python" in title or "pyspark" in title:
-            return "Python"
-        else:
-            return "Core"
+    def components(self):
+        """
+        Returns the list of components used to classify this pull request.
+
+        Components are identified automatically from the files that the pull request modified
+        and from keywords matched anywhere in its title (e.g. [GraphX]); defaults to ["Core"].
+        """
+        components = []
+        title = ((self.pr_json and self.pr_json["title"]) or self.title)
+        modified_files = [f["filename"] for f in (self.files_json or [])]
+        for (component_name, pr_title_regex, filename_regex) in Issue._components:
+            if re.search(pr_title_regex, title, re.IGNORECASE) or \
+                    any(re.search(filename_regex, f, re.I) for f in modified_files):
+                components.append(component_name)
+        return components or ["Core"]
 
     @property
     def parsed_title(self):

diff --git a/templates/index.html b/templates/index.html
@@ -35,18 +35,18 @@
         <h2>Spark Pull Requests</h2>
         <!-- Nav tabs -->
         <ul class="nav nav-tabs" role="tablist">
-        {% for group in issues | groupby('component') %}
+        {% for (component, issues) in grouped_issues %}
             <li class="{% if loop.first %}active{% endif %}">
-                <a href="#{{group.grouper}}" role="tag" data-toggle="tab">{{group.grouper}} <span class="badge">{{group.list | count}}</span></a>
+                <a href="#{{component | replace(' ', '')}}" role="tag" data-toggle="tab">{{component}} <span class="badge">{{issues | count}}</span></a>
             </li>
         {% endfor %}
         </ul>
 
         <!-- Tab panes -->
         <div class="tab-content">
-        {% for group in issues | groupby('component') %}
+        {% for (component, issues) in grouped_issues %}
             <div class="tab-pane {% if loop.first %}active{% endif %}"
-                 id="{{group.grouper}}">
+                 id="{{component | replace(' ', '')}}">
             <table class="table table-striped table-condensed sortable">
             <tr>
                 <th>Number</th>
@@ -59,7 +59,7 @@ <h2>Spark Pull Requests</h2>
                 <th>Jenkins</th>
                 <th>Updated</th>
             </tr>
-            {% for issue in group.list %}
+            {% for issue in issues %}
                 <tr>
                     <td class="sorttable_numeric">
                         <a href="https://www.github.com/apache/spark/pull/{{issue.number}}" target="_blank">