From bada6ab7c5222efb20e1aad835600317c6aa55a2 Mon Sep 17 00:00:00 2001 From: Parth Sharma Date: Wed, 29 May 2019 15:21:34 +0530 Subject: [PATCH 1/5] implement 'code_changes_lines' metric Signed-off-by: Parth Sharma --- augur/datasources/augur_db/augur_db.py | 29 ++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/augur/datasources/augur_db/augur_db.py b/augur/datasources/augur_db/augur_db.py index e66235d297..a5c3406612 100644 --- a/augur/datasources/augur_db/augur_db.py +++ b/augur/datasources/augur_db/augur_db.py @@ -68,6 +68,35 @@ def code_changes(self, repo_url, period='day', begin_date=None, end_date=None): 'begin_date': begin_date, 'end_date': end_date}) return results + @annotate(tag='code-changes-lines') + def code_changes_lines(self, repo_url, period='day', begin_date=None, end_date=None): + """Returns a timeseries of code changes added and removed. + + :param repo_url: The repository's URL + :param period: To set the periodicity to 'day', 'week', 'month', or 'year', defaults to 'day' + :param begin_date: Specifies the begin date, defaults to '1970-1-1 00:00:00' + :param end_date: Specifies the end date, defaults to datetime.now() + :return: DataFrame of code changes/period + """ + if not begin_date: + begin_date = '1970-1-1 00:00:00' + if not end_date: + end_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + + code_changes_lines_SQL = s.sql.text(""" + SELECT date_trunc(:period, cmt_author_date::DATE) as commit_date, SUM(cmt_added) AS added, SUM(cmt_removed) as removed + FROM commits + WHERE repo_id = (SELECT repo_id FROM repo WHERE repo_git LIKE :repourl LIMIT 1) + AND cmt_author_date BETWEEN :begin_date AND :end_date + GROUP BY commit_date + ORDER BY commit_date; + """) + + results = pd.read_sql(code_changes_lines_SQL, self.db, params={'repourl': '%{}%'.format(repo_url), 'period': period, + 'begin_date': begin_date, 'end_date': end_date}) + return results + + ##################################### ### EXPERIMENTAL ### ##################################### From 0b6ab9e35d6eb6e16d22477d32a913d879f27325 Mon Sep 17 00:00:00 2001 From: Parth Sharma Date: Wed, 29 May 2019 18:27:56 +0530 Subject: [PATCH 2/5] implement 'issues_new' metric Signed-off-by: Parth Sharma --- augur/datasources/augur_db/augur_db.py | 28 ++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/augur/datasources/augur_db/augur_db.py b/augur/datasources/augur_db/augur_db.py index a5c3406612..300c4e46dd 100644 --- a/augur/datasources/augur_db/augur_db.py +++ b/augur/datasources/augur_db/augur_db.py @@ -96,6 +96,34 @@ def code_changes_lines(self, repo_url, period='day', begin_date=None, end_date=N 'begin_date': begin_date, 'end_date': end_date}) return results + @annotate(tag='issues-new') + def issues_new(self, repo_url, period='day', begin_date=None, end_date=None): + """Returns a timeseries of new issues opened. + + :param repo_url: The repository's URL + :param period: To set the periodicity to 'day', 'week', 'month', or 'year', defaults to 'day' + :param begin_date: Specifies the begin date, defaults to '1970-1-1 00:00:00' + :param end_date: Specifies the end date, defaults to datetime.now() + :return: DataFrame of new issues/period + """ + if not begin_date: + begin_date = '1970-1-1 00:00:00' + if not end_date: + end_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + + issues_new_SQL = s.sql.text(""" + SELECT date_trunc(:period, created_at::DATE) as issue_date, COUNT(issue_id) as issues + FROM issues + WHERE repo_id = (SELECT repo_id FROM repo WHERE repo_git LIKE :repourl LIMIT 1) + AND created_at BETWEEN :begin_date AND :end_date + GROUP BY issue_date + ORDER BY issue_date; + """) + + results = pd.read_sql(issues_new_SQL, self.db, params={'repourl': '%{}%'.format(repo_url), 'period': period, + 'begin_date': begin_date, 'end_date': end_date}) + return results + ##################################### ### EXPERIMENTAL ### From 9b05643c88511324dddb32277bf2f910ecf1b607 Mon Sep 17 00:00:00 2001 From: Parth Sharma Date: Wed, 29 May 2019 19:12:58 +0530 Subject: [PATCH 3/5] implement 'issues_closed' metric Signed-off-by: Parth Sharma --- augur/datasources/augur_db/augur_db.py | 28 ++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/augur/datasources/augur_db/augur_db.py b/augur/datasources/augur_db/augur_db.py index 300c4e46dd..4f2b7b464f 100644 --- a/augur/datasources/augur_db/augur_db.py +++ b/augur/datasources/augur_db/augur_db.py @@ -125,6 +125,34 @@ def issues_new(self, repo_url, period='day', begin_date=None, end_date=None): return results + @annotate(tag='issues-closed') + def issues_closed(self, repo_url, period='day', begin_date=None, end_date=None): + """Returns a timeseries of issues closed. + + :param repo_url: The repository's URL + :param period: To set the periodicity to 'day', 'week', 'month', or 'year', defaults to 'day' + :param begin_date: Specifies the begin date, defaults to '1970-1-1 00:00:00' + :param end_date: Specifies the end date, defaults to datetime.now() + :return: DataFrame of issues closed/period + """ + if not begin_date: + begin_date = '1970-1-1 00:00:00' + if not end_date: + end_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + + issues_closed_SQL = s.sql.text(""" + SELECT date_trunc(:period, closed_at::DATE) as issue_close_date, COUNT(issue_id) as issues + FROM issues + WHERE repo_id = (SELECT repo_id FROM repo WHERE repo_git LIKE :repourl LIMIT 1) + AND closed_at IS NOT NULL AND closed_at BETWEEN :begin_date AND :end_date + GROUP BY issue_close_date + ORDER BY issue_close_date; + """) + + results = pd.read_sql(issues_closed_SQL, self.db, params={'repourl': '%{}%'.format(repo_url), 'period': period, + 'begin_date': begin_date, 'end_date': end_date}) + return results + ##################################### ### EXPERIMENTAL ### ##################################### From 2414c4733b76c2ad53ddcca768d4f5ca283ec637 Mon Sep 17 00:00:00 2001 From: Parth Sharma Date: Wed, 29 May 2019 21:17:43 +0530 Subject: [PATCH 4/5] implement 'issue_backlog' metric Signed-off-by: Parth Sharma --- augur/datasources/augur_db/augur_db.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/augur/datasources/augur_db/augur_db.py b/augur/datasources/augur_db/augur_db.py index 4f2b7b464f..791f3496b7 100644 --- a/augur/datasources/augur_db/augur_db.py +++ b/augur/datasources/augur_db/augur_db.py @@ -153,6 +153,23 @@ def issues_closed(self, repo_url, period='day', begin_date=None, end_date=None): 'begin_date': begin_date, 'end_date': end_date}) return results + @annotate(tag='issue-backlog') + def issues_backlog(self, repo_url): + """Returns number of issues currently open. + + :param repo_url: The repository's URL + :return: DataFrame of count of issues currently open. + """ + issues_backlog_SQL = s.sql.text(""" + SELECT COUNT(*) + FROM issues + WHERE repo_id = (SELECT repo_id FROM repo WHERE repo_git LIKE :repourl LIMIT 1) + AND issue_state='open' + """) + + result = pd.read_sql(issues_backlog_SQL, self.db, params={'repourl': f'%{repo_url}%'}) + return result + ##################################### ### EXPERIMENTAL ### ##################################### From 2f6ace727405b9c7db8b324ce9e37f8452941cd7 Mon Sep 17 00:00:00 2001 From: Parth Sharma Date: Wed, 29 May 2019 22:08:13 +0530 Subject: [PATCH 5/5] implement 'issue_duration' metric Signed-off-by: Parth Sharma --- augur/datasources/augur_db/augur_db.py | 42 ++++++++++++++++++-------- 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/augur/datasources/augur_db/augur_db.py b/augur/datasources/augur_db/augur_db.py index 791f3496b7..107b8d6c8c 100644 --- a/augur/datasources/augur_db/augur_db.py +++ b/augur/datasources/augur_db/augur_db.py @@ -1,6 +1,6 @@ #SPDX-License-Identifier: MIT """ -Data source that uses the Augur relational database of GitHub activity. +Data source that uses the Augur relational database of GitHub activity. """ import pandas as pd @@ -29,7 +29,7 @@ def __init__(self, user, password, host, port, dbname, schema): connect_args={'options': '-csearch_path={}'.format(schema)}) logger.debug('GHTorrent: Connecting to {} schema of {}:{}/{} as {}'.format(schema, host, port, dbname, user)) - + # try: # self.userid('howderek') # except Exception as e: @@ -56,22 +56,22 @@ def code_changes(self, repo_url, period='day', begin_date=None, end_date=None): end_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') code_changes_SQL = s.sql.text(""" - SELECT date_trunc(:period, cmt_committer_date::DATE) as commit_date, COUNT(cmt_id) + SELECT date_trunc(:period, cmt_committer_date::DATE) as commit_date, COUNT(cmt_id) FROM commits - WHERE repo_id = (SELECT repo_id FROM repo WHERE repo_git LIKE :repourl LIMIT 1) + WHERE repo_id = (SELECT repo_id FROM repo WHERE repo_git LIKE :repourl LIMIT 1) AND cmt_committer_date BETWEEN :begin_date AND :end_date GROUP BY commit_date ORDER BY commit_date; """) - results = pd.read_sql(code_changes_SQL, self.db, params={'repourl': '%{}%'.format(repo_url), 'period': period, + results = pd.read_sql(code_changes_SQL, self.db, params={'repourl': '%{}%'.format(repo_url), 'period': period, 'begin_date': begin_date, 'end_date': end_date}) return results @annotate(tag='code-changes-lines') def code_changes_lines(self, repo_url, period='day', begin_date=None, end_date=None): """Returns a timeseries of code changes added and removed. - + :param repo_url: The repository's URL :param period: To set the periodicity to 'day', 'week', 'month', or 'year', defaults to 'day' :param begin_date: Specifies the begin date, defaults to '1970-1-1 00:00:00' @@ -86,7 +86,7 @@ def code_changes_lines(self, repo_url, period='day', begin_date=None, end_date=N code_changes_lines_SQL = s.sql.text(""" SELECT date_trunc(:period, cmt_author_date::DATE) as commit_date, SUM(cmt_added) AS added, SUM(cmt_removed) as removed FROM commits - WHERE repo_id = (SELECT repo_id FROM repo WHERE repo_git LIKE :repourl LIMIT 1) + WHERE repo_id = (SELECT repo_id FROM repo WHERE repo_git LIKE :repourl LIMIT 1) AND cmt_author_date BETWEEN :begin_date AND :end_date GROUP BY commit_date ORDER BY commit_date; @@ -95,7 +95,7 @@ def code_changes_lines(self, repo_url, period='day', begin_date=None, end_date=N results = pd.read_sql(code_changes_lines_SQL, self.db, params={'repourl': '%{}%'.format(repo_url), 'period': period, 'begin_date': begin_date, 'end_date': end_date}) return results - + @annotate(tag='issues-new') def issues_new(self, repo_url, period='day', begin_date=None, end_date=None): """Returns a timeseries of new issues opened. @@ -114,7 +114,7 @@ def issues_new(self, repo_url, period='day', begin_date=None, end_date=None): issues_new_SQL = s.sql.text(""" SELECT date_trunc(:period, created_at::DATE) as issue_date, COUNT(issue_id) as issues FROM issues - WHERE repo_id = (SELECT repo_id FROM repo WHERE repo_git LIKE :repourl LIMIT 1) + WHERE repo_id = (SELECT repo_id FROM repo WHERE repo_git LIKE :repourl LIMIT 1) AND created_at BETWEEN :begin_date AND :end_date GROUP BY issue_date ORDER BY issue_date; @@ -123,7 +123,7 @@ def issues_new(self, repo_url, period='day', begin_date=None, end_date=None): results = pd.read_sql(issues_new_SQL, self.db, params={'repourl': '%{}%'.format(repo_url), 'period': period, 'begin_date': begin_date, 'end_date': end_date}) return results - + @annotate(tag='issues-closed') def issues_closed(self, repo_url, period='day', begin_date=None, end_date=None): @@ -153,6 +153,24 @@ def issues_closed(self, repo_url, period='day', begin_date=None, end_date=None): 'begin_date': begin_date, 'end_date': end_date}) return results + @annotate(tag='issue-duration') + def issue_duration(self, repo_url): + """Returns the duration of each issue. + + :param repo_url: The repository's URL + :return: DataFrame of issue id with the corresponding duration + """ + issue_duration_SQL = s.sql.text(""" + SELECT issue_id, (closed_at - created_at) AS duration + FROM issues + WHERE repo_id = (SELECT repo_id FROM repo WHERE repo_git LIKE :repourl LIMIT 1) + AND closed_at IS NOT NULL + ORDER BY issue_id; + """) + + results = pd.read_sql(issue_duration_SQL, self.db, params={'repourl': f'%{repo_url}%'}) + return results + @annotate(tag='issue-backlog') def issues_backlog(self, repo_url): """Returns number of issues currently open. @@ -177,12 +195,12 @@ def issues_backlog(self, repo_url): @annotate(tag='lines-changed-by-author') def lines_changed_by_author(self, repo_url): """ - Returns number of lines changed per author per day + Returns number of lines changed per author per day :param repo_url: the repository's URL """ linesChangedByAuthorSQL = s.sql.text(""" - SELECT cmt_author_email, cmt_author_date, cmt_author_affiliation as affiliation, + SELECT cmt_author_email, cmt_author_date, cmt_author_affiliation as affiliation, SUM(cmt_added) as additions, SUM(cmt_removed) as deletions, SUM(cmt_whitespace) as whitespace FROM commits WHERE repo_id = (SELECT repo_id FROM repo WHERE repo_git LIKE :repourl LIMIT 1)