-
Notifications
You must be signed in to change notification settings - Fork 55
/
GithubAccess.scala
313 lines (280 loc) · 13.7 KB
/
GithubAccess.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
/**
* Copyright 2016 Netflix, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.netflix.oss.tools.osstrackerscraper
import java.io.IOException
import java.util.{Date, Properties}
import com.netflix.oss.tools.osstrackerscraper.OssLifecycle.OssLifecycle
import org.kohsuke.github._
import org.slf4j.{Logger, LoggerFactory}
import play.api.libs.json.{JsObject, Json}
import scala.collection.JavaConversions._
/**
 * Commit-derived statistics for one repository.
 *
 * @param numCommits          total number of commits
 * @param daysSinceLastCommit whole days elapsed since the most recent commit
 * @param contributorLogins   distinct logins of the commit authors
 */
case class CommitInfo(
  numCommits: Int,
  daysSinceLastCommit: Int,
  contributorLogins: List[String]
)
/**
 * Issue statistics for one repository.
 *
 * @param closedCount                   number of closed issues
 * @param openCount                     number of open issues
 * @param avgDayToClose                 average number of whole days between creation and close of the closed issues
 * @param openCountWithNoLabels         open issues that carry no label at all
 * @param openCountWithLabelBug         open issues labelled "bug"
 * @param openCountWithLabelDuplicate   open issues labelled "duplicate"
 * @param openCountWithLabelEnhancement open issues labelled "enhancement"
 * @param openCountWithLabelHelpWanted  open issues labelled "help wanted"
 * @param openCountWithLabelInvalid     open issues labelled "invalid"
 * @param openCountWithLabelQuestion    open issues labelled "question"
 * @param openCountWithLabelWontfix     open issues labelled "wontfix"
 * @param openCountTrulyOpen            open issues that still count as open after the label rules are applied
 */
case class IssuesInfo(
  closedCount: Int,
  openCount: Int,
  avgDayToClose: Int,
  openCountWithNoLabels: Int,
  openCountWithLabelBug: Int,
  openCountWithLabelDuplicate: Int,
  openCountWithLabelEnhancement: Int,
  openCountWithLabelHelpWanted: Int,
  openCountWithLabelInvalid: Int,
  openCountWithLabelQuestion: Int,
  openCountWithLabelWontfix: Int,
  openCountTrulyOpen: Int
)
/**
 * Closed pull request statistics for one repository.
 *
 * @param closedPRsSize number of closed pull requests
 * @param avgPRs        average number of whole days between creation and close of those pull requests
 */
case class PRsInfo(
  closedPRsSize: Int,
  avgPRs: Int
)
/**
 * Collects per-repository statistics (commits, issues, pull requests, lifecycle metadata) through the
 * org.kohsuke.github client and renders them as JSON.
 *
 * @param asOfYYYYMMDD    scrape date string, emitted verbatim as the "asOfYYYYMMDD" JSON field
 * @param asOfISO         scrape timestamp string, emitted verbatim as the "asOfISO" JSON field
 * @param connectToGithub when true a GitHub client is created at construction time; when false no client
 *                        exists and the methods that need one cannot be used
 */
class GithubAccess(val asOfYYYYMMDD: String, val asOfISO: String, val connectToGithub: Boolean) {
// SLF4J logger named after this class
val logger = LoggerFactory.getLogger(getClass)
// GitHub client, only present when connectToGithub is true
val github: Option[GitHub] = if (connectToGithub) Some(GitHub.connect()) else None
/**
 * Reads the `osslifecycle` property from the `OSSMETADATA` file on the repository's `master` branch.
 *
 * @param repo repository to inspect
 * @return the lifecycle parsed from the property value (the string "UNKNOWN" is parsed when the property
 *         is missing), or `OssLifecycle.Unknown` when the file cannot be fetched or read
 */
def getOSSMetaDataOSSLifecycle(repo: GHRepository): OssLifecycle = {
  try {
    val content: GHContent = repo.getFileContent("OSSMETADATA", "master")
    val contentIs = content.read()
    try {
      val props = new Properties()
      props.load(contentIs)
      val osslc = props.getProperty("osslifecycle", "UNKNOWN")
      OssLifecycleParser.getOssLifecycle(osslc)
    } finally {
      // always release the stream; a failure while closing must not mask the value parsed above
      try contentIs.close() catch { case _: IOException => () }
    }
  } catch {
    case ioe: IOException =>
      // report through the class logger (like the rest of this class) instead of printing to stderr
      logger.warn(s"could not read OSSMETADATA for repo ${repo.getName()}", ioe)
      OssLifecycle.Unknown
  }
}
/**
 * Builds the JSON statistics document for a single repository.
 *
 * When the repository's creation and push timestamps are less than 60 seconds apart it is treated as
 * never pushed, and zeroed commit / issue / pull request statistics are reported instead of querying them.
 *
 * @param repo         repository to describe
 * @param public       emitted as the "public" field
 * @param ossLifecycle emitted as the "osslifecycle" field
 * @return the JSON object for this repository
 */
def getRepoStats(repo: GHRepository, public: Boolean, ossLifecycle: OssLifecycle) : JsObject = {
  logger.info(s"repo = ${repo.getName()}, forks = ${repo.getForks}, stars = ${repo.getWatchers}")

  val openPullRequests = repo.getPullRequests(GHIssueState.OPEN)
  logger.debug(s" openIssues = ${repo.getOpenIssueCount()}, openPullRequests = ${openPullRequests.size()}")

  // Note that in this case, the github-api will crash on calls to listIssues with java.lang.Error
  // https://github.com/kohsuke/github-api/issues/65
  val neverPushed = getCloseEnoughForSameDates(repo.getCreatedAt, repo.getPushedAt)

  val stats: (CommitInfo, IssuesInfo, PRsInfo) =
    if (!neverPushed) {
      // tuple elements are evaluated left to right: commits, then issues, then pull requests
      (getCommitInfo(repo), getIssuesStats(repo), getClosedPullRequestsStats(repo))
    } else {
      logger.warn("repo has never been pushed, so providing fake zero counts for issues and pull requests")
      (CommitInfo(0, 0, List[String]()), IssuesInfo(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), PRsInfo(0, 0))
    }
  val (commitInfo, issuesInfo, prsInfo) = stats

  val standardTagCounts: JsObject = Json.obj(
    "bug" -> issuesInfo.openCountWithLabelBug,
    "helpWanted" -> issuesInfo.openCountWithLabelHelpWanted,
    "question" -> issuesInfo.openCountWithLabelQuestion,
    "duplicate" -> issuesInfo.openCountWithLabelDuplicate,
    "enhancement" -> issuesInfo.openCountWithLabelEnhancement,
    "invalid" -> issuesInfo.openCountWithLabelInvalid,
    "wontfix" -> issuesInfo.openCountWithLabelWontfix
  )

  val issuesJson: JsObject = Json.obj(
    "openCount" -> issuesInfo.openCount,
    "openCountOnlyIssues" -> issuesInfo.openCountTrulyOpen,
    "closedCount" -> issuesInfo.closedCount,
    "avgTimeToCloseInDays" -> issuesInfo.avgDayToClose,
    "openCountByStandardTags" -> standardTagCounts
  )

  val pullRequestsJson: JsObject = Json.obj(
    "openCount" -> openPullRequests.size(),
    "closedCount" -> prsInfo.closedPRsSize,
    "avgTimeToCloseInDays" -> prsInfo.avgPRs
  )

  val commitsJson: JsObject = Json.obj(
    "daysSinceLastCommit" -> commitInfo.daysSinceLastCommit
  )

  val repoJson: JsObject = Json.obj(
    "asOfISO" -> asOfISO,
    "asOfYYYYMMDD" -> asOfYYYYMMDD,
    "repo_name" -> repo.getName(),
    "public" -> public,
    "osslifecycle" -> ossLifecycle,
    "forks" -> repo.getForks(),
    "stars" -> repo.getWatchers(),
    "numContributors" -> commitInfo.contributorLogins.size,
    "issues" -> issuesJson,
    "pullRequests" -> pullRequestsJson,
    "commits" -> commitsJson,
    "contributors" -> commitInfo.contributorLogins
  )

  logger.debug(s"repo json = $repoJson")
  repoJson
}
// TODO: Is there a faster way to only pull the last commit?
/**
 * Lists every commit of the repository and derives the commit count, the age of the newest commit
 * and the distinct author logins.
 *
 * @param repo repository whose commits are listed
 * @return commit statistics; all-zero statistics with no contributors when the repository has no commits
 */
def getCommitInfo(repo: GHRepository) : CommitInfo = {
  val commits = repo.listCommits().asList()
  if (commits.isEmpty) {
    // nothing to sort or index into; report zero counts rather than failing on an empty list
    logger.warn("repo has no commits, so providing zero counts for commits")
    CommitInfo(0, 0, List[String]())
  } else {
    val orderedCommits = commits.sortBy(_.getCommitShortInfo.getCommitDate())
    val firstCommit = orderedCommits.head
    val lastCommit = orderedCommits.last
    val lastCommitDate = lastCommit.getCommitShortInfo().getCommitDate()
    logger.debug(s"commits, first = ${firstCommit.getSHA1}, last = ${lastCommit.getSHA1()}")

    val daysSinceLastCommit = daysBetween(lastCommitDate, new Date())
    logger.debug(s"daysSinceLastCommit = ${daysSinceLastCommit}")

    // commits without a resolvable author are skipped; the author is looked up once per commit
    val contributorLogins = commits.map(_.getAuthor()).filter(_ != null).map(_.getLogin()).distinct
    logger.debug(s"numContribitors = ${contributorLogins.length}, contributorEmails = ${contributorLogins}")

    CommitInfo(commits.length, daysSinceLastCommit, contributorLogins.toList)
  }
}
/**
 * Computes how many closed pull requests the repository has and the average number of whole days
 * each one stayed open.
 *
 * @param repo repository whose closed pull requests are fetched
 * @return the closed pull request count and the average days to close (0 when there are none)
 */
def getClosedPullRequestsStats(repo: GHRepository) : PRsInfo = {
  val closedPRs = repo.getPullRequests(GHIssueState.CLOSED)

  // days each pull request was open, from creation to close
  val daysToClose = closedPRs.map(pr => daysBetween(pr.getCreatedAt(), pr.getClosedAt()))

  // integer average, guarding against division by zero
  val avgPRs = if (daysToClose.isEmpty) 0 else daysToClose.sum / daysToClose.size

  logger.debug(s"avg days to close ${closedPRs.size()} pull requests = ${avgPRs} days")
  PRsInfo(closedPRs.size, avgPRs)
}
/**
 * Fetches the closed and open issues of the repository, drops every entry that references a
 * pull request, and computes the issue statistics from what remains.
 *
 * @param repo repository whose issues are fetched
 * @return the issue statistics
 */
def getIssuesStats(repo: GHRepository): IssuesInfo = {
  // keep only entries whose pull request reference is null
  def issuesOnly(state: GHIssueState): Array[GHIssue] =
    repo.getIssues(state).filter(issue => issue.getPullRequest == null).toArray

  val closedIssues = issuesOnly(GHIssueState.CLOSED)
  val openIssues = issuesOnly(GHIssueState.OPEN)
  getIssuesStats(closedIssues, openIssues)
}
/**
 * Computes issue statistics from already-fetched issues.
 *
 * @param closedIssues closed issues, used for the closed count and the average days to close
 * @param openIssues   open issues, used for the open count and the per-label counts
 * @return the combined issue statistics (average days to close is 0 when there are no closed issues)
 */
def getIssuesStats(closedIssues: Array[GHIssue], openIssues: Array[GHIssue]): IssuesInfo = {
  val (noLabels, bug, duplicate, enhancement, helpWanted, invalid, question, wontfix, trulyOpen) =
    getIssuesLabelStats(openIssues)

  // days each closed issue was open, from creation to close
  val daysToClose = closedIssues.map(issue => daysBetween(issue.getCreatedAt(), issue.getClosedAt()))

  // integer average, guarding against division by zero
  val avgDaysToCloseIssues = if (daysToClose.isEmpty) 0 else daysToClose.sum / daysToClose.length

  logger.debug(s"avg days to close ${closedIssues.length} issues = ${avgDaysToCloseIssues} days")

  IssuesInfo(
    closedCount = closedIssues.length,
    openCount = openIssues.length,
    avgDayToClose = avgDaysToCloseIssues,
    openCountWithNoLabels = noLabels,
    openCountWithLabelBug = bug,
    openCountWithLabelDuplicate = duplicate,
    openCountWithLabelEnhancement = enhancement,
    openCountWithLabelHelpWanted = helpWanted,
    openCountWithLabelInvalid = invalid,
    openCountWithLabelQuestion = question,
    openCountWithLabelWontfix = wontfix,
    openCountTrulyOpen = trulyOpen
  )
}
/**
 * Counts the open issues per standard label.
 *
 * @param openIssues open issues to classify
 * @return a 9-tuple, in this order: no labels, bug, duplicate, enhancement, help wanted, invalid,
 *         question, wontfix, truly open
 */
def getIssuesLabelStats(openIssues: Array[GHIssue]): (Int, Int, Int, Int, Int, Int, Int, Int, Int) = {
  def withLabel(name: String): Int = countLabelForIssues(openIssues, name)

  val unlabelled = openIssues.count(issue => issue.getLabels.isEmpty)

  // standard labels that count
  val bug = withLabel("bug")
  val helpWanted = withLabel("help wanted")
  val question = withLabel("question")

  // standard labels that don't count
  val duplicate = withLabel("duplicate")
  val enhancement = withLabel("enhancement")
  val invalid = withLabel("invalid")
  val wontfix = withLabel("wontfix")

  val trulyOpen = countLabelsForTrueIssues(openIssues)

  (unlabelled, bug, duplicate, enhancement, helpWanted, invalid, question, wontfix, trulyOpen)
}
/**
 * Counts the issues that should still be considered open once their labels are taken into account.
 *
 * Rules, applied in order to each issue:
 *  - no labels at all: counts
 *  - has "bug" or "question": counts (the worst-case label wins, and nothing is double counted)
 *  - has "invalid" or "wontfix": does not count
 *  - otherwise decided by the duplicate / enhancement / help wanted table below
 *
 * NOTE(review): the table counts an issue labelled only "duplicate", whereas the original design notes
 * said duplicates should not count. The table is kept as-is; confirm the intended rule.
 *
 * @param issues issues to classify
 * @return number of issues that count
 */
def countLabelsForTrueIssues(issues: Array[GHIssue]): Int = {
  issues.count(issue => {
    val labels = issue.getLabels.toList

    val shouldCount =
      if (labels.isEmpty) {
        true // no labels so counts
      } else if (hasBugOrQuestionLabel(labels)) {
        true // has bug or question, so counts
      } else if (hasInvalidOrWontFix(labels)) {
        false // has invalid or wontfix, so doesn't count
      } else {
        val duplicate = hasLabelOfName(labels, "duplicate")
        val enhancement = hasLabelOfName(labels, "enhancement")
        // the standard label is "help wanted" (with a space), matching the per-label counts elsewhere
        val helpwanted = hasLabelOfName(labels, "help wanted")

        // by this point bug and question and invalid and wontfix = false
        (duplicate, enhancement, helpwanted) match {
          case (false, false, false) => true  // no labels except custom labels
          case (false, false, true)  => true  // help wanted and [custom labels]
          case (false, true, false)  => false // enhancement and [custom labels]
          case (false, true, true)   => false // enhancement and help wanted and [custom labels]
          case (true, false, false)  => true  // duplicate and [custom labels]
          case (true, false, true)   => true  // duplicate and help wanted and [custom labels]
          case (true, true, false)   => false // duplicate and enhancement and [custom labels]
          case (true, true, true)    => false // duplicate, enhancement, help wanted and [custom labels]
        }
      }

    logger.debug(s"issue ${issue.getNumber} counts = ${shouldCount}, labels = ${labels.map{_.getName}}")
    shouldCount
  })
}
// Issues with these labels ALWAYS count
/**
 * Tells whether the labels include "bug" or "question".
 *
 * @param labels labels of one issue
 * @return true when at least one of the two labels is present
 */
def hasBugOrQuestionLabel(labels: List[GHLabel]): Boolean = {
  // Future: Eventually we can let custom labels be configured per scraper or per project (OSSMETADATA)
  List("bug", "question").exists(labelName => hasLabelOfName(labels, labelName))
}
// Issues with these labels will never count as long as not Bug or Question
/**
 * Tells whether the labels include "invalid" or "wontfix".
 *
 * @param labels labels of one issue
 * @return true when at least one of the two labels is present
 */
def hasInvalidOrWontFix(labels: List[GHLabel]): Boolean = {
  // Future: Eventually we can let custom labels be configured per scraper or per project (OSSMETADATA)
  List("invalid", "wontfix").exists(labelName => hasLabelOfName(labels, labelName))
}
// def hasSomeStandardGithubLabels(labels: List[GHLabel]): Boolean = {
// hasLabelOfName(labels, "bug") || hasLabelOfName(labels, "help wanted") || hasLabelOfName(labels, "question") ||
// hasLabelOfName(labels, "duplicate") || hasLabelOfName(labels, "enhancement") || hasLabelOfName(labels, "invalid") || hasLabelOfName(labels, "wontfix")
// }
/**
 * Tells whether any of the labels has exactly the given name.
 *
 * @param labels labels to search
 * @param name   label name to look for (exact match)
 * @return true when a label with that name is present
 */
def hasLabelOfName(labels: List[GHLabel], name: String): Boolean =
  labels.exists(label => label.getName == name)
/**
 * Counts the issues that carry a label with exactly the given name.
 *
 * @param issues issues to inspect
 * @param label  label name to look for (exact match)
 * @return number of issues having that label; issues without labels never match
 */
def countLabelForIssues(issues: Array[GHIssue], label: String): Int =
  issues.count(issue => issue.getLabels.exists(_.getName == label))
/**
 * Number of whole days from `smaller` to `bigger`.
 *
 * The millisecond difference is divided by the length of a day and truncated toward zero, so the
 * result is negative when `bigger` is more than a day before `smaller`.
 *
 * @param smaller start date
 * @param bigger  end date
 * @return whole days between the two dates
 */
def daysBetween(smaller: Date, bigger: Date): Int = {
  val millisPerDay = 24 * 60 * 60 * 1000
  val elapsedMillis = bigger.getTime() - smaller.getTime()
  (elapsedMillis / millisPerDay).toInt
}
/**
 * Returns the `remaining` value of the current GitHub rate limit.
 *
 * @return remaining number of requests reported by the rate limit
 * @throws NoSuchElementException when no GitHub client exists because `connectToGithub` was false
 */
def getRemainingHourlyRate(): Int = {
  // same exception type as a bare Option.get, but with a message that explains the cause
  val client = github.getOrElse(
    throw new NoSuchElementException("no GitHub client available: GithubAccess was created with connectToGithub = false"))
  client.getRateLimit.remaining
}
/**
 * Lists all repositories of a GitHub organization.
 *
 * @param githubOrg organization name
 * @return every repository returned for the organization
 * @throws NoSuchElementException when no GitHub client exists because `connectToGithub` was false
 */
def getAllRepositoriesForOrg(githubOrg: String): List[GHRepository] = {
  // same exception type as a bare Option.get, but with a message that explains the cause
  val client = github.getOrElse(
    throw new NoSuchElementException("no GitHub client available: GithubAccess was created with connectToGithub = false"))
  val org = client.getOrganization(githubOrg)
  // 100 is presumably the page size used while listing - confirm against the github-api documentation
  val githubRepos = org.listRepositories(100).asList().toList
  logger.info(s"Found ${githubRepos.size} total repos for ${githubOrg}")
  githubRepos
}
/**
 * Tells whether two dates are less than 60 seconds apart, in either direction.
 *
 * @param d1 first date
 * @param d2 second date
 * @return true when the absolute difference is below 60 seconds
 */
def getCloseEnoughForSameDates(d1: Date, d2: Date): Boolean = {
  val toleranceMillis = 1000 * 60 // 60 seconds
  val gapMillis = Math.abs(d1.getTime - d2.getTime)
  gapMillis < toleranceMillis
}
}