-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathgithub_graders.py
239 lines (205 loc) · 8.94 KB
/
github_graders.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
# github_graders.py
# Dan Wallach <[email protected]>
# Available subject to the Apache 2.0 License
# https://www.apache.org/licenses/LICENSE-2.0
import argparse
import random
import pandas as pd
from github_config import *
from github_scanner import *
from typing import List, TypeVar
# Graders, preferably listed by GitHub ID. (They are ignored as students if
# they have also checked out a copy of the assignment.)
grader_list = default_grader_list

# Your own GitHub ID and/or anybody else who should never be graded.
ignore_list = default_grader_ignore_list

# Command-line argument processing.
parser = argparse.ArgumentParser(description='Random assignment of graders to students')


def _add_value_option(flag, fallback, help_text):
    """Register an option taking exactly one value; it parses to a one-element list."""
    parser.add_argument(flag, nargs=1, default=[fallback], help=help_text)


# Registration order is kept as-is because it determines the order in --help.
_add_value_option('--token', default_github_token, 'GitHub API token')
_add_value_option('--org', default_github_organization,
                  'GitHub organization to scan, default: ' + default_github_organization)
_add_value_option('--prefix', default_prefix,
                  'Prefix on projects to match (default: match all projects)')
parser.add_argument('--teams',
                    action="store_true",
                    default=False,
                    help="reads GitHub team information, use on group assignments")
_add_value_option('--students', default_student_csv_name,
                  "CSV file name with student information (default: student-data.csv)")
_add_value_option('--ignore', "",
                  "string pattern in group names to ignore, e.g., STAFF (no default)")

args = parser.parse_args()

# Every value option arrives wrapped in a one-element list; unwrap them here.
github_token = args.token[0]
github_organization = args.org[0]
github_prefix = args.prefix[0]
student_file_name = args.students[0]
ignore_str = args.ignore[0]
use_teams = args.teams
# Generic element type, so the hints can say "list of T in, list of lists of T out".
T = TypeVar('T')


def group_list_by_n(l: List[T], n: int) -> List[List[T]]:
    """
    Given a list of whatever type, divides it into a list of lists, each of which is n elements long,
    until the last one, having whatever is left.

    :param l: the list to divide; it is not modified
    :param n: the chunk size; must be at least 1
    :return: the chunks, in the original order; an empty list when l is empty
    :raises ValueError: if n is less than 1
    """
    if n < 1:
        # A non-positive chunk size can never consume the list.
        raise ValueError("n must be at least 1, got %r" % (n,))

    # Slice iteratively rather than recursing: recursion needs one stack frame per
    # chunk (hitting the interpreter's recursion limit on long lists) and re-copies
    # the remaining tail at every step.
    return [l[start:start + n] for start in range(0, len(l), n)]
# Student roster. Stays an empty placeholder, with df_students_success False,
# unless the CSV file is found and loaded below.
df_students = {}  # will replace below
df_students_success = False
try:
    # Only the read itself is guarded, so failures in the later steps are
    # never mistaken for a missing file.
    df_students = pd.read_csv(student_file_name)
except FileNotFoundError:
    sys.stdout.write("Cannot find student info file: %s\n" % student_file_name)
    sys.stdout.flush()
else:
    # force lower-case of GitHub IDs, so lookups can compare against lower-cased IDs
    df_students.GitHubID = df_students.GitHubID.astype(str).str.lower()
    df_students_success = True
    sys.stdout.write("Found %d students in file %s\n" % (len(df_students), student_file_name))
def student_info(github_ids: List[str]) -> str:
    """
    Given a list of GitHub IDs, returns a suitably human-readable string based on
    the student-data CSV file with the students' name, email, etc.

    Each ID becomes "Name <Email>", with " (NetID)" appended when the email does
    not already start with the NetID. An ID is emitted as-is when the student
    CSV was not loaded, or when it has no row for that ID (a warning is printed
    in the latter case).

    :param github_ids: GitHub IDs to describe; each is lower-cased before lookup
    :return: one entry per ID, joined with ", " (empty string for an empty list)
    """
    def field(row: dict, key: str) -> str:
        # Missing columns and empty CSV cells (NaN) both become ''; anything
        # else (e.g. a numeric NetID) is turned into text.
        value = row.get(key)
        return '' if pd.isna(value) else str(value)

    results = []
    for github_id in github_ids:
        if not df_students_success:
            results.append(github_id)
            continue

        matches = df_students[df_students['GitHubID'] == github_id.lower()]
        if len(matches) == 0:
            sys.stdout.write("Warning: github-id (%s) not found in student info!\n" % github_id)
            sys.stdout.flush()
            # Nothing is known about this student, so show the ID itself
            # rather than an empty "name <email>" pair.
            results.append(github_id)
            continue
        if len(matches) > 1:
            sys.stdout.write("Warning: two or more rows found for github-id (%s) in student info!\n" % github_id)
            sys.stdout.flush()

        # With several matching rows, the first one wins.
        student = matches.iloc[0].to_dict()
        name = field(student, 'Name')
        email = field(student, 'Email')
        net_id = field(student, 'NetID')

        # An empty NetID is trivially a prefix of the email, so it is never printed.
        if email.startswith(net_id):
            results.append("%s <%s>" % (name, email))
        else:
            results.append("%s <%s> (%s)" % (name, email, net_id))

    return ", ".join(results)
# First things first, if we have no graders, we can't divide up the work.
if not grader_list:
    print("Error: grader_list is empty, cannot assign grades")
    # The bare `exit` name is a convenience added by the `site` module for
    # interactive sessions and is not guaranteed to exist in every run mode;
    # raising SystemExit directly terminates with the same status code.
    raise SystemExit(1)
# Bookkeeping tables, filled in while the repos are walked further down.
ids_seen = {}
submissions = {}
url_to_gids = {}
url_to_short = {}

# Graders are excluded together with the explicitly ignored IDs.
all_ignore_list = ignore_list + grader_list

# Keep only the matching repos whose name desired_user() accepts.
filtered_repo_list = list(filter(
    lambda candidate: desired_user(github_prefix, all_ignore_list, candidate['name'], ignore_str),
    query_matching_repos(github_organization, github_prefix, github_token)))

# Team membership is only fetched when --teams was requested.
team_info = fetch_team_infos(filtered_repo_list, github_token, True) if use_teams else {}
# Let's do a duplicate check, and also sort out the URL we want to use
# print("%d repos in the initial search\n" % len(filtered_repo_list))
#
# Purpose: walk every filtered repo, record its canonical URL under 'final_url',
# map that URL to its GitHub IDs (url_to_gids) and repo name (url_to_short), and
# collect per-ID repo lists in `submissions` (with a counter in `ids_seen`).
#
# NOTE(review): this listing has lost its indentation. The nesting of the
# `ids_seen[gid] = ids_seen[gid] + 1` / `submissions[gid].append(repo)` pair is
# therefore ambiguous: it may belong only to the "different URLs" branch, or run
# after both warning branches. Presumably the former, so exact duplicates are
# not recorded twice -- confirm against the original file before re-indenting.
for repo in filtered_repo_list:
# Use 'html_url' when the repo record has one; otherwise fall back to 'url'.
if 'html_url' in repo:
repo['final_url'] = repo['html_url']
else:
repo['final_url'] = repo['url']
# With teams, the IDs come from the fetched team info; otherwise a single ID
# is derived from the repo name by student_name_from().
if use_teams:
gids = team_info[repo['final_url']]['team_members']
else:
gids = [student_name_from(github_prefix, repo['name'])]
url_to_gids[repo['final_url']] = gids
url_to_short[repo['final_url']] = repo['name']
for gid in gids:
if gid in ids_seen:
# check if we have an exact duplicate or not ... this shouldn't happen, but ... does.
submission_urls = [x['final_url'] for x in submissions[gid]]
if repo['final_url'] in submission_urls:
sys.stdout.write('Warning: exact url for GitHub ID <%s> seen more than once!\n' % gid)
sys.stdout.flush()
else:
sys.stdout.write('Warning: GitHub ID <%s> with different URLs seen!\n' % gid)
sys.stdout.flush()
ids_seen[gid] = ids_seen[gid] + 1
submissions[gid].append(repo)
else:
# First time this ID shows up: start its counter and its repo list.
ids_seen[gid] = 1
submissions[gid] = [repo]
# Classify every repo URL by how many repos its GitHub ID submitted:
#   unique[url]     -> True when the ID has exactly one repo, False otherwise
#   duplicates[url] -> all URLs recorded for that ID (only for non-unique URLs)
#   exemplar[url]   -> the first URL recorded for that ID (only for non-unique URLs)
unique = {}
duplicates = {}
exemplar = {}
for repos_for_gid in submissions.values():
    urls_for_gid = [entry['final_url'] for entry in repos_for_gid]
    if len(urls_for_gid) == 1:
        unique[urls_for_gid[0]] = True
        continue
    for dup_url in urls_for_gid:
        unique[dup_url] = False
        duplicates[dup_url] = urls_for_gid
        exemplar[dup_url] = urls_for_gid[0]
# one more round of filtering when dealing with teams:
# drop every repo whose team contains an ignored ID (ignore list or grader).
if use_teams:
    old_filtered_repo_list = filtered_repo_list
    # Team member IDs are compared in lower case, so the ignore list must be
    # lower-cased as well; otherwise mixed-case entries would never match.
    ignored_ids = {x.lower() for x in all_ignore_list}
    filtered_repo_list = [
        repo for repo in old_filtered_repo_list
        if not any(member.lower() in ignored_ids
                   for member in team_info[repo['final_url']]['team_members'])
    ]
# Sanity check: every surviving repo URL should appear in the lookup tables
# built above. Problems are only reported, never fixed.
for repo in filtered_repo_list:
    url = repo['final_url']

    if url not in url_to_gids:
        print("WARNING: %s missing from url_to_gids db" % url)
    if url not in url_to_short:
        print("WARNING: %s missing from url_to_short db" % url)

    if url not in unique:
        print("WARNING: %s missing from unique db (perhaps a student repo without a team?)" % url)
        continue
    if unique[url]:
        continue

    # Non-unique URLs must also be present in both duplicate tables.
    if url not in duplicates:
        print("WARNING: %s missing from duplicates db" % url)
    if url not in exemplar:
        print("WARNING: %s missing from exemplar db" % url)
# note: we're shuffling the graders, so different graders get lucky each week when the load isn't evenly divisible
# and, of course, we're shuffling the repos.

# A repo is gradable when it is its owner's only repo, or when it is the
# exemplar (first URL) chosen among that owner's duplicates.
all_urls = [repo['final_url']
            for repo in filtered_repo_list
            if unique.get(repo['final_url']) or exemplar.get(repo['final_url']) == repo['final_url']]

repo_db = {repo['final_url']: repo for repo in filtered_repo_list}

random.shuffle(all_urls)
random.shuffle(grader_list)

# Deal the shuffled URLs out round-robin: grader i gets items i, i + G, i + 2G, ...
# where G is the number of graders. Stride slicing yields the same groups as
# chunking the list into rows of G and reading off column i, without redoing
# the chunking once per grader.
grading_groups = [all_urls[i::len(grader_list)] for i in range(len(grader_list))]

grader_map = dict(zip(grader_list, grading_groups))
# Emit the Markdown report: a heading per grader (case-insensitive order),
# followed by that grader's repos in sorted URL order.
print("# Grade assignments for %s" % github_prefix)
print("%d repos are ready to grade\n" % len(all_urls))

for grader in sorted(grader_map.keys(), key=str.lower):
    assigned_urls = grader_map[grader]
    print("## %s (%d total)" % (grader, len(assigned_urls)))

    for url in sorted(assigned_urls):
        if url in duplicates:
            print("- **Possible repo duplicates**")
            # List only the URLs that share an owner with this repo (not every
            # duplicate known to the script), each with its own students.
            for u in duplicates[url]:
                print(" - [%s](%s) - %s" % (url_to_short[u], u, student_info(url_to_gids[u])))
        else:
            print("- [%s](%s) - %s" % (url_to_short[url], url, student_info(url_to_gids[url])))