forked from hellerve/programming-talks
-
Notifications
You must be signed in to change notification settings - Fork 0
/
add_youtube_durations.py
98 lines (85 loc) · 3.36 KB
/
add_youtube_durations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
"""
A script that annotates the YouTube links in a file with each video's duration.
Written by Bruno Thalmann (https://github.com/thalmann).
"""
import requests
import re
import optparse
from urllib.parse import parse_qs
def load_youtube_api_key():
    """Read the YouTube Data API key from ``youtube_api_key.txt``.

    The file is looked up relative to the current working directory.

    Returns:
        The file's contents with surrounding whitespace stripped.

    Raises:
        SystemExit: If the key file cannot be read. A hint explaining how to
            create the file is printed first.
    """
    try:
        # The context manager closes the handle even if read() fails.
        with open('youtube_api_key.txt', 'r') as key_file:
            return key_file.read().strip()
    except (OSError, UnicodeDecodeError):
        # The hint must name the file that is actually opened above.
        print('Add file called youtube_api_key.txt and insert a youtube api key.')
        print('Youtube api key can be obtained at: https://console.developers.google.com.')
        # Non-zero status so a calling shell can tell that the run failed.
        raise SystemExit(1)
def get_number_of_lines(file_name):
    """Return how many lines the text file *file_name* contains."""
    line_count = 0
    with open(file_name, 'r') as handle:
        # Iterating the handle yields one item per line.
        for _ in handle:
            line_count += 1
    return line_count
# ISO 8601 duration in the form P[nD][T[nH][nM][nS]]; every component is
# optional, so seconds-only ("PT45S") and hours-only ("PT1H") values match.
_ISO8601_DURATION_RE = re.compile(
    r'P(?:(?P<days>\d+)D)?'
    r'(?:T(?:(?P<hours>\d+)H)?(?:(?P<minutes>\d+)M)?(?:(?P<seconds>\d+)S)?)?'
)


def get_duration(json_video):
    """Extract a video's duration from a decoded API response.

    Args:
        json_video: Mapping from which
            ``['items'][0]['contentDetails']['duration']`` is read; that
            value is parsed as an ISO 8601 duration string such as
            ``PT1H2M3S``.

    Returns:
        A tuple ``(hours, minutes, seconds)`` of ints. Any day component is
        folded into the hours. A duration string that does not match the
        expected format yields ``(0, 0, 0)``.

    Raises:
        KeyError, IndexError: If the response does not contain the expected
            structure (for example an empty ``items`` list).
    """
    s = json_video['items'][0]['contentDetails']['duration']
    match = _ISO8601_DURATION_RE.fullmatch(s)
    if match is None:
        return 0, 0, 0
    # Absent components come back as None; treat them as zero and convert
    # everything to int so the result can be formatted with %d.
    parts = {name: int(value or 0) for name, value in match.groupdict().items()}
    hours = parts['days'] * 24 + parts['hours']
    return hours, parts['minutes'], parts['seconds']
def print_duration(duration):
    """Format a duration as the suffix `` [HH:MM:SS]``.

    Despite its name, this function returns the text and prints nothing.

    Args:
        duration: Sequence ``(hours, minutes, seconds)``. Each component may
            be an int, a numeric string, or an empty/``None`` value, which
            counts as zero.

    Returns:
        The formatted string, including the leading space.
    """
    # int() is required because '%02d' raises TypeError for string operands,
    # and regex-derived components arrive as strings.
    hours = int(duration[0] or 0)
    minutes = int(duration[1] or 0)
    seconds = int(duration[2] or 0)
    return ' [%02d:%02d:%02d]' % (hours, minutes, seconds)
def handle_log(log):
    """Print the end-of-run summary.

    Always prints ``Done!`` first. If *log* is empty, a success message
    follows; otherwise the collected log entries follow, one per line.
    """
    print('Done!')
    summary = '\n'.join(log) if log else 'Everything went well!'
    print(summary)
def _insert_after_link(line, link_end, suffix):
    """Insert *suffix* after the first ')' found at or after *link_end*."""
    closing = line.find(')', link_end)
    if closing == -1:
        # No closing parenthesis after the URL: append rather than crash.
        return line + suffix
    return line[:closing + 1] + suffix + line[closing + 1:]


def main():
    """Annotate every YouTube link in the target file with its duration.

    The file given with ``-f`` is read, and each line that contains a
    ``www.youtube.com/watch?v=...`` link but no ``[HH:MM:SS]`` marker gets
    the video's duration inserted after the link's closing parenthesis. The
    file is then rewritten in place. Lines whose duration cannot be fetched
    are kept unchanged and reported in the final log.
    """
    parser = optparse.OptionParser('usage: %prog -f <target_file>')
    parser.add_option('-f', dest='input_file', type='string', help='specify input file')
    (options, args) = parser.parse_args()
    input_file = options.input_file
    if not input_file:
        # parser.error() prints the usage text and exits with status 2.
        parser.error('no input file given (use -f <target_file>)')

    # Loaded after argument parsing so that --help works without a key file.
    youtube_api_key = load_youtube_api_key()

    # Compiled once, outside the loop; raw strings avoid invalid escapes.
    duration_re = re.compile(r'\[\d\d:\d\d:\d\d\]')
    youtube_re = re.compile(r'https?://www\.youtube\.com/watch\?v=([a-zA-Z0-9_-]+)')

    log = []
    with open(input_file, 'r+') as f:
        lines = f.read().split('\n')
        # Count the same sequence that is iterated so the progress output
        # never reports "line N+1 of N" for files ending in a newline.
        number_of_lines = len(lines)
        new_data = []
        for i, line in enumerate(lines, 1):
            print('Parsing line: ' + str(i) + ' of ' + str(number_of_lines))
            # FIXME: Assumes that no two videos have the exact same length
            # A line that already carries a [HH:MM:SS] marker is left alone.
            has_been_added_earlier = duration_re.search(line)
            youtube_match = None if has_been_added_earlier else youtube_re.search(line)
            if youtube_match is None:
                new_data.append(line)
                continue

            video_id = youtube_match.group(1)
            try:
                r = requests.get(
                    'https://www.googleapis.com/youtube/v3/videos',
                    params={
                        'key': youtube_api_key,
                        'part': 'contentDetails',
                        'id': video_id,
                    },
                    timeout=30,
                )
                r.raise_for_status()
                # Components are coerced to int in case they arrive as
                # strings, so they can always be formatted with %d.
                duration = tuple(int(part or 0) for part in get_duration(r.json()))
            except (requests.RequestException, KeyError, IndexError, ValueError):
                log.append('The request to the youtube API went wrong. Video id: ' + video_id + '. Youtube api key: ' + youtube_api_key + '.')
                # Keep the original line so no content is lost on failure.
                new_data.append(line)
                continue

            # Searching for ')' from the end of the URL keeps parentheses in
            # the link title from being mistaken for the end of the link.
            new_line = _insert_after_link(line, youtube_match.end(), print_duration(duration))
            new_data.append(new_line)
            print(new_line)

        f.seek(0)  # set file cursor to start of file
        f.write('\n'.join(new_data))
        # Drop any leftover bytes should the new content ever be shorter.
        f.truncate()
    handle_log(log)
# Run the script only when executed directly, not when imported as a module.
if __name__ == '__main__':
    main()