forked from learningequality/sushi-chef-ubongokids
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathyoutube.py
124 lines (101 loc) · 3.64 KB
/
youtube.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import logging
import os
import time
from urllib.parse import parse_qs, urlparse

from pressurecooker.youtube import YouTubeResource

logger = logging.getLogger(__name__)
class CachingClient:
    """Read-through caching wrapper around a YouTube data client.

    ``client`` must provide ``get_video_data``, ``get_playlist_data`` and
    ``get_channel_data``.  ``cache`` must provide ``get(key)`` returning a
    ``(found, data)`` pair, ``add(key, data)`` and ``stats()``.
    """

    def __init__(self, client, cache):
        self.client = client
        self.cache = cache

    def get_video_data(self, id):
        """Return the data for video ``id``, consulting the cache first."""
        fetch = self.client.get_video_data
        return self._get(self._gen_video_cache_key, id, fetch)

    def get_playlist_data(self, id):
        """Return the data for playlist ``id``, consulting the cache first."""
        fetch = self.client.get_playlist_data
        return self._get(self._gen_playlist_cache_key, id, fetch)

    def get_channel_data(self, id):
        """Return the data for channel ``id``, consulting the cache first."""
        fetch = self.client.get_channel_data
        return self._get(self._gen_channel_cache_key, id, fetch)

    def _get(self, cache_key_gen_func, id, get_func):
        """Look ``id`` up in the cache; on a miss, fetch it and store it.

        ``cache_key_gen_func`` turns ``id`` into the cache key and
        ``get_func`` is called with ``id`` only when the cache reports a miss.
        """
        cache_key = cache_key_gen_func(id)
        hit, cached = self.cache.get(cache_key)
        if hit:
            return cached
        fresh = get_func(id)
        self.cache.add(cache_key, fresh)
        return fresh

    def _gen_playlist_cache_key(self, x):
        """Build the cache key for a playlist id."""
        return "playlist:{}".format(x)

    def _gen_channel_cache_key(self, x):
        """Build the cache key for a channel id."""
        return "channel:{}".format(x)

    def _gen_video_cache_key(self, x):
        """Build the cache key for a video id."""
        return "video:{}".format(x)

    def stats(self):
        """Return whatever statistics the underlying cache reports."""
        return self.cache.stats()
class Client:
    """Fetch video, playlist and channel metadata from YouTube.

    ``client`` must expose ``extract_info(url, download=False)``
    (presumably a ``youtube_dl.YoutubeDL`` instance -- confirm with callers).
    """

    def __init__(self, client):
        self.client = client

    def _get(self, url):
        """Return what ``self.client`` extracts for ``url``, without downloading.

        Any exception is logged together with the URL and then re-raised.
        """
        try:
            return self.client.extract_info(url, download=False)
        except Exception:
            # Exception info lacks URL
            logger.error("Error fetching url: {}".format(url))
            raise

    @staticmethod
    def _playlist_id_from_url(url):
        """Return the value of the ``list`` query parameter of ``url``.

        Example:
        https://www.youtube.com/playlist?list=PLjSFjqcCS3M-OdPo7sZZSwpK5d1KL4kAR

        Raises:
            ValueError: if ``url`` carries no ``list`` parameter.
        """
        # Parse the query string properly so that extra parameters
        # (e.g. "&feature=share") do not break the extraction.
        values = parse_qs(urlparse(url).query).get("list")
        if not values:
            raise ValueError("No playlist id in url: {}".format(url))
        return values[0]

    def get_video_data(self, id):
        """Return a dict describing video ``id``, or ``None`` if unavailable.

        The dict holds the watch ``url`` plus everything returned by
        ``YouTubeResource.get_resource_info()``.  ``None`` is returned when
        that call yields a falsy value.
        """
        video_url = "https://www.youtube.com/watch?v={}".format(id)

        # Undocumented hack to skip proxies in development.  Default to ""
        # so that an unset PROXY_LIST means "use proxies" rather than
        # crashing on ``None.strip()``.
        useproxy = os.environ.get("PROXY_LIST", "").strip() != "skip"

        ytres = YouTubeResource(
            video_url,
            useproxy=useproxy,
        )
        video_info = ytres.get_resource_info()
        if not video_info:
            return None

        result = dict(url=video_url)
        result.update(video_info)

        # Avoid hitting the rate limit when not proxying
        # 4 seconds should work:
        # https://github.com/ytdl-org/youtube-dl/issues/22382#issuecomment-546145713
        if not useproxy:
            time.sleep(4)

        return result

    def get_playlist_data(self, id):
        """Return ``id``, ``url``, ``name`` and video ids of playlist ``id``.

        Raises:
            AssertionError: if the extracted data is not of type "playlist".
        """
        playlist = self._get("https://www.youtube.com/playlist?list={}".format(id))
        if playlist.get("_type", None) != "playlist":
            logger.error("Got this data: {}".format(playlist))
            raise AssertionError("Not a playlist")

        # A missing or None "entries" value is treated as an empty playlist.
        entries = playlist.get("entries") or []
        return dict(
            id=playlist["id"],
            url=playlist["webpage_url"],
            name=playlist["title"],
            videos=[entry["id"] for entry in entries],
        )

    def get_channel_data(self, id):
        """Return ``id``, ``url``, ``name`` and playlist ids of channel ``id``.

        Raises:
            AssertionError: if the extracted data is not of type "playlist".
            ValueError: if an entry URL carries no playlist id.
        """
        # Firstly, get all the playlists
        channel = self._get("https://www.youtube.com/channel/{}/playlists".format(id))
        if channel.get("_type", None) != "playlist":
            logger.error("Got this data: {}".format(channel))
            raise AssertionError("Not a url reference")

        # A missing or None "entries" value is treated as "no playlists".
        entries = channel.get("entries") or []
        name = channel["title"]
        return dict(
            id=channel["id"],
            url=channel["webpage_url"],
            name=name,
            playlists=[
                self._playlist_id_from_url(entry["url"]) for entry in entries
            ],
        )