diff --git a/youtube/channel.py b/youtube/channel.py index 1ec56352..0dafde5b 100644 --- a/youtube/channel.py +++ b/youtube/channel.py @@ -47,6 +47,12 @@ ('X-YouTube-Client-Name', '1'), ('X-YouTube-Client-Version', '2.20180830'), ) +headers_pbj = ( + ('Accept', '*/*'), + ('Accept-Language', 'en-US,en;q=0.5'), + ('X-YouTube-Client-Name', '2'), + ('X-YouTube-Client-Version', '2.20180830'), +) # https://www.youtube.com/browse_ajax?action_continuation=1&direct_render=1&continuation=4qmFsgJAEhhVQzdVY3M0MkZaeTN1WXpqcnF6T0lIc3caJEVnWjJhV1JsYjNNZ0FEZ0JZQUZxQUhvQk1yZ0JBQSUzRCUzRA%3D%3D # https://www.youtube.com/browse_ajax?ctoken=4qmFsgJAEhhVQzdVY3M0MkZaeTN1WXpqcnF6T0lIc3caJEVnWjJhV1JsYjNNZ0FEZ0JZQUZxQUhvQk1yZ0JBQSUzRCUzRA%3D%3D&continuation=4qmFsgJAEhhVQzdVY3M0MkZaeTN1WXpqcnF6T0lIc3caJEVnWjJhV1JsYjNNZ0FEZ0JZQUZxQUhvQk1yZ0JBQSUzRCUzRA%3D%3D&itct=CDsQybcCIhMIhZi1krTc2wIVjMicCh2HXQnhKJsc @@ -99,14 +105,15 @@ def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1): def get_number_of_videos(channel_id): # Uploads playlist playlist_id = 'UU' + channel_id[2:] - url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&ajax=1&disable_polymer=true' + url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&pbj=1' print("Getting number of videos") - response = common.fetch_url(url, common.mobile_ua + headers_1) + response = common.fetch_url(url, common.mobile_ua + headers_pbj) '''with open('debug/playlist_debug_metadata', 'wb') as f: f.write(response)''' response = response.decode('utf-8') print("Got response for number of videos") - match = re.search(r'"num_videos_text":\s*{(?:"item_type":\s*"formatted_string",)?\s*"runs":\s*\[{"text":\s*"([\d,]*) videos"', response) + + match = re.search(r'"numVideosText":\s*{\s*"runs":\s*\[{"text":\s*"([\d,]*) videos"', response) if match: return int(match.group(1).replace(',','')) else: diff --git a/youtube/common.py b/youtube/common.py index 59d757cc..df8280d8 100644 --- a/youtube/common.py +++ b/youtube/common.py @@ -483,8 +483,11 @@ def get_thumbnail(node): 'thumbnail': ('thumbnail', get_thumbnail), 'thumbnails': ('thumbnail', lambda node: node[0]['thumbnails'][0]['url']), + 'viewCountText': ('views', get_text), + 'numVideosText': ('size', lambda node: get_text(node).split(' ')[0]), # the format is "324 videos" 'videoCountText': ('size', get_text), 'playlistId': ('id', lambda node: node), + 'descriptionText': ('description', get_formatted_text), 'subscriberCountText': ('subscriber_count', get_text), 'channelId': ('id', lambda node: node), @@ -510,6 +513,10 @@ def renderer_info(renderer): info['views'] = get_text(renderer['viewCountText']) elif 'shortViewCountText' in renderer: info['views'] = get_text(renderer['shortViewCountText']) + + if 'ownerText' in renderer: + info['author'] = renderer['ownerText']['runs'][0]['text'] + info['author_url'] = renderer['ownerText']['runs'][0]['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'] try: overlays = renderer['thumbnailOverlays'] except KeyError: diff --git a/youtube/playlist.py b/youtube/playlist.py index ee447e71..cc0da337 100644 --- a/youtube/playlist.py +++ b/youtube/playlist.py @@ -42,35 +42,35 @@ def playlist_ctoken(playlist_id, offset): headers_1 = ( ('Accept', '*/*'), ('Accept-Language', 'en-US,en;q=0.5'), - ('X-YouTube-Client-Name', '1'), + ('X-YouTube-Client-Name', '2'), ('X-YouTube-Client-Version', '2.20180614'), ) def playlist_first_page(playlist_id, report_text = "Retrieved playlist"): - url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&ajax=1&disable_polymer=true' + url = 'https://m.youtube.com/playlist?list=' + playlist_id + '&pbj=1' content = common.fetch_url(url, common.mobile_ua + headers_1, report_text=report_text) - if content[0:4] == b")]}'": - content = content[4:] + '''with open('debug/playlist_debug', 'wb') as f: + f.write(content)''' content = json.loads(common.uppercase_escape(content.decode('utf-8'))) + return content #https://m.youtube.com/playlist?itct=CBMQybcCIhMIptj9xJaJ2wIV2JKcCh3Idwu-&ctoken=4qmFsgI2EiRWTFBMT3kwajlBdmxWWlB0bzZJa2pLZnB1MFNjeC0tN1BHVEMaDmVnWlFWRHBEUWxFJTNE&pbj=1 -def get_videos_ajax(playlist_id, page): +def get_videos(playlist_id, page): - url = "https://m.youtube.com/playlist?action_continuation=1&ajax=1&ctoken=" + playlist_ctoken(playlist_id, (int(page)-1)*20) + url = "https://m.youtube.com/playlist?ctoken=" + playlist_ctoken(playlist_id, (int(page)-1)*20) + "&pbj=1" headers = { 'User-Agent': ' Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1', 'Accept': '*/*', 'Accept-Language': 'en-US,en;q=0.5', 'X-YouTube-Client-Name': '2', - 'X-YouTube-Client-Version': '1.20180508', + 'X-YouTube-Client-Version': '2.20180508', } content = common.fetch_url(url, headers, report_text="Retrieved playlist") '''with open('debug/playlist_debug', 'wb') as f: f.write(content)''' - content = content[4:] info = json.loads(common.uppercase_escape(content.decode('utf-8'))) return info @@ -89,22 +89,22 @@ def get_playlist_page(env, start_response): else: tasks = ( gevent.spawn(playlist_first_page, playlist_id, report_text="Retrieved playlist info" ), - gevent.spawn(get_videos_ajax, playlist_id, page) + gevent.spawn(get_videos, playlist_id, page) ) gevent.joinall(tasks) first_page_json, this_page_json = tasks[0].value, tasks[1].value - try: - video_list = this_page_json['content']['section_list']['contents'][0]['contents'][0]['contents'] - except KeyError: - video_list = this_page_json['content']['continuation_contents']['contents'] + try: # first page + video_list = this_page_json['response']['contents']['singleColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['playlistVideoListRenderer']['contents'] + except KeyError: # other pages + video_list = this_page_json['response']['continuationContents']['playlistVideoListContinuation']['contents'] videos_html = '' for video_json in video_list: - info = common.ajax_info(video_json) + info = common.renderer_info(video_json['playlistVideoRenderer']) videos_html += common.video_item_html(info, common.small_video_item_template) - metadata = common.ajax_info(first_page_json['content']['playlist_header']) + metadata = common.renderer_info(first_page_json['response']['header']['playlistHeaderRenderer']) video_count = int(metadata['size'].replace(',', '')) page_buttons = common.page_buttons_html(int(page), math.ceil(video_count/20), common.URL_ORIGIN + "/playlist", env['QUERY_STRING'])