Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for the new channel layout (part 1) #3374

Merged
merged 3 commits into from
Nov 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions spec/invidious/helpers_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ CONFIG = Config.from_yaml(File.open("config/config.example.yml"))
Spectator.describe "Helper" do
describe "#produce_channel_videos_url" do
it "correctly produces url for requesting page `x` of a channel's videos" do
expect(produce_channel_videos_url(ucid: "UCXuqSBlHAE6Xw-yeJA0Tunw")).to eq("/browse_ajax?continuation=4qmFsgI8EhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaIEVnWjJhV1JsYjNNd0FqZ0JZQUZxQUxnQkFDQUFlZ0V4&gl=US&hl=en")
# expect(produce_channel_videos_url(ucid: "UCXuqSBlHAE6Xw-yeJA0Tunw")).to eq("/browse_ajax?continuation=4qmFsgI8EhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaIEVnWjJhV1JsYjNNd0FqZ0JZQUZxQUxnQkFDQUFlZ0V4&gl=US&hl=en")
#
# expect(produce_channel_videos_url(ucid: "UCXuqSBlHAE6Xw-yeJA0Tunw", sort_by: "popular")).to eq("/browse_ajax?continuation=4qmFsgJAEhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaJEVnWjJhV1JsYjNNd0FqZ0JZQUZxQUxnQkFDQUFlZ0V4R0FFPQ%3D%3D&gl=US&hl=en")

expect(produce_channel_videos_url(ucid: "UCXuqSBlHAE6Xw-yeJA0Tunw", sort_by: "popular")).to eq("/browse_ajax?continuation=4qmFsgJAEhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaJEVnWjJhV1JsYjNNd0FqZ0JZQUZxQUxnQkFDQUFlZ0V4R0FFPQ%3D%3D&gl=US&hl=en")
# expect(produce_channel_videos_url(ucid: "UCXuqSBlHAE6Xw-yeJA0Tunw", page: 20)).to eq("/browse_ajax?continuation=4qmFsgJAEhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaJEVnWjJhV1JsYjNNd0FqZ0JZQUZxQUxnQkFDQUFlZ0l5TUE9PQ%3D%3D&gl=US&hl=en")

expect(produce_channel_videos_url(ucid: "UCXuqSBlHAE6Xw-yeJA0Tunw", page: 20)).to eq("/browse_ajax?continuation=4qmFsgJAEhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaJEVnWjJhV1JsYjNNd0FqZ0JZQUZxQUxnQkFDQUFlZ0l5TUE9PQ%3D%3D&gl=US&hl=en")

expect(produce_channel_videos_url(ucid: "UC-9-kyTW8ZkZNDHQJ6FgpwQ", page: 20, sort_by: "popular")).to eq("/browse_ajax?continuation=4qmFsgJAEhhVQy05LWt5VFc4WmtaTkRIUUo2Rmdwd1EaJEVnWjJhV1JsYjNNd0FqZ0JZQUZxQUxnQkFDQUFlZ0l5TUJnQg%3D%3D&gl=US&hl=en")
# expect(produce_channel_videos_url(ucid: "UC-9-kyTW8ZkZNDHQJ6FgpwQ", page: 20, sort_by: "popular")).to eq("/browse_ajax?continuation=4qmFsgJAEhhVQy05LWt5VFc4WmtaTkRIUUo2Rmdwd1EaJEVnWjJhV1JsYjNNd0FqZ0JZQUZxQUxnQkFDQUFlZ0l5TUJnQg%3D%3D&gl=US&hl=en")
end
end

Expand Down
90 changes: 43 additions & 47 deletions src/invidious/channels/videos.cr
Original file line number Diff line number Diff line change
@@ -1,53 +1,48 @@
def produce_channel_videos_continuation(ucid, page = 1, auto_generated = nil, sort_by = "newest", v2 = false)
object = {
"80226972:embedded" => {
"2:string" => ucid,
"3:base64" => {
"2:string" => "videos",
"6:varint" => 2_i64,
"7:varint" => 1_i64,
"12:varint" => 1_i64,
"13:string" => "",
"23:varint" => 0_i64,
},
object_inner_2 = {
"2:0:embedded" => {
"1:0:varint" => 0_i64,
},
"5:varint" => 50_i64,
"6:varint" => 1_i64,
"7:varint" => (page * 30).to_i64,
"9:varint" => 1_i64,
"10:varint" => 0_i64,
}

if !v2
if auto_generated
seed = Time.unix(1525757349)
until seed >= Time.utc
seed += 1.month
end
timestamp = seed - (page - 1).months

object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0x36_i64
object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{timestamp.to_unix}"
else
object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64
object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{page}"
end
else
object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64
object_inner_2_encoded = object_inner_2
.try { |i| Protodec::Any.cast_json(i) }
.try { |i| Protodec::Any.from_json(i) }
.try { |i| Base64.urlsafe_encode(i) }
.try { |i| URI.encode_www_form(i) }

object["80226972:embedded"]["3:base64"].as(Hash)["61:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
"1:string" => Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
"1:varint" => 30_i64 * (page - 1),
}))),
})))
end
object_inner_1 = {
"110:embedded" => {
"3:embedded" => {
"15:embedded" => {
"1:embedded" => {
"1:string" => object_inner_2_encoded,
"2:string" => "00000000-0000-0000-0000-000000000000",
},
"3:varint" => 1_i64,
},
},
},
}

case sort_by
when "newest"
when "popular"
object["80226972:embedded"]["3:base64"].as(Hash)["3:varint"] = 0x01_i64
when "oldest"
object["80226972:embedded"]["3:base64"].as(Hash)["3:varint"] = 0x02_i64
else nil # Ignore
end
object_inner_1_encoded = object_inner_1
.try { |i| Protodec::Any.cast_json(i) }
.try { |i| Protodec::Any.from_json(i) }
.try { |i| Base64.urlsafe_encode(i) }
.try { |i| URI.encode_www_form(i) }

object["80226972:embedded"]["3:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json(object["80226972:embedded"]["3:base64"])))
object["80226972:embedded"].delete("3:base64")
object = {
"80226972:embedded" => {
"2:string" => ucid,
"3:string" => object_inner_1_encoded,
"35:string" => "browse-feed#{ucid}videos102",
},
}

continuation = object.try { |i| Protodec::Any.cast_json(i) }
.try { |i| Protodec::Any.from_json(i) }
Expand All @@ -67,10 +62,11 @@ end
def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
videos = [] of SearchVideo

2.times do |i|
initial_data = get_channel_videos_response(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
videos.concat extract_videos(initial_data, author, ucid)
end
# 2.times do |i|
# initial_data = get_channel_videos_response(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
initial_data = get_channel_videos_response(ucid, 1, auto_generated: auto_generated, sort_by: sort_by)
videos = extract_videos(initial_data, author, ucid)
# end

return videos.size, videos
end
Expand Down
136 changes: 114 additions & 22 deletions src/invidious/yt_backend/extractors.cr
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ private ITEM_PARSERS = {
Parsers::PlaylistRendererParser,
Parsers::CategoryRendererParser,
Parsers::RichItemRendererParser,
Parsers::ReelItemRendererParser,
}

record AuthorFallback, name : String, id : String
Expand Down Expand Up @@ -369,7 +370,7 @@ private module Parsers
end

# Parses an InnerTube richItemRenderer into a SearchVideo.
# Returns nil when the given object isn't a shelfRenderer
# Returns nil when the given object isn't a RichItemRenderer
#
# A richItemRenderer seems to be a simple wrapper for a videoRenderer, used
# by the result page for hashtags. It is located inside a continuationItems
Expand All @@ -390,6 +391,90 @@ private module Parsers
return {{@type.name}}
end
end

# Parses an InnerTube reelItemRenderer into a SearchVideo.
# Returns nil when the given object isn't a reelItemRenderer
#
# reelItemRenderer items are used in the new (2022) channel layout,
# in the "shorts" tab.
#
module ReelItemRendererParser
def self.process(item : JSON::Any, author_fallback : AuthorFallback)
if item_contents = item["reelItemRenderer"]?
return self.parse(item_contents, author_fallback)
end
end

private def self.parse(item_contents, author_fallback)
video_id = item_contents["videoId"].as_s

video_details_container = item_contents.dig(
"navigationEndpoint", "reelWatchEndpoint",
"overlay", "reelPlayerOverlayRenderer",
"reelPlayerHeaderSupportedRenderers",
"reelPlayerHeaderRenderer"
)

# Author infos

author = video_details_container
.dig?("channelTitleText", "runs", 0, "text")
.try &.as_s || author_fallback.name

ucid = video_details_container
.dig?("channelNavigationEndpoint", "browseEndpoint", "browseId")
.try &.as_s || author_fallback.id

# Title & publication date

title = video_details_container.dig?("reelTitleText")
.try { |t| extract_text(t) } || ""

published = video_details_container
.dig?("timestampText", "simpleText")
.try { |t| decode_date(t.as_s) } || Time.utc

# View count

view_count_text = video_details_container.dig?("viewCountText", "simpleText")
view_count_text ||= video_details_container
.dig?("viewCountText", "accessibility", "accessibilityData", "label")

view_count = view_count_text.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64

# Duration

a11y_data = item_contents
.dig?("accessibility", "accessibilityData", "label")
.try &.as_s || ""

regex_match = /- (?<min>\d+ minutes? )?(?<sec>\d+ seconds?)+ -/.match(a11y_data)

minutes = regex_match.try &.["min"].to_i(strict: false) || 0
seconds = regex_match.try &.["sec"].to_i(strict: false) || 0

duration = (minutes*60 + seconds)

SearchVideo.new({
title: title,
id: video_id,
author: author,
ucid: ucid,
published: published,
views: view_count,
description_html: "",
length_seconds: duration,
live_now: false,
premium: false,
premiere_timestamp: Time.unix(0),
author_verified: false,
})
end

def self.parser_name
return {{@type.name}}
end
end
end

# The following are the extractors for extracting an array of items from
Expand Down Expand Up @@ -436,21 +521,31 @@ private module Extractors
content = extract_selected_tab(target["tabs"])["content"]

if section_list_contents = content.dig?("sectionListRenderer", "contents")
section_list_contents.as_a.each do |renderer_container|
renderer_container_contents = renderer_container["itemSectionRenderer"]["contents"][0]

# Category extraction
if items_container = renderer_container_contents["shelfRenderer"]?
raw_items << renderer_container_contents
next
elsif items_container = renderer_container_contents["gridRenderer"]?
else
items_container = renderer_container_contents
end
raw_items = unpack_section_list(section_list_contents)
elsif rich_grid_contents = content.dig?("richGridRenderer", "contents")
raw_items = rich_grid_contents.as_a
end

items_container["items"]?.try &.as_a.each do |item|
raw_items << item
end
return raw_items
end

private def self.unpack_section_list(contents)
raw_items = [] of JSON::Any

contents.as_a.each do |renderer_container|
renderer_container_contents = renderer_container["itemSectionRenderer"]["contents"][0]

# Category extraction
if items_container = renderer_container_contents["shelfRenderer"]?
raw_items << renderer_container_contents
next
elsif items_container = renderer_container_contents["gridRenderer"]?
else
items_container = renderer_container_contents
end

items_container["items"]?.try &.as_a.each do |item|
raw_items << item
end
end

Expand Down Expand Up @@ -525,14 +620,11 @@ private module Extractors
end

private def self.extract(target)
raw_items = [] of JSON::Any
if content = target["gridContinuation"]?
raw_items = content["items"].as_a
elsif content = target["continuationItems"]?
raw_items = content.as_a
end
content = target["continuationItems"]?
content ||= target.dig?("gridContinuation", "items")
content ||= target.dig?("richGridContinuation", "contents")

return raw_items
return content.nil? ? [] of JSON::Any : content.as_a
end

def self.extractor_name
Expand Down