Skip to content

Commit

Permalink
10.08.2024
Browse files Browse the repository at this point in the history
* If a PO token experiment is detected, fall back to the iOS client to fetch stream data
* `boa_engine` updated to 0.19.0
* MSRV now 1.79.0
* Code quality improved
  • Loading branch information
Mithronn committed Aug 10, 2024
1 parent dd8beae commit d4798aa
Show file tree
Hide file tree
Showing 15 changed files with 218 additions and 105 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
/target
Cargo.lock
flamegraph.html
flamegraph.html
*.mp3
22 changes: 11 additions & 11 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "rusty_ytdl"
version = "0.7.3"
version = "0.7.4"
authors = ["Mithronn"]
edition = "2021"
description = "A Rust library for Youtube video searcher and downloader"
Expand All @@ -11,7 +11,7 @@ keywords = ["youtube", "downloader", "ytdl", "youtube-dl", "searcher"]
include = ["src/**/*", "Cargo.toml"]
exclude = ["examples/**/*", "tests/**/*", ".github/**/*"]
categories = ["multimedia::video", "multimedia::audio"]
rust-version = "1.70.0"
rust-version = "1.79.0"

[package.metadata.docs.rs]
all-features = true
Expand All @@ -26,29 +26,29 @@ reqwest = { version = "0.12.5", features = [
"cookies",
"gzip",
], default-features = false }
scraper = "0.19.1"
serde = "1.0.204"
serde_json = "1.0.120"
scraper = "0.20.0"
serde = "1.0.205"
serde_json = "1.0.122"
serde_qs = "0.13.0"
regex = "1.10.5"
regex = "1.10.6"
url = "2.5.2"
urlencoding = "2.1.3"
thiserror = "1.0.63"
derive_more = "0.99.18"
derive_more = { version = "1.0.0", features = ["display"] }
derivative = "2.2.0"
once_cell = "1.19.0"
tokio = { version = "1.39.2", default-features = false, features = ["sync"] }
rand = "0.8.5"
reqwest-middleware = { version = "0.3.2", features = ["json"] }
reqwest-retry = "0.6.0"
reqwest-middleware = { version = "0.3.3", features = ["json"] }
reqwest-retry = "0.6.1"
m3u8-rs = "6.0.0"
async-trait = "0.1.81"
aes = "0.8.4"
cbc = { version = "0.1.2", features = ["std"] }
hex = "0.4.3"
boa_engine = "0.17.3"
boa_engine = "0.19.0"
mime = "0.3.17"
bytes = "1.6.1"
bytes = "1.7.1"
flame = { version = "0.2.2", optional = true }
flamer = { version = "0.5.0", optional = true }

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,5 +158,5 @@ Or add the following to your `Cargo.toml` file:

```toml
[dependencies]
rusty_ytdl = "0.7.3"
rusty_ytdl = "0.7.4"
```
2 changes: 1 addition & 1 deletion src/blocking/info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::block_async;
#[cfg(feature = "live")]
use crate::blocking::stream::LiveStream;
use crate::blocking::stream::NonLiveStream;
use crate::info::DEFAULT_DL_CHUNK_SIZE;
use crate::constants::DEFAULT_DL_CHUNK_SIZE;
use crate::structs::{VideoError, VideoInfo, VideoOptions};
use crate::utils::choose_format;
use crate::Video as AsyncVideo;
Expand Down
2 changes: 1 addition & 1 deletion src/blocking/search/youtube.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use crate::{block_async, VideoError};
use serde::Serialize;

#[derive(Clone, derive_more::Display, derivative::Derivative)]
#[display(fmt = "YouTube()")]
#[display("YouTube()")]
#[derivative(Debug, PartialEq, Eq)]
pub struct YouTube(AsyncYouTube);

Expand Down
71 changes: 68 additions & 3 deletions src/constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,74 @@ pub(crate) static IPV6_REGEX: Lazy<Regex> = Lazy::new(|| {
pub(crate) static PARSE_INT_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"(?m)^\s*((\-|\+)?[0-9]+)\s*").unwrap());

/// Default download chunk size: 10485760 bytes (10 MB).
///
/// Kept at this size to avoid YouTube throttling; chunks bigger than this value
/// can be throttled by YouTube.
pub(crate) const DEFAULT_DL_CHUNK_SIZE: u64 = 10485760;

/// Default maximum number of retries for a web request.
///
/// Used as the fallback when the caller's request options do not set `max_retries`.
pub(crate) const DEFAULT_MAX_RETRIES: u32 = 3;

/// Experiment IDs (kept as strings) related to the PO token experiment, judging by the name.
///
/// NOTE(review): presumably these are the IDs `check_experiments` looks for in the
/// watch page to decide whether to fall back to the iOS client — confirm against `utils`.
pub(crate) const POTOKEN_EXPERIMENTS: &[&str] = &["51217476", "51217102"];

/// InnerTube client presets, keyed by a short client id (`"web"`, `"ios"`, `"tv_embedded"`).
///
/// Each value is a `(clientVersion, clientName, json value)` tuple:
/// * `.0` — the client version string; it is repeated inside the JSON fragment's
///   `"clientVersion"` field, so both places must be updated together.
/// * `.1` — the client name given as a numeric id string (`"1"`, `"5"`, `"85"`).
/// * `.2` — a JSON *fragment*, not a standalone document: a single `"context": { ... },`
///   member that ends with a trailing comma so it can be spliced in as the first member
///   of a larger JSON object.
pub static INNERTUBE_CLIENT: Lazy<HashMap<&str, (&str, &str, &str)>> =
// Tuple layout: (clientVersion, clientName, json value)
Lazy::new(|| {
HashMap::from([
(
"web",
(
"2.20240726.00.00",
"1",
r#""context": {
"client": {
"clientName": "WEB",
"clientVersion": "2.20240726.00.00",
"hl": "en"
}
},"#,
),
),
(
"ios",
(
"19.29.1",
"5",
r#""context": {
"client": {
"clientName": "IOS",
"clientVersion": "19.29.1",
"deviceMake": "Apple",
"deviceModel": "iPhone16,2",
"userAgent": "com.google.ios.youtube/19.29.1 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)",
"osName": "iPhone",
"osVersion": "17.5.1.21F90",
"hl": "en"
}
},"#,
),
),
(
// This client can access age-restricted videos (unless the uploader has disabled the 'allow embedding' option)
// See: https://github.com/yt-dlp/yt-dlp/blob/28d485714fef88937c82635438afba5db81f9089/yt_dlp/extractor/youtube.py#L231
"tv_embedded",
(
"2.0",
"85",
r#""context": {
"client": {
"clientName": "TVHTML5_SIMPLY_EMBEDDED_PLAYER",
"clientVersion": "2.0",
"hl": "en",
"clientScreen": "EMBED"
},
"thirdParty": {
"embedUrl": "https://google.com"
}
},"#,
),
),
])
});

pub static FORMATS: Lazy<HashMap<&str, StaticFormat>> = Lazy::new(|| {
HashMap::from([
(
Expand Down Expand Up @@ -1234,6 +1302,3 @@ pub static FORMATS: Lazy<HashMap<&str, StaticFormat>> = Lazy::new(|| {
),
])
});

/// Default max number of retries for a web reqwest.
pub const DEFAULT_MAX_RETRIES: u32 = 3;
134 changes: 77 additions & 57 deletions src/info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,22 @@ use crate::stream::{LiveStream, LiveStreamOptions};
use crate::structs::FFmpegArgs;

use crate::{
constants::{BASE_URL, DEFAULT_MAX_RETRIES},
constants::{BASE_URL, DEFAULT_DL_CHUNK_SIZE, DEFAULT_MAX_RETRIES, INNERTUBE_CLIENT},
info_extras::{get_media, get_related_videos},
stream::{NonLiveStream, NonLiveStreamOptions, Stream},
structs::{
CustomRetryableStrategy, PlayerResponse, VideoError, VideoInfo, VideoOptions, YTConfig,
},
utils::{
between, choose_format, clean_video_details, get_functions, get_html, get_html5player,
get_random_v6_ip, get_video_id, get_ytconfig, is_age_restricted_from_html,
between, check_experiments, choose_format, clean_video_details, get_functions, get_html,
get_html5player, get_random_v6_ip, get_video_id, get_ytconfig, is_age_restricted_from_html,
is_not_yet_broadcasted, is_play_error, is_private_video, is_rental,
parse_live_video_formats, parse_video_formats, sort_formats,
},
};

// 10485760 -> Default is 10MB to avoid Youtube throttle (Bigger than this value can be throttle by Youtube)
pub(crate) const DEFAULT_DL_CHUNK_SIZE: u64 = 10485760;

#[derive(Clone, derive_more::Display, derivative::Derivative)]
#[display(fmt = "Video({video_id})")]
#[display("Video({video_id})")]
#[derivative(Debug, PartialEq, Eq)]
pub struct Video {
video_id: String,
Expand Down Expand Up @@ -102,7 +99,10 @@ impl Video {
}
};

let max_retries = options.request_options.max_retries.unwrap_or(DEFAULT_MAX_RETRIES);
let max_retries = options
.request_options
.max_retries
.unwrap_or(DEFAULT_MAX_RETRIES);

let retry_policy = ExponentialBackoff::builder()
.retry_bounds(Duration::from_millis(1000), Duration::from_millis(30000))
Expand Down Expand Up @@ -177,8 +177,31 @@ impl Video {
return Err(VideoError::VideoIsPrivate);
}

// PO token experiment detected: fall back to the iOS client (the webpage contains broken formats)
if check_experiments(&response) {
let ios_ytconfig = self
.get_player_ytconfig(
&response,
INNERTUBE_CLIENT.get("ios").cloned().unwrap_or_default(),
)
.await?;

let player_response_new =
serde_json::from_str::<PlayerResponse>(&ios_ytconfig).unwrap_or_default();

player_response.streaming_data = player_response_new.streaming_data;
}

if is_age_restricted {
let embed_ytconfig = self.get_embeded_ytconfig(&response).await?;
let embed_ytconfig = self
.get_player_ytconfig(
&response,
INNERTUBE_CLIENT
.get("tv_embedded")
.cloned()
.unwrap_or_default(),
)
.await?;

let player_response_new =
serde_json::from_str::<PlayerResponse>(&embed_ytconfig).unwrap_or_default();
Expand Down Expand Up @@ -484,39 +507,36 @@ impl Video {
}

#[cfg_attr(feature = "performance_analysis", flamer::flame)]
async fn get_embeded_ytconfig(&self, html: &str) -> Result<String, VideoError> {
async fn get_player_ytconfig(
&self,
html: &str,
configs: (&str, &str, &str),
) -> Result<String, VideoError> {
use std::str::FromStr;

let ytcfg = get_ytconfig(html)?;

// This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
// See: https://github.com/yt-dlp/yt-dlp/blob/28d485714fef88937c82635438afba5db81f9089/yt_dlp/extractor/youtube.py#L231
let query = serde_json::json!({
"context": {
"client": {
"clientName": "TVHTML5_SIMPLY_EMBEDDED_PLAYER",
"clientVersion": "2.0",
"hl": "en",
"clientScreen": "EMBED",
},
"thirdParty": {
"embedUrl": "https://google.com",
},
},
"playbackContext": {
"contentPlaybackContext": {
"signatureTimestamp": ytcfg.sts.unwrap_or(0),
"html5Preference": "HTML5_PREF_WANTS",
},
},
"videoId": self.get_video_id(),
});
let client = configs.2;
let sts = ytcfg.sts.unwrap_or(0);
let video_id = self.get_video_id();

let query = serde_json::from_str::<serde_json::Value>(&format!(
r#"{{
{client}
"playbackContext": {{
"contentPlaybackContext": {{
"signatureTimestamp": {sts},
"html5Preference": "HTML5_PREF_WANTS"
}}
}},
"videoId": "{video_id}"
}}"#
))
.unwrap_or_default();

static CONFIGS: Lazy<(HeaderMap, &str)> = Lazy::new(|| {
use std::str::FromStr;

(HeaderMap::from_iter([
(HeaderName::from_str("content-type").unwrap(), HeaderValue::from_str("application/json").unwrap()),
(HeaderName::from_str("X-Youtube-Client-Name").unwrap(), HeaderValue::from_str("85").unwrap()),
(HeaderName::from_str("X-Youtube-Client-Version").unwrap(), HeaderValue::from_str("2.0").unwrap()),
(HeaderName::from_str("Origin").unwrap(), HeaderValue::from_str("https://www.youtube.com").unwrap()),
(HeaderName::from_str("User-Agent").unwrap(), HeaderValue::from_str("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3513.0 Safari/537.36").unwrap()),
(HeaderName::from_str("Referer").unwrap(), HeaderValue::from_str("https://www.youtube.com/").unwrap()),
Expand All @@ -526,10 +546,20 @@ impl Video {
]),"AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8")
});

let mut headers = CONFIGS.0.clone();
headers.insert(
HeaderName::from_str("X-Youtube-Client-Version").unwrap(),
HeaderValue::from_str(configs.0).unwrap(),
);
headers.insert(
HeaderName::from_str("X-Youtube-Client-Name").unwrap(),
HeaderValue::from_str(configs.1).unwrap(),
);

let response = self
.client
.post("https://www.youtube.com/youtubei/v1/player")
.headers(CONFIGS.0.clone())
.headers(headers)
.query(&[("key", CONFIGS.1)])
.json(&query)
.send()
Expand All @@ -550,15 +580,12 @@ async fn get_m3u8(
url: &str,
client: &reqwest_middleware::ClientWithMiddleware,
) -> Result<Vec<(String, String)>, VideoError> {
let base_url = Url::parse(BASE_URL).expect("BASE_URL corrapt");
let base_url_host = base_url.host_str().expect("BASE_URL host corrapt");
let base_url = Url::parse(BASE_URL)?;
let base_url_host = base_url.host_str();

let url = Url::parse(url)
.and_then(|mut x| {
let set_host_result = x.set_host(Some(base_url_host));
if set_host_result.is_err() {
return Err(set_host_result.expect_err("How can be posible"));
}
x.set_host(base_url_host)?;
Ok(x)
})
.map(|x| x.as_str().to_string())
Expand All @@ -574,19 +601,12 @@ async fn get_m3u8(
.split('\n')
.filter(|x| HTTP_REGEX.is_match(x) && ITAG_REGEX.is_match(x));

let itag_and_url: Vec<(String, String)> = itag_and_url
.map(|line| {
let itag = ITAG_REGEX
.captures(line)
.expect("IMPOSSIBLE")
.get(1)
.map(|x| x.as_str())
.unwrap_or("");

// println!("itag: {}, url: {}", itag, line);
(itag.to_string(), line.to_string())
Ok(itag_and_url
.filter_map(|line| {
ITAG_REGEX.captures(line).and_then(|caps| {
caps.get(1)
.map(|itag| (itag.as_str().to_string(), line.to_string()))
})
})
.collect();

Ok(itag_and_url)
.collect::<Vec<(String, String)>>())
}
Loading

0 comments on commit d4798aa

Please sign in to comment.