From afad094a9d467385dc85ae9db85052e8df075b82 Mon Sep 17 00:00:00 2001 From: absidue <48293849+absidue@users.noreply.github.com> Date: Mon, 24 Jun 2024 23:02:54 +0200 Subject: [PATCH] feat(toDash): Add option to include WebVTT or TTML captions --- src/core/mixins/MediaInfo.ts | 6 +++ src/parser/classes/PlayerCaptionsTracklist.ts | 18 +++---- src/types/StreamingInfoOptions.ts | 9 ++++ src/utils/DashManifest.tsx | 36 +++++++++++++- src/utils/StreamingInfo.ts | 48 ++++++++++++++++++- 5 files changed, 106 insertions(+), 11 deletions(-) diff --git a/src/core/mixins/MediaInfo.ts b/src/core/mixins/MediaInfo.ts index e685dd5f22..55abb0a7c4 100644 --- a/src/core/mixins/MediaInfo.ts +++ b/src/core/mixins/MediaInfo.ts @@ -54,11 +54,16 @@ export default class MediaInfo { } let storyboards; + let captions; if (options.include_thumbnails && player_response.storyboards) { storyboards = player_response.storyboards; } + if (typeof options.captions_format === 'string' && player_response.captions?.caption_tracks) { + captions = player_response.captions.caption_tracks; + } + return FormatUtils.toDash( this.streaming_data, this.page[0].video_details?.is_post_live_dvr, @@ -68,6 +73,7 @@ export default class MediaInfo { this.#actions.session.player, this.#actions, storyboards, + captions, options ); } diff --git a/src/parser/classes/PlayerCaptionsTracklist.ts b/src/parser/classes/PlayerCaptionsTracklist.ts index 926a2d50eb..1579883d48 100644 --- a/src/parser/classes/PlayerCaptionsTracklist.ts +++ b/src/parser/classes/PlayerCaptionsTracklist.ts @@ -2,17 +2,19 @@ import Text from './misc/Text.js'; import { YTNode } from '../helpers.js'; import type { RawNode } from '../index.js'; +export interface CaptionTrackData { + base_url: string; + name: Text; + vss_id: string; + language_code: string; + kind?: 'asr' | 'frc'; + is_translatable: boolean; +} + export default class PlayerCaptionsTracklist extends YTNode { static type = 'PlayerCaptionsTracklist'; - caption_tracks?: { - base_url: string; - name: Text; - vss_id: string; - language_code: string; - kind?: 'asr' | 'frc'; - is_translatable: boolean; - }[]; + caption_tracks?: CaptionTrackData[]; audio_tracks?: { audio_track_id: string; diff --git a/src/types/StreamingInfoOptions.ts b/src/types/StreamingInfoOptions.ts index 5d0982a613..2074e8a652 100644 --- a/src/types/StreamingInfoOptions.ts +++ b/src/types/StreamingInfoOptions.ts @@ -1,4 +1,13 @@ export interface StreamingInfoOptions { + /** + * The format to use for the captions, when the video has captions. + * If this option is not set, the DASH manifest will not include the captions. + * + * Possible values: + * * `vtt`: Tells YouTube to return the captions in the WebVTT format + * * `ttml`: Tells YouTube to return the captions in the TTML format + */ + captions_format?: 'vtt' | 'ttml'; /** * The label to use for the non-DRC streams when a video has DRC and streams. * diff --git a/src/utils/DashManifest.tsx b/src/utils/DashManifest.tsx index d8997b57c2..5c0620450a 100644 --- a/src/utils/DashManifest.tsx +++ b/src/utils/DashManifest.tsx @@ -13,6 +13,7 @@ import type { SegmentInfo as FSegmentInfo } from './StreamingInfo.js'; import type { FormatFilter, URLTransformer } from '../types/FormatUtils.js'; import type PlayerLiveStoryboardSpec from '../parser/classes/PlayerLiveStoryboardSpec.js'; import type { StreamingInfoOptions } from '../types/StreamingInfoOptions.js'; +import type { CaptionTrackData } from '../parser/classes/PlayerCaptionsTracklist.js'; interface DashManifestProps { streamingData: IStreamingData; @@ -24,6 +25,7 @@ interface DashManifestProps { player?: Player; actions?: Actions; storyboards?: PlayerStoryboardSpec | PlayerLiveStoryboardSpec; + captionTracks?: CaptionTrackData[]; } async function OTFPostLiveDvrSegmentInfo({ info }: { info: FSegmentInfo }) { @@ -73,14 +75,16 @@ async function DashManifest({ player, actions, storyboards, + captionTracks, options }: DashManifestProps) { const { getDuration, audio_sets, video_sets, - image_sets - } = getStreamingInfo(streamingData, isPostLiveDvr, transformURL, rejectFormat, cpn, player, actions, storyboards, options); + image_sets, + text_sets + } = getStreamingInfo(streamingData, isPostLiveDvr, transformURL, rejectFormat, cpn, player, actions, storyboards, captionTracks, options); // XXX: DASH spec: https://standards.iso.org/ittf/PubliclyAvailableStandards/c083314_ISO_IEC%2023009-1_2022(en).zip @@ -229,6 +233,32 @@ async function DashManifest({ ; }) } + { + text_sets.map((set, index) => { + return + + + + + {set.representation.base_url} + + + ; + }) + } ; } @@ -242,6 +272,7 @@ export function toDash( player?: Player, actions?: Actions, storyboards?: PlayerStoryboardSpec | PlayerLiveStoryboardSpec, + caption_tracks?: CaptionTrackData[], options?: StreamingInfoOptions ) { if (!streaming_data) @@ -258,6 +289,7 @@ export function toDash( player={player} actions={actions} storyboards={storyboards} + captionTracks={caption_tracks} /> ); } diff --git a/src/utils/StreamingInfo.ts b/src/utils/StreamingInfo.ts index 9f1838a0e1..3e19115d25 100644 --- a/src/utils/StreamingInfo.ts +++ b/src/utils/StreamingInfo.ts @@ -12,6 +12,7 @@ import type { Format } from '../parser/misc.js'; import type { PlayerLiveStoryboardSpec } from '../parser/nodes.js'; import type { FormatFilter, URLTransformer } from '../types/FormatUtils.js'; import type { StreamingInfoOptions } from '../types/StreamingInfoOptions.js'; +import type { CaptionTrackData } from '../parser/classes/PlayerCaptionsTracklist.js'; const TAG_ = 'StreamingInfo'; @@ -20,6 +21,7 @@ export interface StreamingInfo { audio_sets: AudioSet[]; video_sets: VideoSet[]; image_sets: ImageSet[]; + text_sets: TextSet[]; } export interface AudioSet { @@ -122,6 +124,18 @@ export interface ImageRepresentation { getURL(n: number): string; } +export interface TextSet { + mime_type: string; + language: string; + track_name: string; + representation: TextRepresentation; +} + +export interface TextRepresentation { + uid: string; + base_url: string; +} + interface PostLiveDvrInfo { duration: number, segment_count: number @@ -735,6 +749,27 @@ function getImageSets( })); } +function getTextSets( + caption_tracks: CaptionTrackData[], + format: 'vtt' | 'ttml', + transform_url: URLTransformer +): TextSet[] { + return caption_tracks.map((caption_track) => { + const url = new URL(caption_track.base_url); + url.searchParams.set('fmt', format); + + return { + mime_type: format === 'vtt' ? 'text/vtt' : 'application/ttml+xml', + language: caption_track.language_code, + track_name: caption_track.name.toString(), + representation: { + uid: `text-${caption_track.vss_id}`, + base_url: transform_url(url).toString() + } + }; + }); +} + export function getStreamingInfo( streaming_data?: IStreamingData, is_post_live_dvr = false, @@ -744,6 +779,7 @@ export function getStreamingInfo( player?: Player, actions?: Actions, storyboards?: PlayerStoryboardSpec | PlayerLiveStoryboardSpec, + caption_tracks?: CaptionTrackData[], options?: StreamingInfoOptions ) { if (!streaming_data) @@ -839,11 +875,21 @@ export function getStreamingInfo( image_sets = getImageSets(duration, actions, storyboards, url_transformer); } + let text_sets: TextSet[] = []; + + if (caption_tracks && options?.captions_format) { + if ((options.captions_format as string) !== 'vtt' && (options.captions_format as string) !== 'ttml') { + throw new InnertubeError('Invalid captions format', options.captions_format); + } + text_sets = getTextSets(caption_tracks, options.captions_format, url_transformer); + } + const info : StreamingInfo = { getDuration, audio_sets, video_sets, - image_sets + image_sets, + text_sets }; return info;