Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(toDash): Add option to include WebVTT or TTML captions #673

Merged
merged 1 commit into from
Jun 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/core/mixins/MediaInfo.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,16 @@ export default class MediaInfo {
}

let storyboards;
let captions;

if (options.include_thumbnails && player_response.storyboards) {
storyboards = player_response.storyboards;
}

if (typeof options.captions_format === 'string' && player_response.captions?.caption_tracks) {
captions = player_response.captions.caption_tracks;
}

return FormatUtils.toDash(
this.streaming_data,
this.page[0].video_details?.is_post_live_dvr,
Expand All @@ -68,6 +73,7 @@ export default class MediaInfo {
this.#actions.session.player,
this.#actions,
storyboards,
captions,
options
);
}
Expand Down
18 changes: 10 additions & 8 deletions src/parser/classes/PlayerCaptionsTracklist.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,19 @@ import Text from './misc/Text.js';
import { YTNode } from '../helpers.js';
import type { RawNode } from '../index.js';

/**
 * Shape of a single caption track entry, as stored in
 * `PlayerCaptionsTracklist.caption_tracks`.
 *
 * Declared as a named, exported interface so that other modules can refer to
 * the caption track shape by name instead of repeating the inline type.
 */
export interface CaptionTrackData {
/** URL of the caption track. Consumers parse it with `new URL()`, so it is presumably absolute - TODO confirm. */
base_url: string;
/** Display name of the track; convert with `toString()` to get a plain string. */
name: Text;
/** Identifier of the track. NOTE(review): presumably unique per track within a video - confirm. */
vss_id: string;
/** Language code of the track. */
language_code: string;
/** Optional track kind. NOTE(review): `asr` presumably marks auto-generated (speech recognition) captions; meaning of `frc` is not visible here - confirm. */
kind?: 'asr' | 'frc';
/** NOTE(review): presumably whether the track can be requested in a translated form - confirm. */
is_translatable: boolean;
}

export default class PlayerCaptionsTracklist extends YTNode {
static type = 'PlayerCaptionsTracklist';

caption_tracks?: {
base_url: string;
name: Text;
vss_id: string;
language_code: string;
kind?: 'asr' | 'frc';
is_translatable: boolean;
}[];
caption_tracks?: CaptionTrackData[];

audio_tracks?: {
audio_track_id: string;
Expand Down
9 changes: 9 additions & 0 deletions src/types/StreamingInfoOptions.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
export interface StreamingInfoOptions {
/**
* The format to use for the captions, when the video has captions.
* If this option is not set, the DASH manifest will not include the captions.
*
* Possible values:
* * `vtt`: Tells YouTube to return the captions in the WebVTT format
* * `ttml`: Tells YouTube to return the captions in the TTML format
*/
captions_format?: 'vtt' | 'ttml';
/**
* The label to use for the non-DRC streams when a video has both DRC and non-DRC streams.
*
Expand Down
36 changes: 34 additions & 2 deletions src/utils/DashManifest.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import type { SegmentInfo as FSegmentInfo } from './StreamingInfo.js';
import type { FormatFilter, URLTransformer } from '../types/FormatUtils.js';
import type PlayerLiveStoryboardSpec from '../parser/classes/PlayerLiveStoryboardSpec.js';
import type { StreamingInfoOptions } from '../types/StreamingInfoOptions.js';
import type { CaptionTrackData } from '../parser/classes/PlayerCaptionsTracklist.js';

interface DashManifestProps {
streamingData: IStreamingData;
Expand All @@ -24,6 +25,7 @@ interface DashManifestProps {
player?: Player;
actions?: Actions;
storyboards?: PlayerStoryboardSpec | PlayerLiveStoryboardSpec;
captionTracks?: CaptionTrackData[];
}

async function OTFPostLiveDvrSegmentInfo({ info }: { info: FSegmentInfo }) {
Expand Down Expand Up @@ -73,14 +75,16 @@ async function DashManifest({
player,
actions,
storyboards,
captionTracks,
options
}: DashManifestProps) {
const {
getDuration,
audio_sets,
video_sets,
image_sets
} = getStreamingInfo(streamingData, isPostLiveDvr, transformURL, rejectFormat, cpn, player, actions, storyboards, options);
image_sets,
text_sets
} = getStreamingInfo(streamingData, isPostLiveDvr, transformURL, rejectFormat, cpn, player, actions, storyboards, captionTracks, options);

// XXX: DASH spec: https://standards.iso.org/ittf/PubliclyAvailableStandards/c083314_ISO_IEC%2023009-1_2022(en).zip

Expand Down Expand Up @@ -229,6 +233,32 @@ async function DashManifest({
</adaptation-set>;
})
}
{
text_sets.map((set, index) => {
return <adaptation-set
id={index + audio_sets.length + video_sets.length + image_sets.length}
mimeType={set.mime_type}
lang={set.language}
contentType="text"
>
<role
schemeIdUri="urn:mpeg:dash:role:2011"
value="caption"
/>
<label id={index + audio_sets.length}>
{set.track_name}
</label>
<representation
id={set.representation.uid}
bandwidth="0"
>
<base-url>
{set.representation.base_url}
</base-url>
</representation>
</adaptation-set>;
})
}
</period>
</mpd>;
}
Expand All @@ -242,6 +272,7 @@ export function toDash(
player?: Player,
actions?: Actions,
storyboards?: PlayerStoryboardSpec | PlayerLiveStoryboardSpec,
caption_tracks?: CaptionTrackData[],
options?: StreamingInfoOptions
) {
if (!streaming_data)
Expand All @@ -258,6 +289,7 @@ export function toDash(
player={player}
actions={actions}
storyboards={storyboards}
captionTracks={caption_tracks}
/>
);
}
50 changes: 49 additions & 1 deletion src/utils/StreamingInfo.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import type { Format } from '../parser/misc.js';
import type { PlayerLiveStoryboardSpec } from '../parser/nodes.js';
import type { FormatFilter, URLTransformer } from '../types/FormatUtils.js';
import type { StreamingInfoOptions } from '../types/StreamingInfoOptions.js';
import type { CaptionTrackData } from '../parser/classes/PlayerCaptionsTracklist.js';

const TAG_ = 'StreamingInfo';

Expand All @@ -20,6 +21,7 @@ export interface StreamingInfo {
audio_sets: AudioSet[];
video_sets: VideoSet[];
image_sets: ImageSet[];
text_sets: TextSet[];
}

export interface AudioSet {
Expand Down Expand Up @@ -122,6 +124,18 @@ export interface ImageRepresentation {
getURL(n: number): string;
}

/**
 * Describes one text (caption) track in the streaming info.
 * `StreamingInfo.text_sets` holds a list of these.
 */
export interface TextSet {
/** MIME type of the caption payload, e.g. `text/vtt` or `application/ttml+xml`. */
mime_type: string;
/** Language code of the caption track. */
language: string;
/** Human-readable name of the caption track. */
track_name: string;
/** The single representation (id and URL) belonging to this text set. */
representation: TextRepresentation;
}

/**
 * The representation of a caption track inside a {@link TextSet}.
 */
export interface TextRepresentation {
/** Identifier of the representation. */
uid: string;
/** URL from which the caption file is retrieved. */
base_url: string;
}

interface PostLiveDvrInfo {
duration: number,
segment_count: number
Expand Down Expand Up @@ -735,6 +749,29 @@ function getImageSets(
}));
}

/**
 * Converts caption tracks into text sets for the streaming info.
 *
 * For every caption track, the track's base URL is parsed, its `fmt` query
 * parameter is set (added or overwritten) to the requested format, and the
 * resulting URL is passed through `transform_url` before being stored.
 *
 * @param caption_tracks - The caption tracks to convert.
 * @param format - The caption format to request; `vtt` maps to the `text/vtt`
 * MIME type, anything else maps to `application/ttml+xml`.
 * @param transform_url - Callback applied to each caption URL before it is serialized.
 * @returns One text set per caption track, in the same order as the input.
 */
function getTextSets(
  caption_tracks: CaptionTrackData[],
  format: 'vtt' | 'ttml',
  transform_url: URLTransformer
): TextSet[] {
  // The MIME type only depends on the requested format, so resolve it once.
  let mime_type: string;

  if (format === 'vtt') {
    mime_type = 'text/vtt';
  } else {
    mime_type = 'application/ttml+xml';
  }

  const buildTextSet = (track: CaptionTrackData): TextSet => {
    const caption_url = new URL(track.base_url);

    // `set` replaces an existing `fmt` value instead of appending a second one.
    caption_url.searchParams.set('fmt', format);

    const language = track.language_code;
    const track_name = track.name.toString();
    const uid = `text-${track.vss_id}`;
    const base_url = transform_url(caption_url).toString();

    return {
      mime_type,
      language,
      track_name,
      representation: { uid, base_url }
    };
  };

  return caption_tracks.map((track) => buildTextSet(track));
}

export function getStreamingInfo(
streaming_data?: IStreamingData,
is_post_live_dvr = false,
Expand All @@ -744,6 +781,7 @@ export function getStreamingInfo(
player?: Player,
actions?: Actions,
storyboards?: PlayerStoryboardSpec | PlayerLiveStoryboardSpec,
caption_tracks?: CaptionTrackData[],
options?: StreamingInfoOptions
) {
if (!streaming_data)
Expand Down Expand Up @@ -839,11 +877,21 @@ export function getStreamingInfo(
image_sets = getImageSets(duration, actions, storyboards, url_transformer);
}

let text_sets: TextSet[] = [];

if (caption_tracks && options?.captions_format) {
if ((options.captions_format as string) !== 'vtt' && (options.captions_format as string) !== 'ttml') {
throw new InnertubeError('Invalid captions format', options.captions_format);
}
text_sets = getTextSets(caption_tracks, options.captions_format, url_transformer);
}

const info : StreamingInfo = {
getDuration,
audio_sets,
video_sets,
image_sets
image_sets,
text_sets
};

return info;
Expand Down
Loading