Skip to content
This repository has been archived by the owner on Jan 9, 2024. It is now read-only.

Using new YouTube search API #75

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 12 additions & 56 deletions lib/main.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
const URL = require('url');
const UTIL = require('./util.js');
const UTIL_V2 = require('./util_v2.js');
const PARSE_ITEM = require('./parseItem.js');
const MINIGET = require('miniget');

// eslint-disable-next-line no-useless-escape, max-len
const nextpagRegex = /<div class="(([^"]*branded\-page\-box[^"]*search\-pager)|([^"]*search\-pager[^"]*branded\-page\-box))/;
const SEARCH_V2 = require('./search_v2.js');

const main = module.exports = async(searchString, options) => {
const resp = {};
Expand All @@ -16,58 +12,18 @@ const main = module.exports = async(searchString, options) => {
resp.currentRef = options.nextpageRef;
// Do request
const body = await MINIGET(UTIL.buildRef(resp.currentRef, searchString, options), options).text();
let parsed;
try {
parsed = JSON.parse(body);
} catch(err) {
const str = UTIL.between(body, 'var ytInitialData =', '; \n');
return UTIL_V2.mapJSON(JSON.parse(str));
/**
* TODO: Save API key and client version to avoid using v1 search completely
*/
options.key = UTIL.between(body, 'INNERTUBE_API_KEY":"', '"') || UTIL.between(body, 'innertubeApiKey":"', '"');
if (!options.key) {
// V1 result
let res = await main(searchString, options);
return res;
}
const content = parsed[parsed.length - 1].body.content;

// Get the table of items and parse it (remove null items where the parsing failed)
resp.items = UTIL
.between(content, '<ol id="item-section-', '\n</ol>')
.split('</li>\n\n<li>')
.filter(t => {
let condition1 = !t.includes('<div class="pyv-afc-ads-container" style="visibility:visible">');
let condition2 = !t.includes('<span class="spell-correction-corrected">');
let condition3 = !t.includes('<div class="search-message">');
let condition4 = !t.includes('<li class="search-exploratory-line">');
return condition1 && condition2 && condition3 && condition4;
})
.map(t => PARSE_ITEM(t, body, searchString))
.filter(a => a)
.filter((_, index) => index < options.limit);
// Adjust tracker
options.limit -= resp.items.length;

// Get amount of results
resp.results = UTIL.between(UTIL.between(content, '<p class="num-results', '</p>'), '>') || '0';

// Get information about set filters
const filters = UTIL.parseFilters(content);
resp.filters = Array.from(filters).map(a => a[1].active).filter(a => a);

// Parse the nextpageRef
const pagesMatch = content.match(nextpagRegex);
if (pagesMatch) {
const pagesContainer = UTIL.between(content, pagesMatch[0], '</div>').split('<a');
const lastPageRef = pagesContainer[pagesContainer.length - 1];
resp.nextpageRef = UTIL.removeHtml(UTIL.between(lastPageRef, 'href="', '"')) || null;
}

// We're already on last page or hit the limit
if (!resp.nextpageRef || options.limit < 1) return resp;

// Recursively fetch more items
options.nextpageRef = resp.nextpageRef;
const nestedResp = await main(searchString, options);
// Merge the responses
resp.items.push(...nestedResp.items);
resp.currentRef = nestedResp.currentRef;
resp.nextpageRef = nestedResp.nextpageRef;
return resp;
options.clientVersion = UTIL.between(body, 'INNERTUBE_CONTEXT_CLIENT_VERSION":"', '"') ||
UTIL.between(body, 'innertube_context_client_version":"', '"');
return SEARCH_V2(searchString, options);
};

main.getFilters = async(searchString, options) => {
Expand Down
39 changes: 39 additions & 0 deletions lib/search_v2.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
const AXIOS = require('axios');
const QS = require('querystring');
const UTIL_V2 = require('./util_v2.js');
const BASE_URL = 'https://www.youtube.com/results?';
const API_URL = 'https://www.youtube.com/youtubei/v1/search?key=';

/**
 * Searches YouTube through the `youtubei/v1/search` endpoint and keeps
 * following continuation tokens until `limit` items were collected or no
 * further page is offered.
 *
 * The passed `options` object is never modified: all bookkeeping (query,
 * remaining limit, continuation token) happens on a private copy.
 *
 * @param {string} searchString - text to search for
 * @param {Object} options - search options
 * @param {string} options.key - API key appended to the endpoint URL
 * @param {number} [options.limit=100] - maximum number of items to return
 * @param {string} [options.nextpageRef] - continuation token to resume from
 * @param {string} [options.clientVersion] - forwarded to UTIL_V2.buildRequestBody
 * @returns {Promise<Object>} parsed response with at most `limit` items and
 *   the `nextpageRef` of the last page that was fetched
 */
const searchV2 = module.exports = async(searchString, options) => {
  /** TODO:
   * options = UTIL_V2.checkArgs(searchString, options);
   */
  // Private copy so the caller's object keeps its limit / nextpageRef untouched
  const opts = Object.assign({}, options, { query: searchString });
  if (!opts.limit) opts.limit = 100;

  // Post request headers
  const refererUrl = BASE_URL + QS.encode({ search_query: searchString });
  const headers = { Origin: 'https://www.youtube.com', Referer: refererUrl, 'Content-Type': 'application/json' };

  // Post request
  const { data } = await AXIOS.post(API_URL + opts.key, UTIL_V2.buildRequestBody(opts), { headers });

  // A continuation token means we asked for a follow-up page, which has its own layout
  const resp = opts.nextpageRef ? UTIL_V2.continuationJSON(data) : UTIL_V2.mapJSON(data);
  resp.items = resp.items.filter((_, index) => index < opts.limit);
  const remaining = opts.limit - resp.items.length;

  // We're already on last page or hit the limit
  if (!resp.nextpageRef || remaining < 1) return resp;

  // Recursively fetch more items with the reduced limit and the new token
  const nestedOptions = Object.assign({}, opts, { limit: remaining, nextpageRef: resp.nextpageRef });
  const nestedResp = await searchV2(searchString, nestedOptions);

  // Merge the responses
  resp.items.push(...nestedResp.items);
  resp.nextpageRef = nestedResp.nextpageRef;
  return resp;
};

/** TODO:
* getFilters()
*/
167 changes: 130 additions & 37 deletions lib/util_v2.js
Original file line number Diff line number Diff line change
@@ -1,57 +1,150 @@
const URL = require('url');
const VIDEO_URL = 'https://www.youtube.com/watch?v=';
// Base `context` payload for search requests. buildRequestBody uses it as the
// `context` field of the request body and substitutes `options.clientVersion`
// for `client.clientVersion` when one is supplied.
// NOTE(review): the values presumably mirror what the YouTube web client sends — confirm.
const DEFAULT_CONTEXT = {
client: {
utcOffsetMinutes: 0,
gl: 'US',
hl: 'en',
clientName: 'WEB',
// Fallback version, used when no clientVersion is passed in the options
clientVersion: '2.20201023.02.00',
},
user: {},
request: {},
};

exports.mapJSON = (json) => {
exports.buildRequestBody = options => {
let body = { context: DEFAULT_CONTEXT };
// Should change utcOffsetMinutes too, idk .-.
// if (options && options.gl) body.context.client.gl = options.gl;
// if (options && options.hl) body.context.client.hl = options.hl;
if (options && options.clientVersion) body.context.client.clientVersion = options.clientVersion;
// Search query or continuation token
if (!options.nextpageRef) body.query = options.query;
else body.continuation = options.nextpageRef;
return body;
};

exports.mapJSON = json => {
const wrapper = json.contents.twoColumnSearchResultsRenderer.primaryContents.sectionListRenderer;
const filters = wrapper.subMenu.searchSubMenuRenderer.groups;
const items = wrapper.contents.find(x => Object.keys(x)[0] === 'itemSectionRenderer').itemSectionRenderer.contents;
const continuation = wrapper.contents.find(x => Object.keys(x)[0] === 'continuationItemRenderer').continuationItemRenderer;
const continuation = wrapper.contents.find(x => Object.keys(x)[0] === 'continuationItemRenderer');
let token = continuation ? continuation.continuationItemRenderer.continuationEndpoint.continuationCommand.token : '';

return {
query: null,
items: [].concat([{
type: 'search-refinements',
entries: json.refinements,
}], ...items.map(parseItem).filter(a => a)),
nextpageRef: null, // continuation,
nextpageRef: token,
results: json.estimatedResults,
filters: filters,
currentRef: null,
}
}
};
};

exports.continuationJSON = json => {
const continuationItems = json.onResponseReceivedCommands[0].appendContinuationItemsAction.continuationItems;
const items = continuationItems.find(x => Object.keys(x)[0] === 'itemSectionRenderer').itemSectionRenderer.contents;
const continuation = continuationItems.find(x => Object.keys(x)[0] === 'continuationItemRenderer');
let token = continuation ? continuation.continuationItemRenderer.continuationEndpoint.continuationCommand.token : '';

return {
query: null,
items: items.map(parseItem).filter(a => a),
nextpageRef: token,
currentRef: null,
};
};

let log = 0;
const parseItem = (item) => {
const parseItem = item => {
const type = Object.keys(item)[0];

if (type === 'videoRenderer') {
const author = item[type].ownerText.runs[0];
const isLive = Array.isArray(item[type].badges) && item[type].badges.some(a => a.metadataBadgeRenderer.label === 'LIVE NOW');
const upcoming = item[type].upcomingEventData ? Number(`${item[type].upcomingEventData.startTime}000`) : false;
return {
type: 'video',
live: isLive,
title: item[type].title.runs[0].text,
link: VIDEO_URL + item[type].videoId,
thumbnail: item[type].thumbnail.thumbnails.sort((a,b) => b.width - a.width)[0].url,

author: {
name: author.text,
ref: URL.resolve(VIDEO_URL, author.navigationEndpoint.commandMetadata.webCommandMetadata.url),
verified: Array.isArray(item[type].ownerBadges) && item[type].ownerBadges.some(a => a.metadataBadgeRenderer.tooltip === 'Verified'),
},

description: item[type].descriptionSnippet.runs.map(a => a.text).join(''),

views: upcoming ? null : item[type].viewCountText.simpleText || item[type].viewCountText.runs.map(a => a.text).join(''),
duration: isLive || upcoming ? null : item[type].lengthText.simpleText,
uploaded_at: isLive || upcoming ? null : item[type].publishedTimeText.simpleText,
};
} else if (type === 'shelfRenderer') {
// console.log(item);
} else if (type === 'horizontalCardListRenderer') {
// console.log(item);
} else {
console.log('unknown type:', type);
switch (type) {
case 'videoRenderer':
return parseVideo(item[type]);
case 'channelRenderer':
return parseChannel(item[type]);
case 'playlistRenderer':
return parsePlaylist(item[type]);
case 'radioRenderer':
return parseMix(item[type]);
case 'showingResultsForRenderer':
case 'backgroundPromoRenderer':
case 'shelfRenderer':
case 'horizontalCardListRenderer':
break;
default:
console.log('unknown type:', type);
break;
}
}
};

/**
 * Converts a `videoRenderer` object into the module's video item.
 *
 * @param {Object} obj - content of a `videoRenderer` entry
 * @returns {Object} video item; `description`, `views`, `duration` and
 *   `uploaded_at` are null when the renderer carries no such data, and
 *   `duration` / `uploaded_at` are also null for live and upcoming videos
 */
const parseVideo = obj => {
  const author = obj.ownerText.runs[0];
  const isLive = Array.isArray(obj.badges) && obj.badges.some(a => a.metadataBadgeRenderer.label === 'LIVE NOW');
  // startTime gets three zeros appended before the numeric conversion
  const upcoming = obj.upcomingEventData ? Number(`${obj.upcomingEventData.startTime}000`) : false;
  // Serialize the badges once; an absent list simply matches nothing
  const ownerBadges = obj.ownerBadges ? JSON.stringify(obj.ownerBadges) : '';
  // Sort a copy so the renderer's own thumbnail list keeps its order
  const [widestThumbnail] = [...obj.thumbnail.thumbnails].sort((a, b) => b.width - a.width);

  return {
    type: 'video',
    live: isLive,
    title: obj.title.runs[0].text,
    link: VIDEO_URL + obj.videoId,
    thumbnail: widestThumbnail.url,
    upcoming,

    author: {
      name: author.text,
      ref: URL.resolve(VIDEO_URL, author.navigationEndpoint.commandMetadata.webCommandMetadata.url),
      // Always a boolean, also when the renderer has no ownerBadges
      verified: ownerBadges.includes('OFFICIAL') || ownerBadges.includes('VERIFIED'),
    },

    description: obj.descriptionSnippet ? obj.descriptionSnippet.runs.map(a => a.text).join('') : null,

    views: !obj.viewCountText ? null : obj.viewCountText.simpleText || obj.viewCountText.runs.map(a => a.text).join(''),
    duration: isLive || upcoming || !obj.lengthText ? null : obj.lengthText.simpleText,
    uploaded_at: isLive || upcoming || !obj.publishedTimeText ? null : obj.publishedTimeText.simpleText,
  };
};

/**
 * Converts a `channelRenderer` object into the module's channel item.
 *
 * @param {Object} obj - content of a `channelRenderer` entry
 * @returns {Object} channel item; `followers` and `description_short` are
 *   null and `videos` is 0 when the renderer carries no such data
 */
const parseChannel = obj => {
  // Serialize the badges once; an absent list simply matches nothing
  const ownerBadges = obj.ownerBadges ? JSON.stringify(obj.ownerBadges) : '';
  // Sort a copy so the renderer's own thumbnail list keeps its order
  const [widestAvatar] = [...obj.thumbnail.thumbnails].sort((a, b) => b.width - a.width);

  return {
    type: 'channel',
    name: obj.title.simpleText,
    channel_id: obj.channelId,
    link: `https://www.youtube.com/channel/${obj.channelId}`,
    avatar: widestAvatar.url,
    // Always a boolean, also when the renderer has no ownerBadges
    verified: ownerBadges.includes('OFFICIAL') || ownerBadges.includes('VERIFIED'),

    followers: obj.subscriberCountText ? obj.subscriberCountText.simpleText : null,
    description_short: obj.descriptionSnippet ? obj.descriptionSnippet.runs.map(a => a.text).join('') : null,
    // Strip "." and "," separators before converting the count to a number
    videos: Number(obj.videoCountText ? obj.videoCountText.runs[0].text.replace(/[.,]/g, '') : 0),
  };
};

/**
 * Converts a `playlistRenderer` object into the module's playlist item.
 *
 * The author is read from `shortBylineText`, falling back to
 * `longBylineText`. When neither is present, or the byline has no channel
 * link, `author.name` / `author.ref` are null instead of the parser throwing.
 *
 * @param {Object} obj - content of a `playlistRenderer` entry
 * @returns {Object} playlist item
 */
const parsePlaylist = obj => {
  const byline = obj.shortBylineText || obj.longBylineText;
  const author = (byline && byline.runs && byline.runs[0]) || {};
  const browseEndpoint = author.navigationEndpoint && author.navigationEndpoint.browseEndpoint;
  // Serialize the badges once; an absent list simply matches nothing
  const ownerBadges = obj.ownerBadges ? JSON.stringify(obj.ownerBadges) : '';

  return {
    type: 'playlist',
    title: obj.title.simpleText,
    link: `https://www.youtube.com/playlist?list=${obj.playlistId}`,
    thumbnail: obj.thumbnailRenderer,

    author: {
      name: author.text || null,
      ref: browseEndpoint ? `https://www.youtube.com/channel/${browseEndpoint.browseId}` : null,
      // Always a boolean, also when the renderer has no ownerBadges
      verified: ownerBadges.includes('OFFICIAL') || ownerBadges.includes('VERIFIED'),
    },

    length: Number(obj.videoCount),
  };
};

/**
 * Converts a `radioRenderer` object into the module's mix item.
 *
 * @param {Object} obj - content of a `radioRenderer` entry
 * @returns {{type: string, title: string, firstItem: string, thumbnail: string, length: string}}
 *   mix item whose `firstItem` links to the first video inside the mix
 */
const parseMix = obj => {
  const title = obj.title.simpleText;
  const firstVideoId = obj.videos[0].childVideoRenderer.videoId;
  const firstItem = `https://www.youtube.com/watch?v=${firstVideoId}&list=${obj.playlistId}`;
  // Array#sort works in place: the renderer's thumbnail list ends up widest first
  const widestFirst = obj.thumbnail.thumbnails.sort((left, right) => right.width - left.width);
  const thumbnail = widestFirst[0].url;
  const length = obj.videoCountText.runs[0].text;

  return { type: 'mix', title, firstItem, thumbnail, length };
};
15 changes: 14 additions & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
"test": "npm run-script nyc && codecov"
},
"dependencies": {
"axios": "^0.21.0",
"html-entities": "^1.3.1",
"miniget": "^2.0.1"
},
Expand Down