Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[YouTube] Support handles and all channel usernames #964

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,41 +1,46 @@
package org.schabi.newpipe.extractor.services.youtube.linkHandler;

import java.util.regex.Pattern;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.utils.Utils;

import java.net.URL;
import java.util.List;

/*
* Created by Christian Schabesberger on 25.07.16.
*
* Copyright (C) Christian Schabesberger 2018 <chrź[email protected]>
* YoutubeChannelLinkHandlerFactory.java is part of NewPipe.
* YoutubeChannelLinkHandlerFactory.java is part of NewPipe Extractor.
*
* NewPipe is free software: you can redistribute it and/or modify
* NewPipe Extractor is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* NewPipe Extractor is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
* along with NewPipe Extractor. If not, see <https://www.gnu.org/licenses/>.
*/

package org.schabi.newpipe.extractor.services.youtube.linkHandler;

import java.util.regex.Pattern;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.utils.Utils;

import javax.annotation.Nonnull;
import java.net.URL;
import java.util.List;

import static org.schabi.newpipe.extractor.utils.Utils.isBlank;

public final class YoutubeChannelLinkHandlerFactory extends ListLinkHandlerFactory {

private static final YoutubeChannelLinkHandlerFactory INSTANCE
= new YoutubeChannelLinkHandlerFactory();

private static final Pattern EXCLUDED_SEGMENTS =
Pattern.compile("playlist|watch|attribution_link|watch_popup|embed|feed|select_site");
private static final Pattern EXCLUDED_SEGMENTS = Pattern.compile(
// CHECKSTYLE:OFF
"playlist|watch|attribution_link|watch_popup|embed|feed|select_site|account|reporthistory|redirect");
// CHECKSTYLE:ON

private YoutubeChannelLinkHandlerFactory() {
}
Expand All @@ -45,10 +50,10 @@ public static YoutubeChannelLinkHandlerFactory getInstance() {
}

/**
* Returns URL to channel from an ID
* Returns the URL to a channel from an ID.
*
* @param id Channel ID including e.g. 'channel/'
* @return URL to channel
* @param id the channel ID including e.g. 'channel/'
* @return the URL to the channel
*/
@Override
public String getUrl(final String id,
Expand All @@ -58,16 +63,26 @@ public String getUrl(final String id,
}

/**
* Returns true if path conform to
* custom short channel URLs like youtube.com/yourcustomname
* Checks whether the given path conforms to custom short channel URLs like
* {@code youtube.com/yourcustomname}.
*
* @param splitPath path segments array
* @return true - if value conform to short channel URL, false - not
* @param splitPath the path segments array
* @return whether the value conforms to short channel URLs
*/
private boolean isCustomShortChannelUrl(final String[] splitPath) {
private boolean isCustomShortChannelUrl(@Nonnull final String[] splitPath) {
return splitPath.length == 1 && !EXCLUDED_SEGMENTS.matcher(splitPath[0]).matches();
}

/**
 * Tells whether the first path segment denotes a handle, as found in URLs such as
 * {@code youtube.com/@yourhandle}.
 *
 * @param splitPath the path segments array
 * @return {@code true} if there is at least one segment and the first one starts with
 *         {@code @}, {@code false} otherwise
 */
private boolean isHandle(@Nonnull final String[] splitPath) {
    if (splitPath.length == 0) {
        // No segment at all, so there is nothing that could be a handle
        return false;
    }
    return splitPath[0].startsWith("@");
}

@Override
public String getId(final String url) throws ParsingException {
try {
Expand All @@ -77,35 +92,38 @@ public String getId(final String url) throws ParsingException {
if (!Utils.isHTTP(urlObj) || !(YoutubeParsingHelper.isYoutubeURL(urlObj)
|| YoutubeParsingHelper.isInvidioURL(urlObj)
|| YoutubeParsingHelper.isHooktubeURL(urlObj))) {
throw new ParsingException("the URL given is not a Youtube-URL");
throw new ParsingException("The URL given is not a YouTube URL");
}

// remove leading "/"
// Remove leading "/"
path = path.substring(1);

String[] splitPath = path.split("/");

// Handle custom short channel URLs like youtube.com/yourcustomname
if (isCustomShortChannelUrl(splitPath)) {
if (isHandle(splitPath)) {
// Handle YouTube handle URLs like youtube.com/@yourhandle
return splitPath[0];
} else if (isCustomShortChannelUrl(splitPath)) {
// Handle custom short channel URLs like youtube.com/yourcustomname
path = "c/" + path;
splitPath = path.split("/");
}

if (!path.startsWith("user/")
&& !path.startsWith("channel/")
if (!path.startsWith("user/") && !path.startsWith("channel/")
&& !path.startsWith("c/")) {
throw new ParsingException("the URL given is neither a channel nor an user");
throw new ParsingException(
"The given URL is not a channel, a user or a handle URL");
}

final String id = splitPath[1];

if (id == null || !id.matches("[A-Za-z0-9_-]+")) {
throw new ParsingException("The given id is not a Youtube-Video-ID");
if (isBlank(id)) {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think we should use something else to match all custom names of channels or not?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible to have a channel ID that does not match the old pattern, e.g. by consisting of non latin characters?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

throw new ParsingException("The given ID is not a YouTube channel or user ID");
}

return splitPath[0] + "/" + id;
} catch (final Exception exception) {
throw new ParsingException("Error could not parse url :" + exception.getMessage(),
exception);
} catch (final Exception e) {
throw new ParsingException("Could not parse URL :" + e.getMessage(), e);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ public static void setUp() throws Exception {
YoutubeTestsUtils.ensureStateless();
NewPipe.init(DownloaderFactory.getDownloader(RESOURCE_PATH + "gronkh"));
extractor = (YoutubeChannelExtractor) YouTube
.getChannelExtractor("http://www.youtube.com/user/Gronkh");
.getChannelExtractor("http://www.youtube.com/@Gronkh");
extractor.fetchPage();
}

Expand Down Expand Up @@ -185,7 +185,7 @@ public void testUrl() throws ParsingException {

@Test
public void testOriginalUrl() throws ParsingException {
assertEquals("http://www.youtube.com/user/Gronkh", extractor.getOriginalUrl());
assertEquals("http://www.youtube.com/@Gronkh", extractor.getOriginalUrl());
}

/*//////////////////////////////////////////////////////////////////////////
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,12 @@ public static void setUp() {
}

@Test
public void acceptUrlTest() throws ParsingException {
void acceptUrlTest() throws ParsingException {
assertTrue(linkHandler.acceptUrl("https://www.youtube.com/user/Gronkh"));
assertTrue(linkHandler.acceptUrl("https://www.youtube.com/user/Netzkino/videos"));

assertTrue(linkHandler.acceptUrl("https://www.youtube.com/c/creatoracademy"));
assertTrue(linkHandler.acceptUrl("https://www.youtube.com/c/%EB%85%B8%EB%A7%88%EB%93%9C%EC%BD%94%EB%8D%94NomadCoders"));

assertTrue(linkHandler.acceptUrl("https://youtube.com/DIMENSI0N"));

Expand All @@ -49,6 +50,7 @@ public void acceptUrlTest() throws ParsingException {
assertTrue(linkHandler.acceptUrl("https://invidio.us/channel/UClq42foiSgl7sSpLupnugGA/videos?disable_polymer=1"));
assertTrue(linkHandler.acceptUrl("https://www.youtube.com/watchismo"));

assertTrue(linkHandler.acceptUrl("https://www.youtube.com/@YouTube"));

// do not accept URLs which are not channels
assertFalse(linkHandler.acceptUrl("https://www.youtube.com/watch?v=jZViOEv90dI&t=100"));
Expand All @@ -62,14 +64,13 @@ public void acceptUrlTest() throws ParsingException {
}

@Test
public void getIdFromUrl() throws ParsingException {
void getIdFromUrl() throws ParsingException {
assertEquals("user/Gronkh", linkHandler.fromUrl("https://www.youtube.com/user/Gronkh").getId());
assertEquals("user/Netzkino", linkHandler.fromUrl("https://www.youtube.com/user/Netzkino/videos").getId());

assertEquals("channel/UClq42foiSgl7sSpLupnugGA", linkHandler.fromUrl("https://www.youtube.com/channel/UClq42foiSgl7sSpLupnugGA").getId());
assertEquals("channel/UClq42foiSgl7sSpLupnugGA", linkHandler.fromUrl("https://www.youtube.com/channel/UClq42foiSgl7sSpLupnugGA/videos?disable_polymer=1").getId());


assertEquals("user/Gronkh", linkHandler.fromUrl("https://hooktube.com/user/Gronkh").getId());
assertEquals("user/Netzkino", linkHandler.fromUrl("https://hooktube.com/user/Netzkino/videos").getId());

Expand All @@ -84,5 +85,9 @@ public void getIdFromUrl() throws ParsingException {

assertEquals("c/creatoracademy", linkHandler.fromUrl("https://www.youtube.com/c/creatoracademy").getId());
assertEquals("c/YouTubeCreators", linkHandler.fromUrl("https://www.youtube.com/c/YouTubeCreators").getId());
assertEquals("c/%EB%85%B8%EB%A7%88%EB%93%9C%EC%BD%94%EB%8D%94NomadCoders", linkHandler.fromUrl("https://www.youtube.com/c/%EB%85%B8%EB%A7%88%EB%93%9C%EC%BD%94%EB%8D%94NomadCoders").getId());

assertEquals("@Gronkh", linkHandler.fromUrl("https://www.youtube.com/@Gronkh?ucbcb=1").getId());
assertEquals("@YouTubeCreators", linkHandler.fromUrl("https://www.youtube.com/@YouTubeCreators/shorts").getId());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@
"same-origin; report-to\u003d\"youtube_main\""
],
"date": [
"Wed, 02 Nov 2022 17:40:36 GMT"
"Wed, 02 Nov 2022 23:12:52 GMT"
],
"expires": [
"Wed, 02 Nov 2022 17:40:36 GMT"
"Wed, 02 Nov 2022 23:12:52 GMT"
],
"p3p": [
"CP\u003d\"This is not a P3P policy! See http://support.google.com/accounts/answer/151657?hl\u003den-GB for more info.\""
Expand All @@ -59,9 +59,9 @@
"ESF"
],
"set-cookie": [
"YSC\u003dIPHKPblTox0; Domain\u003d.youtube.com; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"VISITOR_INFO1_LIVE\u003d; Domain\u003d.youtube.com; Expires\u003dThu, 06-Feb-2020 17:40:36 GMT; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"CONSENT\u003dPENDING+350; expires\u003dFri, 01-Nov-2024 17:40:36 GMT; path\u003d/; domain\u003d.youtube.com; Secure"
"YSC\u003daFOfH_xu8k4; Domain\u003d.youtube.com; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"VISITOR_INFO1_LIVE\u003d; Domain\u003d.youtube.com; Expires\u003dThu, 06-Feb-2020 23:12:52 GMT; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"CONSENT\u003dPENDING+976; expires\u003dFri, 01-Nov-2024 23:12:52 GMT; path\u003d/; domain\u003d.youtube.com; Secure"
],
"strict-transport-security": [
"max-age\u003d31536000"
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -305,11 +305,7 @@
111,
109,
47,
117,
115,
101,
114,
47,
64,
71,
114,
111,
Expand Down Expand Up @@ -338,10 +334,10 @@
"application/json; charset\u003dUTF-8"
],
"date": [
"Wed, 02 Nov 2022 17:40:38 GMT"
"Wed, 02 Nov 2022 23:12:53 GMT"
],
"expires": [
"Wed, 02 Nov 2022 17:40:38 GMT"
"Wed, 02 Nov 2022 23:12:53 GMT"
],
"p3p": [
"CP\u003d\"This is not a P3P policy! See g.co/p3phelp for more info.\""
Expand All @@ -350,7 +346,7 @@
"scaffolding on HTTPServer2"
],
"set-cookie": [
"CONSENT\u003dPENDING+067; expires\u003dFri, 01-Nov-2024 17:40:38 GMT; path\u003d/; domain\u003d.youtube.com; Secure"
"CONSENT\u003dPENDING+288; expires\u003dFri, 01-Nov-2024 23:12:53 GMT; path\u003d/; domain\u003d.youtube.com; Secure"
],
"vary": [
"Origin",
Expand All @@ -367,7 +363,7 @@
"0"
]
},
"responseBody": "{\"responseContext\":{\"visitorData\":\"CgtRX2dsZzVrWGRZQSiW14qbBg%3D%3D\",\"serviceTrackingParams\":[{\"service\":\"CSI\",\"params\":[{\"key\":\"c\",\"value\":\"WEB\"},{\"key\":\"cver\",\"value\":\"2.20221101.00.00\"},{\"key\":\"yt_li\",\"value\":\"0\"},{\"key\":\"ResolveUrl_rid\",\"value\":\"0x1a56ddef412e73c5\"}]},{\"service\":\"GFEEDBACK\",\"params\":[{\"key\":\"logged_in\",\"value\":\"0\"},{\"key\":\"e\",\"value\":\"1714240,9453586,9453587,23804281,23882502,23918597,23934970,23946420,23966208,23983296,23986022,23998056,24001373,24002022,24002025,24004644,24007246,24034168,24036948,24077241,24080738,24120819,24135310,24140247,24152443,24161116,24162920,24164186,24166867,24169501,24175559,24181174,24184445,24185614,24187043,24187377,24191629,24199724,24211178,24216872,24219359,24219713,24224266,24229161,24241378,24248092,24248955,24249296,24254502,24255543,24255545,24260783,24262346,24263273,24263796,24265820,24267564,24267570,24268142,24274310,24276618,24278596,24279196,24279628,24280997,24283093,24283556,24286005,24286017,24287169,24287327,24287604,24287795,24288045,24288912,24290971,24291857,24292955,24292977,24299747,24390675,24391541,24391851,24392269,24392403,24392421,24393382,24394397,24396645,24396819,24398124,24398991,24399052,24399918,24400658,24401557,24406381,24406984,24407199,24410009,39322399,39322504,39322574\"}]},{\"service\":\"GUIDED_HELP\",\"params\":[{\"key\":\"logged_in\",\"value\":\"0\"}]},{\"service\":\"ECATCHER\",\"params\":[{\"key\":\"client.version\",\"value\":\"2.20221101\"},{\"key\":\"client.name\",\"value\":\"WEB\"},{\"key\":\"client.fexp\",\"value\":\"24283556,24249296,24185614,24263273,24274310,1714240,24262346,24263796,24260783,24267564,24187043,23986022,24406381,24299747,24399052,24248092,24211178,24291857,24152443,24135310,24407199,24191629,24229161,24396819,24241378,24391541,24292977,24164186,24169501,24276618,24400658,24280997,24401557,24120819,24288045,39322574,24224266,24034168,23934970,24002022,24181174,24287
795,24175559,23882502,24283093,24001373,24292955,24290971,24161116,9453587,24393382,24410009,24391851,24288912,24394397,24392269,24002025,24268142,24399918,24390675,24278596,24216872,24286005,24219713,24080738,23918597,39322504,24279196,24392403,24396645,9453586,24279628,39322399,24265820,24254502,24036948,24162920,24398991,24255545,24166867,23998056,24007246,24392421,24255543,24398124,24286017,24140247,23804281,24287169,24287327,23983296,24406984,24199724,23966208,24184445,24219359,24267570,24248955,23946420,24187377,24077241,24004644,24287604\"}]}],\"mainAppWebResponseContext\":{\"loggedOut\":true},\"webResponseContextExtensionData\":{\"hasDecorated\":true}},\"endpoint\":{\"clickTrackingParams\":\"IhMI1s3RtYWQ-wIV99URCB21lwMbMghleHRlcm5hbA\u003d\u003d\",\"commandMetadata\":{\"webCommandMetadata\":{\"url\":\"/youtubei/v1/navigation/resolve_url\",\"webPageType\":\"WEB_PAGE_TYPE_CHANNEL\",\"rootVe\":3611,\"apiUrl\":\"/youtubei/v1/browse\"},\"resolveUrlCommandMetadata\":{\"isVanityUrl\":true}},\"browseEndpoint\":{\"browseId\":\"UCYJ61XIK64sp6ZFFS8sctxw\",\"params\":\"EgC4AQDyBgQKAjIA\"}}}",
"responseBody": "{\"responseContext\":{\"visitorData\":\"CgtieTdiV1lXeWFmTSj18oubBg%3D%3D\",\"serviceTrackingParams\":[{\"service\":\"CSI\",\"params\":[{\"key\":\"c\",\"value\":\"WEB\"},{\"key\":\"cver\",\"value\":\"2.20221101.00.00\"},{\"key\":\"yt_li\",\"value\":\"0\"},{\"key\":\"ResolveUrl_rid\",\"value\":\"0x01203331c856c87e\"}]},{\"service\":\"GFEEDBACK\",\"params\":[{\"key\":\"logged_in\",\"value\":\"0\"},{\"key\":\"e\",\"value\":\"1714254,23804281,23882502,23918597,23934970,23940247,23946420,23966208,23983296,23986015,23998056,24001373,24002022,24002025,24004644,24007246,24034168,24036948,24077241,24080738,24120819,24135310,24140247,24152443,24161116,24162919,24164186,24166867,24169501,24181174,24185614,24187043,24187377,24191629,24199724,24211178,24218780,24219713,24224266,24224808,24229161,24241378,24248091,24254502,24255543,24255545,24256985,24260783,24262346,24262775,24263796,24265820,24267564,24267570,24268142,24273932,24278596,24279196,24279628,24280221,24283093,24283556,24286003,24286017,24286291,24287169,24287327,24287795,24288045,24288912,24290842,24290971,24291857,24292955,24297748,24298082,24299548,24299747,24390376,24390675,24390916,24391541,24392399,24393382,24394397,24396645,24396818,24398124,24398981,24400943,24401137,24401291,24401557,24406381,24406605,24406984,24407200,24408325,39322399,39322504,39322574\"}]},{\"service\":\"GUIDED_HELP\",\"params\":[{\"key\":\"logged_in\",\"value\":\"0\"}]},{\"service\":\"ECATCHER\",\"params\":[{\"key\":\"client.version\",\"value\":\"2.20221101\"},{\"key\":\"client.name\",\"value\":\"WEB\"},{\"key\":\"client.fexp\",\"value\":\"24135310,24080738,24255543,24267570,23918597,24406605,24164186,24396818,24398124,23804281,24406984,24401137,24286291,23983296,24219713,23966208,24287327,24287169,24291857,24398981,24248091,24152443,24286003,24140247,24286017,24390675,24290842,24036948,24224808,24396645,1714254,24218780,39322399,24161116,24299747,23940247,24401291,24400943,24278596,24256985,24001373,23946420,24268142,242
98082,24290971,24077241,24292955,24408325,24229161,24169501,24401557,24391541,24241378,24390916,24297748,24224266,24002022,24280221,23934970,24407200,24034168,24262775,39322574,24181174,23882502,24211178,24120819,24265820,24288045,39322504,24254502,24288912,24166867,24255545,24393382,24279628,24394397,24002025,24279196,24273932,24191629,24283093,24263796,24187043,24406381,24390376,24267564,24260783,24287795,24392399,24004644,24007246,23986015,24299548,24162919,24187377,24283556,24262346,23998056,24185614,24199724\"}]}],\"mainAppWebResponseContext\":{\"loggedOut\":true},\"webResponseContextExtensionData\":{\"hasDecorated\":true}},\"endpoint\":{\"clickTrackingParams\":\"IhMIz4TI18-Q-wIV5YE4Ch01jgrjMghleHRlcm5hbA\u003d\u003d\",\"commandMetadata\":{\"webCommandMetadata\":{\"url\":\"/youtubei/v1/navigation/resolve_url\",\"webPageType\":\"WEB_PAGE_TYPE_CHANNEL\",\"rootVe\":3611,\"apiUrl\":\"/youtubei/v1/browse\"},\"resolveUrlCommandMetadata\":{\"isVanityUrl\":true}},\"browseEndpoint\":{\"browseId\":\"UCYJ61XIK64sp6ZFFS8sctxw\",\"params\":\"EgC4AQDyBgQKAjIA\"}}}",
"latestUrl": "https://www.youtube.com/youtubei/v1/navigation/resolve_url?key\u003dAIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8\u0026prettyPrint\u003dfalse"
}
}

Large diffs are not rendered by default.

Loading