From 7b9aae56eb2dda35a34dfbdceee255d34edfaa92 Mon Sep 17 00:00:00 2001 From: sfrei Date: Sun, 12 May 2024 15:12:57 +0200 Subject: [PATCH] Fix YouTube tracks parsing through avoiding sponsored --- CHANGELOG.md | 12 +++- .../clients/soundcloud/SoundCloudUtility.java | 2 +- .../clients/youtube/YouTubeUtility.java | 71 +++++++++---------- .../SoundCloudTrackDeserializer.java | 10 +-- .../youtube/YouTubeListTrackDeserializer.java | 12 ++-- .../youtube/YouTubeURLTrackDeserializer.java | 6 +- .../tracksearch/utils/json/JsonElement.java | 21 ++++-- .../utils/json/JsonNodeResolver.java | 8 +-- 8 files changed, 78 insertions(+), 64 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 716a31c..6f923b5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,16 @@ Changelog ========= -0.9.1 - unrelesed ------------------ +0.9.1 - unreleased +------------------ -Nothing to see here yet +**Features:** + +- Updated dependencies + +**Bugfixes:** + +- Fix YouTube tracks parsing through avoiding sponsored 0.9.0 ----- diff --git a/src/main/java/io/sfrei/tracksearch/clients/soundcloud/SoundCloudUtility.java b/src/main/java/io/sfrei/tracksearch/clients/soundcloud/SoundCloudUtility.java index 98f1b3a..39639c6 100644 --- a/src/main/java/io/sfrei/tracksearch/clients/soundcloud/SoundCloudUtility.java +++ b/src/main/java/io/sfrei/tracksearch/clients/soundcloud/SoundCloudUtility.java @@ -113,7 +113,7 @@ protected GenericTrackList extractSoundCloudTracks(final String final JsonElement responseElement = JsonElement.readTreeCatching(MAPPER, json) .orElseThrow(() -> new SoundCloudException("Cannot parse SoundCloudTracks JSON")) - .path("collection"); + .paths("collection"); final List scTracks = responseElement.elements() .map(element -> element.mapCatching(MAPPER, SoundCloudTrack.SoundCloudTrackBuilder.class)) diff --git a/src/main/java/io/sfrei/tracksearch/clients/youtube/YouTubeUtility.java b/src/main/java/io/sfrei/tracksearch/clients/youtube/YouTubeUtility.java index 4e20173..a83012e 
100644 --- a/src/main/java/io/sfrei/tracksearch/clients/youtube/YouTubeUtility.java +++ b/src/main/java/io/sfrei/tracksearch/clients/youtube/YouTubeUtility.java @@ -101,41 +101,40 @@ protected GenericTrackList extractYouTubeTracks(final String json, final JsonElement rootElement = JsonElement.readTreeCatching(MAPPER, json) .orElseThrow(() -> new YouTubeException("Cannot parse YouTubeTracks JSON")); - final JsonElement responseElement = rootElement.path("response").orElse(rootElement).elementAtIndex(1).path("response"); + final JsonElement responseElement = rootElement.paths("response").orElse(rootElement).elementAtIndex(1).paths("response"); final JsonElement defaultElement = responseElement.asUnresolved() - .path("contents", "twoColumnSearchResultsRenderer", "primaryContents", "sectionListRenderer", "contents"); + .paths("contents", "twoColumnSearchResultsRenderer", "primaryContents", "sectionListRenderer", "contents"); final JsonElement contentHolder = defaultElement - .firstElement() - .path("itemSectionRenderer") + .lastForPath("itemSectionRenderer") // Avoid sponsored .orElse(responseElement) - .path("onResponseReceivedCommands") + .paths("onResponseReceivedCommands") .firstElement() - .path("appendContinuationItemsAction", "continuationItems") + .paths("appendContinuationItemsAction", "continuationItems") .firstElement() - .path("itemSectionRenderer") + .paths("itemSectionRenderer") .orElse(responseElement) - .path("onResponseReceivedCommands") + .paths("onResponseReceivedCommands") .firstElement() - .path("appendContinuationItemsAction", "continuationItems") + .paths("appendContinuationItemsAction", "continuationItems") .firstElement() - .path("itemSectionRenderer") + .paths("itemSectionRenderer") .orElse(responseElement) - .path("continuationContents", "itemSectionContinuation", "itemSectionContinuation") + .paths("continuationContents", "itemSectionContinuation", "itemSectionContinuation") .orElse(responseElement) - .path("continuationContents", 
"sectionListContinuation", "contents") + .paths("continuationContents", "sectionListContinuation", "contents") .firstElement() - .path("itemSectionRenderer"); + .paths("itemSectionRenderer"); final String cToken = extractCToken(responseElement, defaultElement, contentHolder); - final JsonElement contents = contentHolder.asUnresolved().path("contents"); + final JsonElement contents = contentHolder.asUnresolved().paths("contents"); final List ytTracks = contents.elements() - .filter(content -> content.path("videoRenderer", "upcomingEventData").isNull()) // Avoid premieres - .filter(content -> content.path("promotedSparklesWebRenderer").isNull()) // Avoid ads - .map(content -> content.path("videoRenderer").orElse(content).path("searchPyvRenderer", "ads").firstElement().path("promotedVideoRenderer")) - .filter(renderer -> renderer.asUnresolved().path("lengthText").isPresent()) // Avoid live streams + .filter(content -> content.paths("videoRenderer", "upcomingEventData").isNull()) // Avoid premieres + .filter(content -> content.paths("promotedSparklesWebRenderer").isNull()) // Avoid ads + .map(content -> content.paths("videoRenderer").orElse(content).paths("searchPyvRenderer", "ads").firstElement().paths("promotedVideoRenderer")) + .filter(renderer -> renderer.asUnresolved().paths("lengthText").isPresent()) // Avoid live streams .map(renderer -> renderer.mapCatching(MAPPER, YouTubeTrack.ListYouTubeTrackBuilder.class)) .filter(Objects::nonNull) .map(YouTubeTrack.ListYouTubeTrackBuilder::getBuilder) @@ -156,20 +155,20 @@ protected GenericTrackList extractYouTubeTracks(final String json, private static String extractCToken(JsonElement responseElement, JsonElement defaultElement, JsonElement contentHolder) { if (contentHolder.nodePresent("continuations")) { return contentHolder.asUnresolved() - .path("continuations") + .paths("continuations") .firstElement() - .path("nextContinuationData") + .paths("nextContinuationData") .asString("continuation"); } return 
responseElement.asUnresolved() - .path("onResponseReceivedCommands") + .paths("onResponseReceivedCommands") .firstElement() - .path("appendContinuationItemsAction", "continuationItems") + .paths("appendContinuationItemsAction", "continuationItems") .elementAtIndex(1) - .path("continuationItemRenderer", "continuationEndpoint", "continuationCommand") + .paths("continuationItemRenderer", "continuationEndpoint", "continuationCommand") .orElse(defaultElement) .findElement("continuationItemRenderer") - .path("continuationEndpoint", "continuationCommand") + .paths("continuationEndpoint", "continuationCommand") .asString("token"); } @@ -181,7 +180,7 @@ protected YouTubeTrackInfo extractTrackInfo(final String json, final String trac final JsonElement playerElement; if (jsonElement.isArray()) { - playerElement = jsonElement.elementAtIndex(2).path("player"); + playerElement = jsonElement.elementAtIndex(2).paths("player"); } else { playerElement = jsonElement.findElement("player"); } @@ -190,26 +189,26 @@ protected YouTubeTrackInfo extractTrackInfo(final String json, final String trac final JsonElement streamingData; - final JsonElement playerArgs = playerElement.path("args"); + final JsonElement playerArgs = playerElement.paths("args"); if (playerElement.isPresent() && playerArgs.isPresent()) { - scriptUrl.set(playerElement.path("assets").asString("js")); + scriptUrl.set(playerElement.paths("assets").asString("js")); - streamingData = playerArgs.path("player_response") + streamingData = playerArgs.paths("player_response") .reReadTree(MAPPER) - .path("streamingData"); + .paths("streamingData"); } else { final JsonElement playerResponse = playerResponseFromTrackJSON(jsonElement); - streamingData = playerResponse.asUnresolved().path("streamingData"); + streamingData = playerResponse.asUnresolved().paths("streamingData"); } - final JsonElement formatsElement = streamingData.path("formats"); + final JsonElement formatsElement = streamingData.paths("formats"); final Stream formats 
= formatsElement.isPresent() ? getFormatsFromStream(formatsElement.arrayElements()) : Stream.empty(); - final Stream adaptiveFormatsStream = streamingData.path("adaptiveFormats").arrayElements(); + final Stream adaptiveFormatsStream = streamingData.paths("adaptiveFormats").arrayElements(); final Stream adaptiveFormats = getFormatsFromStream(adaptiveFormatsStream); final List trackFormats = Stream.concat(formats, adaptiveFormats).collect(Collectors.toList()); @@ -236,9 +235,9 @@ protected YouTubeTrackInfo extractTrackInfo(final String json, final String trac private static JsonElement playerResponseFromTrackJSON(JsonElement jsonElement) { return jsonElement.elementAtIndex(2) - .path("playerResponse") + .paths("playerResponse") .orElse(jsonElement) - .path("playerResponse"); + .paths("playerResponse"); } private Stream getFormatsFromStream(final Stream formats) { @@ -248,9 +247,9 @@ private Stream getFormatsFromStream(final Stream trackFormats = rootElement.path("media", "transcodings") + final List trackFormats = rootElement.paths("media", "transcodings") .arrayElements() .map(SoundCloudTrackDeserializer::transcodingToTrackFormat) .collect(Collectors.toList()); @@ -87,7 +87,7 @@ private static SoundCloudTrackFormat transcodingToTrackFormat(JsonElement transc final String formatUrl = transcoding.asString("url"); final String audioQuality = transcoding.asString("quality"); - final JsonElement formatElement = transcoding.path("format"); + final JsonElement formatElement = transcoding.paths("format"); final String mimeType = formatElement.asString("mime_type"); final String protocol = formatElement.asString("protocol"); diff --git a/src/main/java/io/sfrei/tracksearch/tracks/deserializer/youtube/YouTubeListTrackDeserializer.java b/src/main/java/io/sfrei/tracksearch/tracks/deserializer/youtube/YouTubeListTrackDeserializer.java index ed2e8c7..fe073f4 100644 --- a/src/main/java/io/sfrei/tracksearch/tracks/deserializer/youtube/YouTubeListTrackDeserializer.java +++ 
b/src/main/java/io/sfrei/tracksearch/tracks/deserializer/youtube/YouTubeListTrackDeserializer.java @@ -43,8 +43,8 @@ public YouTubeTrack.ListYouTubeTrackBuilder deserialize(final JsonParser p, fina // Track final String ref = rootElement.asString("videoId"); - final String title = rootElement.path("title", "runs").firstElement().asString("text"); - final String timeString = rootElement.path("lengthText").asString("simpleText"); + final String title = rootElement.paths("title", "runs").firstElement().asString("text"); + final String timeString = rootElement.paths("lengthText").asString("simpleText"); final Duration duration = TimeUtility.getDurationForTimeString(timeString); if (title == null || duration == null || ref == null) @@ -60,21 +60,21 @@ public YouTubeTrack.ListYouTubeTrackBuilder deserialize(final JsonParser p, fina // Metadata - final JsonElement owner = rootElement.path("ownerText", "runs").firstElement(); + final JsonElement owner = rootElement.paths("ownerText", "runs").firstElement(); final String channelName = owner.asString("text"); - final String channelUrlSuffix = owner.path("navigationEndpoint", "commandMetadata", "webCommandMetadata") + final String channelUrlSuffix = owner.paths("navigationEndpoint", "commandMetadata", "webCommandMetadata") .asString("url"); final String channelUrl = YouTubeClient.URL.concat(channelUrlSuffix); - final String streamAmountText = rootElement.path("viewCountText").asString("simpleText"); + final String streamAmountText = rootElement.paths("viewCountText").asString("simpleText"); final String streamAmountDigits = streamAmountText == null || streamAmountText.isEmpty() ? null : ReplaceUtility.replaceNonDigits(streamAmountText); final Long streamAmount = streamAmountDigits == null || streamAmountDigits.isEmpty() ? 
0L : Long.parseLong(streamAmountDigits); - final Stream thumbNailStream = rootElement.path("thumbnail", "thumbnails").elements(); + final Stream thumbNailStream = rootElement.paths("thumbnail", "thumbnails").elements(); final Optional lastThumbnail = thumbNailStream.findFirst(); final String thumbNailUrl = lastThumbnail.map(thumbNail -> thumbNail.asString("url")).orElse(null); diff --git a/src/main/java/io/sfrei/tracksearch/tracks/deserializer/youtube/YouTubeURLTrackDeserializer.java b/src/main/java/io/sfrei/tracksearch/tracks/deserializer/youtube/YouTubeURLTrackDeserializer.java index 3f906de..e2de5cb 100644 --- a/src/main/java/io/sfrei/tracksearch/tracks/deserializer/youtube/YouTubeURLTrackDeserializer.java +++ b/src/main/java/io/sfrei/tracksearch/tracks/deserializer/youtube/YouTubeURLTrackDeserializer.java @@ -41,7 +41,7 @@ public YouTubeTrack.URLYouTubeTrackBuilder deserialize(final JsonParser p, final // Track - final JsonElement videoDetails = rootElement.path("videoDetails"); + final JsonElement videoDetails = rootElement.paths("videoDetails"); final String ref = videoDetails.asString("videoId"); final String title = videoDetails.asString("title"); @@ -61,7 +61,7 @@ public YouTubeTrack.URLYouTubeTrackBuilder deserialize(final JsonParser p, final // Metadata - final JsonElement owner = rootElement.path("microformat", "playerMicroformatRenderer"); + final JsonElement owner = rootElement.paths("microformat", "playerMicroformatRenderer"); final String channelName = owner.asString("ownerChannelName"); @@ -69,7 +69,7 @@ public YouTubeTrack.URLYouTubeTrackBuilder deserialize(final JsonParser p, final final long streamAmount = Long.parseLong(owner.asString("viewCount")); - final Stream thumbNailStream = owner.path("thumbnail", "thumbnails").elements(); + final Stream thumbNailStream = owner.paths("thumbnail", "thumbnails").elements(); final Optional firstThumbnail = thumbNailStream.findFirst(); final String thumbNailUrl = firstThumbnail.map(thumbNail -> 
thumbNail.asString("url")).orElse(null); diff --git a/src/main/java/io/sfrei/tracksearch/utils/json/JsonElement.java b/src/main/java/io/sfrei/tracksearch/utils/json/JsonElement.java index 48f1e01..9611951 100644 --- a/src/main/java/io/sfrei/tracksearch/utils/json/JsonElement.java +++ b/src/main/java/io/sfrei/tracksearch/utils/json/JsonElement.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import lombok.extern.slf4j.Slf4j; +import java.util.List; import java.util.Optional; import java.util.Spliterator; import java.util.Spliterators; @@ -76,18 +77,18 @@ public Stream arrayElements() { } public String asString(final String... paths) { - return super.asString(path(paths).node()); + return super.asString(paths(paths).node()); } public Long asLong(final String... paths) { - return getAsLong(path(paths).node()); + return getAsLong(paths(paths).node()); } - public JsonElement path(final String... paths) { - return nextElement(e -> nodeForPath(paths)); + public JsonElement paths(final String... paths) { + return nextElement(e -> nodeForPaths(paths)); } - private JsonNode nodeForPath(String... paths) { + private JsonNode nodeForPaths(String... 
paths) { if (paths.length == 0) return node(); @@ -106,6 +107,14 @@ public JsonElement firstElement() { return nextElement(node -> atIndex(0)); } + public JsonElement lastForPath(final String path) { + return nextElement(node -> { + final List nodes = node.findParents(path); + if (nodes.isEmpty()) return null; + return nodes.get(nodes.size() - 1).path(path); + }); + } + public JsonElement elementAtIndex(final int index) { return nextElement(node -> atIndex(index)); } @@ -150,7 +159,7 @@ public boolean isPresent() { } public boolean nodePresent(String path) { - return JsonElement.of(nodeForPath(path)).isPresent(); + return JsonElement.of(nodeForPaths(path)).isPresent(); } } diff --git a/src/main/java/io/sfrei/tracksearch/utils/json/JsonNodeResolver.java b/src/main/java/io/sfrei/tracksearch/utils/json/JsonNodeResolver.java index 3fa6410..6179541 100644 --- a/src/main/java/io/sfrei/tracksearch/utils/json/JsonNodeResolver.java +++ b/src/main/java/io/sfrei/tracksearch/utils/json/JsonNodeResolver.java @@ -34,12 +34,12 @@ public JsonNode node() { return node; } - protected boolean nodeIsNull(JsonNode node) { + protected boolean isNodeNull(JsonNode node) { return node == null || node.isNull(); } protected boolean nodeIsNull() { - return nodeIsNull(node); + return isNodeNull(node); } public boolean isArray() { @@ -51,11 +51,11 @@ protected ArrayNode toArrayNode() { } public String asString(final JsonNode node) { - return nodeIsNull(node) ? null : node.asText(); + return isNodeNull(node) ? null : node.asText(); } protected Long getAsLong(final JsonNode node) { - return nodeIsNull(node) ? null : node.asLong(); + return isNodeNull(node) ? null : node.asLong(); } protected JsonNode atIndex(final int index) {