Skip to content

Commit

Permalink
[PeerTube] Fix multi level comment replies
Browse files Browse the repository at this point in the history
  • Loading branch information
TobiGr committed Apr 20, 2023
1 parent b2b4260 commit f7118b8
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ public Page(final String url, final String id) {
this(url, id, null, null, null);
}

/**
 * Creates a page from a URL, an id and a body.
 * <p>
 * Forwards to the five-argument constructor, passing {@code null} for its third and
 * fourth arguments.
 *
 * @param url  the URL of the page
 * @param id   the id of the page
 * @param body the raw body bytes to attach to the page; presumably content that is
 *             already available so that it does not have to be fetched again
 *             (TODO confirm against the callers)
 */
public Page(final String url, final String id, final byte[] body) {
this(url, id, null, null, body);
}

/**
 * Creates a page from a URL and a body, without an id.
 * <p>
 * Forwards to the five-argument constructor, passing {@code null} for its second, third
 * and fourth arguments.
 *
 * @param url  the URL of the page
 * @param body the raw body bytes to attach to the page
 */
public Page(final String url, final byte[] body) {
this(url, null, null, null, body);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;
import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
Expand All @@ -17,6 +18,7 @@
import org.schabi.newpipe.extractor.utils.Utils;

import java.io.IOException;
import java.nio.charset.StandardCharsets;

import static org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper.COUNT_KEY;
import static org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper.ITEMS_PER_PAGE;
Expand All @@ -26,6 +28,9 @@
import javax.annotation.Nonnull;

public class PeertubeCommentsExtractor extends CommentsExtractor {
static final String CHILDREN = "children";
private static final String IS_DELETED = "isDeleted";
private static final String TOTAL = "total";

/**
* Use {@link #isReply()} to access this variable.
Expand All @@ -49,7 +54,7 @@ public InfoItemsPage<CommentsInfoItem> getInitialPage()
}
}

private boolean isReply() throws ParsingException {
boolean isReply() throws ParsingException {
if (isReply == null) {
if (getOriginalUrl().contains("/videos/watch/")) {
isReply = false;
Expand All @@ -67,22 +72,24 @@ private void collectCommentsFrom(@Nonnull final CommentsInfoItemsCollector colle
for (final Object c : contents) {
if (c instanceof JsonObject) {
final JsonObject item = (JsonObject) c;
if (!item.getBoolean("isDeleted")) {
collector.commit(new PeertubeCommentsInfoItemExtractor(item, this));
if (!item.getBoolean(IS_DELETED)) {
collector.commit(new PeertubeCommentsInfoItemExtractor(item, null, this));
}
}
}
}

private void collectRepliesFrom(@Nonnull final CommentsInfoItemsCollector collector,
@Nonnull final JsonObject json) throws ParsingException {
final JsonArray contents = json.getArray("children");
final JsonArray contents = json.getArray(CHILDREN);

for (final Object c : contents) {
if (c instanceof JsonObject) {
final JsonObject item = ((JsonObject) c).getObject("comment");
if (!item.getBoolean("isDeleted")) {
collector.commit(new PeertubeCommentsInfoItemExtractor(item, this));
final JsonObject content = (JsonObject) c;
final JsonObject item = content.getObject("comment");
final JsonArray children = content.getArray(CHILDREN);
if (!item.getBoolean(IS_DELETED)) {
collector.commit(new PeertubeCommentsInfoItemExtractor(item, children, this));
}
}
}
Expand All @@ -95,36 +102,46 @@ public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
throw new IllegalArgumentException("Page doesn't contain an URL");
}

final Response response = getDownloader().get(page.getUrl());

JsonObject json = null;
if (response != null && !Utils.isBlank(response.responseBody())) {
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
final long total;
if (page.getBody() == null) {
final Response response = getDownloader().get(page.getUrl());
if (response != null && !Utils.isBlank(response.responseBody())) {
try {
json = JsonParser.object().from(response.responseBody());
} catch (final Exception e) {
throw new ParsingException("Could not parse json data for comments info", e);
}
}
if (json != null) {
PeertubeParsingHelper.validate(json);
if (isReply() || json.has(CHILDREN)) {
total = json.getArray(CHILDREN).size();
collectRepliesFrom(collector, json);
} else {
total = json.getLong(TOTAL);
collectCommentsFrom(collector, json);
}
} else {
throw new ExtractionException("Unable to get PeerTube kiosk info");
}
} else {
try {
json = JsonParser.object().from(response.responseBody());
} catch (final Exception e) {
throw new ParsingException("Could not parse json data for comments info", e);
json = JsonParser.object().from(new String(page.getBody(), StandardCharsets.UTF_8));
isReply = true;
total = json.getArray(CHILDREN).size();
collectRepliesFrom(collector, json);
} catch (final JsonParserException e) {
throw new ParsingException(
"Could not parse json data for nested comments info", e);
}
}

if (json != null) {
PeertubeParsingHelper.validate(json);
final long total;
final CommentsInfoItemsCollector collector
= new CommentsInfoItemsCollector(getServiceId());
return new InfoItemsPage<>(collector,
PeertubeParsingHelper.getNextPage(page.getUrl(), total));

if (isReply() || json.has("children")) {
total = json.getArray("children").size();
collectRepliesFrom(collector, json);
} else {
total = json.getLong("total");
collectCommentsFrom(collector, json);
}

return new InfoItemsPage<>(collector,
PeertubeParsingHelper.getNextPage(page.getUrl(), total));
} else {
throw new ExtractionException("Unable to get PeerTube kiosk info");
}
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package org.schabi.newpipe.extractor.services.peertube.extractors;

import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;

import com.grack.nanojson.JsonWriter;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.schabi.newpipe.extractor.Page;
Expand All @@ -13,20 +15,36 @@
import org.schabi.newpipe.extractor.stream.Description;
import org.schabi.newpipe.extractor.utils.JsonUtils;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.nio.charset.StandardCharsets;
import java.util.Objects;

import static org.schabi.newpipe.extractor.services.peertube.extractors.PeertubeCommentsExtractor.CHILDREN;

public class PeertubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
@Nonnull
private final JsonObject item;
@Nullable
private final JsonArray children;
@Nonnull
private final String url;
@Nonnull
private final String baseUrl;
@Nonnull
private final PeertubeCommentsExtractor superCommentExtractor;

private Integer replyCount;

public PeertubeCommentsInfoItemExtractor(final JsonObject item,
final PeertubeCommentsExtractor extractor)
public PeertubeCommentsInfoItemExtractor(@Nonnull final JsonObject item,
@Nullable final JsonArray children,
@Nonnull final PeertubeCommentsExtractor extractor)
throws ParsingException {
this.item = item;
this.children = children;
this.url = extractor.getUrl();
this.baseUrl = extractor.getBaseUrl();
this.superCommentExtractor = extractor;
}

@Override
Expand Down Expand Up @@ -107,15 +125,34 @@ public String getUploaderUrl() throws ParsingException {
@Override
@Nullable
public Page getReplies() throws ParsingException {
if (JsonUtils.getNumber(item, "totalReplies").intValue() == 0) {
if (getReplyCount() == 0) {
return null;
}
final String threadId = JsonUtils.getNumber(item, "threadId").toString();
return new Page(url + "/" + threadId, threadId);
final String repliesUrl = url + "/" + threadId;
if (superCommentExtractor.isReply() && children != null && !children.isEmpty()) {
// Nested replies are already included in the original thread's request.
// Wrap the replies into a JsonObject, because the original thread's request body
// is also structured like a JsonObject.
final JsonObject pageContent = new JsonObject();
pageContent.put(CHILDREN, children);
return new Page(repliesUrl, threadId,
JsonWriter.string(pageContent).getBytes(StandardCharsets.UTF_8));
}
return new Page(repliesUrl, threadId);
}

@Override
public int getReplyCount() throws ParsingException {
return JsonUtils.getNumber(item, "totalReplies").intValue();
if (replyCount == null) {
if (children != null && !children.isEmpty()) {
// The totalReplies field is inaccurate for nested replies and sometimes returns 0
// although there are replies to that reply stored in children.
replyCount = children.size();
} else {
replyCount = JsonUtils.getNumber(item, "totalReplies").intValue();
}
}
return replyCount;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,9 @@

import java.io.IOException;
import java.util.List;
import java.util.Optional;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.*;
import static org.schabi.newpipe.extractor.ServiceList.PeerTube;

public class PeertubeCommentsExtractorTest {
Expand Down Expand Up @@ -121,4 +120,52 @@ void testGetCommentsFromCommentsInfo() throws IOException, ExtractionException {
assertTrue(commentsInfo.getErrors().isEmpty());
}
}

/**
 * Checks comment extraction for a video whose comment threads contain replies to replies.
 */
public static class NestedComments {
    private static PeertubeCommentsExtractor extractor;

    @BeforeAll
    public static void setUp() throws Exception {
        NewPipe.init(DownloaderTestImpl.getInstance());
        final String videoUrl = "https://share.tube/w/vxu4uTstUBAUromWwXGHrq";
        extractor = (PeertubeCommentsExtractor) PeerTube.getCommentsExtractor(videoUrl);
    }

    @Test
    void testGetComments() throws IOException, ExtractionException {
        final List<CommentsInfoItem> topLevelComments = extractor.getInitialPage().getItems();
        assertFalse(topLevelComments.isEmpty());

        // Locate the top-level comment under which the nested reply is searched.
        final Optional<CommentsInfoItem> threadHead = topLevelComments.stream()
                .filter(comment -> comment.getCommentId().equals("9770"))
                .findFirst();
        assertTrue(threadHead.isPresent());

        // Walk the reply tree below that comment, looking for the nested reply.
        final boolean nestedReplyFound = findNestedCommentWithId("9773", threadHead.get());
        assertTrue(nestedReplyFound, "The nested comment replies were not found");
    }
}

/**
 * Recursively searches {@code comment} and the replies below it for a comment with the
 * given id.
 * <p>
 * For every visited comment that has replies, a comments extractor is created for the
 * comment's URL and the replies page is loaded through it, so each level of the search
 * may trigger a request.
 *
 * @param id      the comment id to look for
 * @param comment the comment at which the search starts
 * @return {@code true} if {@code comment} itself or any comment reachable through its
 *         replies has the id {@code id}, {@code false} otherwise
 * @throws IOException         if loading the replies of {@code comment} fails
 * @throws ExtractionException if the replies of {@code comment} cannot be extracted
 */
private static boolean findNestedCommentWithId(final String id, final CommentsInfoItem comment)
        throws IOException, ExtractionException {
    if (comment.getCommentId().equals(id)) {
        return true;
    }
    // A comment without replies has no replies page. Stop here instead of handing null to
    // getPage(), which rejects pages without a URL, and relying on that exception to
    // terminate the recursion.
    if (comment.getReplies() == null) {
        return false;
    }
    return PeerTube
            .getCommentsExtractor(comment.getUrl())
            .getPage(comment.getReplies())
            .getItems()
            .stream()
            // anyMatch short-circuits: once the id is found, the remaining branches are
            // not fetched anymore.
            .anyMatch(reply -> {
                try {
                    return findNestedCommentWithId(id, reply);
                } catch (final Exception ignored) {
                    // Deliberately best-effort: a branch that cannot be loaded counts as
                    // "not found" so that the other branches are still searched.
                    return false;
                }
            });
}
}

0 comments on commit f7118b8

Please sign in to comment.