Skip to content

Commit

Permalink
arte: use recent list instead of categories
Browse files Browse the repository at this point in the history
ard: use experimental topic search
  • Loading branch information
alex1702 committed Feb 14, 2024
2 parents 8f562be + e7365c3 commit 5e9c030
Show file tree
Hide file tree
Showing 20 changed files with 1,565 additions and 175 deletions.
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ sourceCompatibility = JavaVersion.VERSION_17
targetCompatibility = JavaVersion.VERSION_17
group = 'de.mediathekview'
archivesBaseName = "MServer"
version = '3.1.227'
version = '3.1.228'

def jarName = 'MServer.jar'
def mainClass = 'mServer.Main'
Expand Down
4 changes: 3 additions & 1 deletion src/main/java/mServer/crawler/sender/ard/ArdConstants.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@ public class ArdConstants {

public static final String ITEM_URL = API_URL + "/page-gateway/pages/ard/item/";

public static final String TOPICS_URL = API_URL + "/page-gateway/pages/%s/shows/";
public static final String TOPICS_URL = API_URL + "/page-gateway/pages/%s/editorial/experiment-a-z?embedded=false";
public static final String TOPICS_COMPILATION_URL = API_URL + "/page-gateway/widgets/%s/editorials/%s?pageNumber=0&pageSize=%s";
public static final String TOPIC_URL = API_URL + "/page-gateway/widgets/ard/asset/%s?pageSize=%d";
public static final String DAY_PAGE_URL = API_URL + "/page-gateway/compilations/%s/pastbroadcasts?startDateTime=%sT00:00:00.000Z&endDateTime=%sT23:59:59.000Z&pageNumber=0&pageSize=%d";

public static final int DAY_PAGE_SIZE = 100;
public static final int TOPICS_COMPILATION_PAGE_SIZE = 200;
public static final int TOPIC_PAGE_SIZE = 50;

public static final String DEFAULT_CLIENT = "ard";
Expand Down
31 changes: 16 additions & 15 deletions src/main/java/mServer/crawler/sender/ard/ArdCrawler.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,7 @@
import mServer.crawler.CrawlerTool;
import mServer.crawler.FilmeSuchen;
import mServer.crawler.sender.MediathekCrawler;
import mServer.crawler.sender.ard.tasks.ArdDayPageTask;
import mServer.crawler.sender.ard.tasks.ArdFilmDetailTask;
import mServer.crawler.sender.ard.tasks.ArdTopicPageTask;
import mServer.crawler.sender.ard.tasks.ArdTopicsOverviewTask;
import mServer.crawler.sender.ard.tasks.*;
import mServer.crawler.sender.base.CrawlerUrlDTO;

import java.time.LocalDateTime;
Expand All @@ -22,13 +19,12 @@

public class ArdCrawler extends MediathekCrawler {

public static final String SENDERNAME = Const.ARD;
private static final int MAX_DAYS_PAST = 2;
private static final int MAX_DAYS_PAST_AVAILABLE = 6;
private static final DateTimeFormatter DAY_PAGE_DATE_FORMATTER
= DateTimeFormatter.ofPattern("yyyy-MM-dd");

public static final String SENDERNAME = Const.ARD;

public ArdCrawler(FilmeSuchen ssearch, int startPrio) {
super(ssearch, SENDERNAME, 0, 1, startPrio);
}
Expand Down Expand Up @@ -73,13 +69,13 @@ private void addDayUrls(ConcurrentLinkedQueue<CrawlerUrlDTO> dayUrlsToCrawl, Loc
}

private void addSpecialDays(
ConcurrentLinkedQueue<CrawlerUrlDTO> dayUrlsToCrawl) {
final LocalDateTime[] specialDates = new LocalDateTime[] {
ConcurrentLinkedQueue<CrawlerUrlDTO> dayUrlsToCrawl) {
final LocalDateTime[] specialDates = new LocalDateTime[]{
};

final LocalDateTime minDayOnline = LocalDateTime.now().minusDays(MAX_DAYS_PAST_AVAILABLE);

for(LocalDateTime specialDate : specialDates) {
for (LocalDateTime specialDate : specialDates) {
if (specialDate.isAfter(minDayOnline)) {
addDayUrls(dayUrlsToCrawl, specialDate);
}
Expand All @@ -95,7 +91,7 @@ protected RecursiveTask<Set<DatenFilm>> createCrawlerTask() {
if (CrawlerTool.loadLongMax()) {
shows.addAll(getTopicsEntries());
}

Log.sysLog("ARD Anzahl topics: " + shows.size());
getDaysEntries().forEach(show -> {
if (!shows.contains(show)) {
shows.add(show);
Expand Down Expand Up @@ -125,20 +121,25 @@ private Set<ArdFilmInfoDto> getTopicsEntries() throws ExecutionException, Interr
topics.addAll(getTopicEntriesBySender(client));
}

Log.sysLog("ard mediathek topics: " + topics.size());
ConcurrentLinkedQueue<CrawlerUrlDTO> topicUrls = new ConcurrentLinkedQueue<>(topics);

final ArdTopicPageTask topicTask = new ArdTopicPageTask(this, topicUrls);
final Set<ArdFilmInfoDto> filmInfos = forkJoinPool.submit(topicTask).get();
Log.sysLog("ard shows by topics: " + filmInfos.size());
return filmInfos;
}

private ConcurrentLinkedQueue<CrawlerUrlDTO> getTopicEntriesBySender(final String sender) throws ExecutionException, InterruptedException {
ArdTopicsOverviewTask topicsTask
= new ArdTopicsOverviewTask(this, createTopicsOverviewUrl(sender));
private Set<CrawlerUrlDTO> getTopicEntriesBySender(final String sender) throws ExecutionException, InterruptedException {
ArdTopicsTask topicsTask
= new ArdTopicsTask(this, sender, createTopicsOverviewUrl(sender));

ConcurrentLinkedQueue<CrawlerUrlDTO> queue = new ConcurrentLinkedQueue<>(forkJoinPool.submit(topicsTask).get());
Log.sysLog(sender + " topic entries: " + queue.size());
return queue;
Log.sysLog(sender + " topics task entries: " + queue.size());

final Set<CrawlerUrlDTO> topicUrls = forkJoinPool.submit(new ArdTopicsLetterTask(this, sender, queue)).get();
Log.sysLog(sender + " topics: " + topicUrls.size());
return topicUrls;
}

private ConcurrentLinkedQueue<CrawlerUrlDTO> createTopicsOverviewUrl(final String client) {
Expand Down
40 changes: 40 additions & 0 deletions src/main/java/mServer/crawler/sender/ard/PaginationUrlDto.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package mServer.crawler.sender.ard;

import mServer.crawler.sender.base.CrawlerUrlDTO;

import java.util.HashSet;
import java.util.Set;

/**
 * Holds the crawler URLs collected from one paginated API response together
 * with the pagination state (current page index and total page count).
 */
public class PaginationUrlDto {

  // URLs extracted from the current response page.
  private final Set<CrawlerUrlDTO> urls = new HashSet<>();
  // Zero-based index of the page this response belongs to.
  private int actualPage;
  // Total number of pages available for the whole request.
  private int maxPages;

  /** Adds a single URL to the collected result set. */
  public void addUrl(CrawlerUrlDTO url) {
    urls.add(url);
  }

  /** Adds all given URLs to the collected result set. */
  public void addAll(Set<CrawlerUrlDTO> urls) {
    this.urls.addAll(urls);
  }

  /** Returns the collected URLs (live set, not a copy). */
  public Set<CrawlerUrlDTO> getUrls() {
    return urls;
  }

  public int getActualPage() {
    return actualPage;
  }

  public void setActualPage(int actualPage) {
    this.actualPage = actualPage;
  }

  public int getMaxPages() {
    return maxPages;
  }

  public void setMaxPages(int maxPages) {
    this.maxPages = maxPages;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package mServer.crawler.sender.ard.json;


import com.google.gson.JsonArray;
import com.google.gson.JsonDeserializationContext;
import com.google.gson.JsonDeserializer;
import com.google.gson.JsonElement;
import mServer.crawler.sender.ard.ArdConstants;
import mServer.crawler.sender.base.CrawlerUrlDTO;
import mServer.crawler.sender.base.JsonUtils;

import java.lang.reflect.Type;
import java.util.HashSet;
import java.util.Optional;
import java.util.Set;

/**
 * Deserializes the ARD topics overview page: each widget's {@code links.self.id}
 * is converted into a topics-compilation URL for the configured sender.
 */
public class ArdTopicsDeserializer implements JsonDeserializer<Set<CrawlerUrlDTO>> {
    private static final String ELEMENT_WIDGETS = "widgets";
    private static final String ELEMENT_LINKS = "links";
    private static final String ELEMENT_SELF = "self";

    private static final String ATTRIBUTE_ID = "id";

    // Sender/client id used to fill the compilation URL template.
    private final String sender;

    public ArdTopicsDeserializer(String sender) {
        this.sender = sender;
    }

    /**
     * Parses the overview JSON and returns one compilation URL per widget
     * that carries a usable self-link id. Widgets without an id are skipped.
     */
    @Override
    public Set<CrawlerUrlDTO> deserialize(
            JsonElement jsonElement, Type type, JsonDeserializationContext jsonDeserializationContext) {
        final Set<CrawlerUrlDTO> result = new HashSet<>();

        if (JsonUtils.hasElements(jsonElement, ELEMENT_WIDGETS)) {
            final JsonArray widgets = jsonElement.getAsJsonObject().getAsJsonArray(ELEMENT_WIDGETS);
            widgets.forEach(widget -> parseWidget(widget.getAsJsonObject()).ifPresent(result::add));
        }

        return result;
    }

    /** Extracts the compilation URL from a single widget, if its self-link id exists. */
    private Optional<CrawlerUrlDTO> parseWidget(final JsonElement compilation) {
        if (JsonUtils.hasElements(compilation, ELEMENT_LINKS)) {
            final JsonElement selfLink =
                    compilation.getAsJsonObject().get(ELEMENT_LINKS).getAsJsonObject().get(ELEMENT_SELF);
            // "links" may exist without a "self" entry: get() returns null then,
            // so guard before dereferencing to avoid a NullPointerException.
            if (selfLink != null && selfLink.isJsonObject()) {
                final Optional<String> id =
                        JsonUtils.getAttributeAsString(selfLink.getAsJsonObject(), ATTRIBUTE_ID);
                if (id.isPresent()) {
                    return Optional.of(
                            new CrawlerUrlDTO(
                                    String.format(
                                            ArdConstants.TOPICS_COMPILATION_URL,
                                            sender,
                                            id.get(),
                                            ArdConstants.TOPICS_COMPILATION_PAGE_SIZE)));
                }
            }
        }

        return Optional.empty();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package mServer.crawler.sender.ard.json;

import com.google.gson.JsonDeserializationContext;
import com.google.gson.JsonDeserializer;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import mServer.crawler.sender.ard.ArdConstants;
import mServer.crawler.sender.ard.PaginationUrlDto;
import mServer.crawler.sender.base.CrawlerUrlDTO;
import mServer.crawler.sender.base.JsonUtils;

import java.lang.reflect.Type;
import java.util.HashSet;
import java.util.Optional;
import java.util.Set;

/**
 * Deserializes one "letter" page of the ARD topics listing into a
 * {@link PaginationUrlDto}: every teaser becomes a topic URL, and the
 * response's pagination block is translated into current page / max pages.
 */
public class ArdTopicsLetterDeserializer implements JsonDeserializer<PaginationUrlDto> {

    private static final String ELEMENT_TEASERS = "teasers";
    private static final String ELEMENT_LINKS = "links";
    private static final String ELEMENT_TARGET = "target";
    private static final String ELEMENT_PAGE_NUMBER = "pageNumber";
    private static final String ELEMENT_TOTAL_ELEMENTS = "totalElements";
    private static final String ELEMENT_PAGE_SIZE = "pageSize";
    private static final String ELEMENT_PAGINATION = "pagination";

    private static final String ATTRIBUTE_ID = "id";

    @Override
    public PaginationUrlDto deserialize(
            final JsonElement jsonElement, final Type type, final JsonDeserializationContext context) {
        final PaginationUrlDto results = new PaginationUrlDto();

        // No teasers (missing, wrong type, or empty) => empty result with page counts 0.
        if (!jsonElement.getAsJsonObject().has(ELEMENT_TEASERS)
                || !jsonElement.getAsJsonObject().get(ELEMENT_TEASERS).isJsonArray()
                || jsonElement.getAsJsonObject().getAsJsonArray(ELEMENT_TEASERS).isEmpty()) {
            return results;
        }

        jsonElement.getAsJsonObject().getAsJsonArray(ELEMENT_TEASERS).forEach(teaser -> results.addAll(parseTeaser(teaser.getAsJsonObject())));

        final JsonElement paginationElement = jsonElement.getAsJsonObject().get(ELEMENT_PAGINATION);
        results.setActualPage(getChildElementAsIntOrZero(paginationElement, ELEMENT_PAGE_NUMBER));
        final int totalElements = getChildElementAsIntOrZero(paginationElement, ELEMENT_TOTAL_ELEMENTS);
        final int pageSize = getChildElementAsIntOrZero(paginationElement, ELEMENT_PAGE_SIZE);
        // Ceiling division: number of pages needed to cover totalElements.
        int maxPages = pageSize == 0 ? 0 :
                (totalElements + pageSize - 1) / pageSize;
        results.setMaxPages(maxPages);

        return results;
    }

    /** Reads a child of {@code parentElement} as int; 0 if the parent or child is missing/null. */
    private int getChildElementAsIntOrZero(
            final JsonElement parentElement, final String childElementName) {
        if (parentElement == null || parentElement.isJsonNull()) {
            return 0;
        }
        return getJsonElementAsIntOrZero(
                parentElement.getAsJsonObject().get(childElementName));
    }

    /** Returns the element as int; 0 if it is absent (Java null) or JSON null. */
    private int getJsonElementAsIntOrZero(final JsonElement element) {
        // JsonObject.get(key) returns Java null for an absent key (not JsonNull),
        // so the null check is required to avoid a NullPointerException.
        if (element == null || element.isJsonNull()) {
            return 0;
        }
        return element.getAsInt();
    }

    /**
     * Builds the topic URL for one teaser. Prefers the id of links.target;
     * falls back to the teaser's own id attribute.
     */
    private Set<CrawlerUrlDTO> parseTeaser(final JsonObject teaserObject) {
        final Set<CrawlerUrlDTO> results = new HashSet<>();

        final Optional<String> id;

        if (JsonUtils.checkTreePath(teaserObject, ELEMENT_LINKS, ELEMENT_TARGET)) {
            final JsonObject targetObject =
                    teaserObject.get(ELEMENT_LINKS).getAsJsonObject().get(ELEMENT_TARGET).getAsJsonObject();
            id = JsonUtils.getAttributeAsString(targetObject, ATTRIBUTE_ID);
        } else {
            id = JsonUtils.getAttributeAsString(teaserObject, ATTRIBUTE_ID);
        }

        id.ifPresent(
                nonNullId ->
                        results.add(
                                new CrawlerUrlDTO(
                                        String.format(
                                                ArdConstants.TOPIC_URL, nonNullId, ArdConstants.TOPIC_PAGE_SIZE))));

        return results;
    }
}
Loading

0 comments on commit 5e9c030

Please sign in to comment.