From 172138c89b6e321aceaa7a61bfb581b7c7ba2c3d Mon Sep 17 00:00:00 2001 From: pidoubleyou Date: Mon, 5 Feb 2024 20:57:03 +0100 Subject: [PATCH 1/6] update deep search --- .../mServer/crawler/sender/ard/tasks/ArdTopicPageTask.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicPageTask.java b/src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicPageTask.java index bc508760..b735be93 100644 --- a/src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicPageTask.java +++ b/src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicPageTask.java @@ -75,6 +75,10 @@ public class ArdTopicPageTask extends ArdTaskBase TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL21kci5kZS9zZW5kZXJlaWhlbi9zdGFmZmVsc2VyaWUtaW4tYWxsZXItZnJldW5kc2NoYWZ0"); // Super.markt TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL3JiYi1vbmxpbmUuZGUvc3VwZXJtYXJrdA"); + // Verrückt nach Meer + TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL2JyLmRlL2Jyb2FkY2FzdFNlcmllcy92ZXJydWVja3QtbmFjaC1tZWVy"); + // Dahoam is dahoam + TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL2JyLmRlL2Jyb2FkY2FzdFNlcmllcy9icm9hZGNhc3RTZXJpZXM6L2JyZGUvZmVybnNlaGVuL2JheWVyaXNjaGVzLWZlcm5zZWhlbi9zZW5kdW5nZW4vZGFob2FtLWlzLWRhaG9hbQ"); } public ArdTopicPageTask(MediathekReader aCrawler, From 16a648215813ac1682dcc2b18e172b14402a4864 Mon Sep 17 00:00:00 2001 From: pidoubleyou Date: Mon, 5 Feb 2024 22:18:21 +0100 Subject: [PATCH 2/6] use new experimental topics-urls --- .../crawler/sender/ard/ArdConstants.java | 4 +- .../crawler/sender/ard/ArdCrawler.java | 31 +++--- .../crawler/sender/ard/PaginationUrlDto.java | 40 +++++++ .../ard/json/ArdTopicsDeserializer.java | 62 +++++++++++ .../ard/json/ArdTopicsLetterDeserializer.java | 91 +++++++++++++++ .../json/ArdTopicsOverviewDeserializer.java | 104 ------------------ .../sender/ard/tasks/ArdTopicsLetterTask.java | 80 ++++++++++++++ ...csOverviewTask.java => ArdTopicsTask.java} | 36 +++--- 8 files changed, 310 insertions(+), 138 deletions(-) create mode 100644 
src/main/java/mServer/crawler/sender/ard/PaginationUrlDto.java create mode 100644 src/main/java/mServer/crawler/sender/ard/json/ArdTopicsDeserializer.java create mode 100644 src/main/java/mServer/crawler/sender/ard/json/ArdTopicsLetterDeserializer.java delete mode 100644 src/main/java/mServer/crawler/sender/ard/json/ArdTopicsOverviewDeserializer.java create mode 100644 src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicsLetterTask.java rename src/main/java/mServer/crawler/sender/ard/tasks/{ArdTopicsOverviewTask.java => ArdTopicsTask.java} (53%) diff --git a/src/main/java/mServer/crawler/sender/ard/ArdConstants.java b/src/main/java/mServer/crawler/sender/ard/ArdConstants.java index bae17898..3a149181 100644 --- a/src/main/java/mServer/crawler/sender/ard/ArdConstants.java +++ b/src/main/java/mServer/crawler/sender/ard/ArdConstants.java @@ -7,11 +7,13 @@ public class ArdConstants { public static final String ITEM_URL = API_URL + "/page-gateway/pages/ard/item/"; - public static final String TOPICS_URL = API_URL + "/page-gateway/pages/%s/shows/"; + public static final String TOPICS_URL = API_URL + "/page-gateway/pages/%s/editorial/experiment-a-z?embedded=false"; + public static final String TOPICS_COMPILATION_URL = API_URL + "/page-gateway/widgets/%s/editorials/%s?pageNumber=0&pageSize=%s"; public static final String TOPIC_URL = API_URL + "/page-gateway/widgets/ard/asset/%s?pageSize=%d"; public static final String DAY_PAGE_URL = API_URL + "/page-gateway/compilations/%s/pastbroadcasts?startDateTime=%sT00:00:00.000Z&endDateTime=%sT23:59:59.000Z&pageNumber=0&pageSize=%d"; public static final int DAY_PAGE_SIZE = 100; + public static final int TOPICS_COMPILATION_PAGE_SIZE = 200; public static final int TOPIC_PAGE_SIZE = 50; public static final String DEFAULT_CLIENT = "ard"; diff --git a/src/main/java/mServer/crawler/sender/ard/ArdCrawler.java b/src/main/java/mServer/crawler/sender/ard/ArdCrawler.java index c6bf04ac..812419d5 100644 --- 
a/src/main/java/mServer/crawler/sender/ard/ArdCrawler.java +++ b/src/main/java/mServer/crawler/sender/ard/ArdCrawler.java @@ -6,10 +6,7 @@ import mServer.crawler.CrawlerTool; import mServer.crawler.FilmeSuchen; import mServer.crawler.sender.MediathekCrawler; -import mServer.crawler.sender.ard.tasks.ArdDayPageTask; -import mServer.crawler.sender.ard.tasks.ArdFilmDetailTask; -import mServer.crawler.sender.ard.tasks.ArdTopicPageTask; -import mServer.crawler.sender.ard.tasks.ArdTopicsOverviewTask; +import mServer.crawler.sender.ard.tasks.*; import mServer.crawler.sender.base.CrawlerUrlDTO; import java.time.LocalDateTime; @@ -22,13 +19,12 @@ public class ArdCrawler extends MediathekCrawler { + public static final String SENDERNAME = Const.ARD; private static final int MAX_DAYS_PAST = 2; private static final int MAX_DAYS_PAST_AVAILABLE = 6; private static final DateTimeFormatter DAY_PAGE_DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd"); - public static final String SENDERNAME = Const.ARD; - public ArdCrawler(FilmeSuchen ssearch, int startPrio) { super(ssearch, SENDERNAME, 0, 1, startPrio); } @@ -73,13 +69,13 @@ private void addDayUrls(ConcurrentLinkedQueue dayUrlsToCrawl, Loc } private void addSpecialDays( - ConcurrentLinkedQueue dayUrlsToCrawl) { - final LocalDateTime[] specialDates = new LocalDateTime[] { + ConcurrentLinkedQueue dayUrlsToCrawl) { + final LocalDateTime[] specialDates = new LocalDateTime[]{ }; final LocalDateTime minDayOnline = LocalDateTime.now().minusDays(MAX_DAYS_PAST_AVAILABLE); - for(LocalDateTime specialDate : specialDates) { + for (LocalDateTime specialDate : specialDates) { if (specialDate.isAfter(minDayOnline)) { addDayUrls(dayUrlsToCrawl, specialDate); } @@ -95,7 +91,7 @@ protected RecursiveTask> createCrawlerTask() { if (CrawlerTool.loadLongMax()) { shows.addAll(getTopicsEntries()); } - + Log.sysLog("ARD Anzahl topics: " + shows.size()); getDaysEntries().forEach(show -> { if (!shows.contains(show)) { shows.add(show); @@ -125,20 
+121,25 @@ private Set getTopicsEntries() throws ExecutionException, Interr topics.addAll(getTopicEntriesBySender(client)); } + Log.sysLog("ard mediathek topics: " + topics.size()); ConcurrentLinkedQueue topicUrls = new ConcurrentLinkedQueue<>(topics); final ArdTopicPageTask topicTask = new ArdTopicPageTask(this, topicUrls); final Set filmInfos = forkJoinPool.submit(topicTask).get(); + Log.sysLog("ard shows by topics: " + filmInfos.size()); return filmInfos; } - private ConcurrentLinkedQueue getTopicEntriesBySender(final String sender) throws ExecutionException, InterruptedException { - ArdTopicsOverviewTask topicsTask - = new ArdTopicsOverviewTask(this, createTopicsOverviewUrl(sender)); + private Set getTopicEntriesBySender(final String sender) throws ExecutionException, InterruptedException { + ArdTopicsTask topicsTask + = new ArdTopicsTask(this, sender, createTopicsOverviewUrl(sender)); ConcurrentLinkedQueue queue = new ConcurrentLinkedQueue<>(forkJoinPool.submit(topicsTask).get()); - Log.sysLog(sender + " topic entries: " + queue.size()); - return queue; + Log.sysLog(sender + " topics task entries: " + queue.size()); + + final Set topicUrls = forkJoinPool.submit(new ArdTopicsLetterTask(this, sender, queue)).get(); + Log.sysLog(sender + " topics: " + topicUrls.size()); + return topicUrls; } private ConcurrentLinkedQueue createTopicsOverviewUrl(final String client) { diff --git a/src/main/java/mServer/crawler/sender/ard/PaginationUrlDto.java b/src/main/java/mServer/crawler/sender/ard/PaginationUrlDto.java new file mode 100644 index 00000000..19da8588 --- /dev/null +++ b/src/main/java/mServer/crawler/sender/ard/PaginationUrlDto.java @@ -0,0 +1,40 @@ +package mServer.crawler.sender.ard; + +import mServer.crawler.sender.base.CrawlerUrlDTO; + +import java.util.HashSet; +import java.util.Set; + +public class PaginationUrlDto { + private final Set urls = new HashSet<>(); + private int actualPage; + private int maxPages; + + public void addUrl(CrawlerUrlDTO url) { + 
urls.add(url); + } + + public void addAll(Set urls) { + this.urls.addAll(urls); + } + + public Set getUrls() { + return urls; + } + + public int getActualPage() { + return actualPage; + } + + public int getMaxPages() { + return maxPages; + } + + public void setActualPage(int actualPage) { + this.actualPage = actualPage; + } + + public void setMaxPages(int maxPages) { + this.maxPages = maxPages; + } +} diff --git a/src/main/java/mServer/crawler/sender/ard/json/ArdTopicsDeserializer.java b/src/main/java/mServer/crawler/sender/ard/json/ArdTopicsDeserializer.java new file mode 100644 index 00000000..73fa2dbc --- /dev/null +++ b/src/main/java/mServer/crawler/sender/ard/json/ArdTopicsDeserializer.java @@ -0,0 +1,62 @@ +package mServer.crawler.sender.ard.json; + + +import com.google.gson.JsonArray; +import com.google.gson.JsonDeserializationContext; +import com.google.gson.JsonDeserializer; +import com.google.gson.JsonElement; +import mServer.crawler.sender.ard.ArdConstants; +import mServer.crawler.sender.base.CrawlerUrlDTO; +import mServer.crawler.sender.base.JsonUtils; + +import java.lang.reflect.Type; +import java.util.HashSet; +import java.util.Optional; +import java.util.Set; + +public class ArdTopicsDeserializer implements JsonDeserializer> { + private static final String ELEMENT_WIDGETS = "widgets"; + private static final String ELEMENT_LINKS = "links"; + private static final String ELEMENT_SELF = "self"; + + private static final String ATTRIBUTE_ID = "id"; + + private final String sender; + + public ArdTopicsDeserializer(String sender) { + this.sender = sender; + } + + @Override + public Set deserialize( + JsonElement jsonElement, Type type, JsonDeserializationContext jsonDeserializationContext) { + final Set result = new HashSet<>(); + + if (JsonUtils.hasElements(jsonElement, ELEMENT_WIDGETS)) { + final JsonArray widgets = jsonElement.getAsJsonObject().getAsJsonArray(ELEMENT_WIDGETS); + widgets.forEach(widget -> 
parseWidget(widget.getAsJsonObject()).ifPresent(result::add)); + } + + return result; + } + + private Optional parseWidget(final JsonElement compilation) { + if (JsonUtils.hasElements(compilation, ELEMENT_LINKS)) { + final JsonElement selfLink = + compilation.getAsJsonObject().get(ELEMENT_LINKS).getAsJsonObject().get(ELEMENT_SELF); + final Optional id = + JsonUtils.getAttributeAsString(selfLink.getAsJsonObject(), ATTRIBUTE_ID); + if (id.isPresent()) { + return Optional.of( + new CrawlerUrlDTO( + String.format( + ArdConstants.TOPICS_COMPILATION_URL, + sender, + id.get(), + ArdConstants.TOPICS_COMPILATION_PAGE_SIZE))); + } + } + + return Optional.empty(); + } +} diff --git a/src/main/java/mServer/crawler/sender/ard/json/ArdTopicsLetterDeserializer.java b/src/main/java/mServer/crawler/sender/ard/json/ArdTopicsLetterDeserializer.java new file mode 100644 index 00000000..161f66cb --- /dev/null +++ b/src/main/java/mServer/crawler/sender/ard/json/ArdTopicsLetterDeserializer.java @@ -0,0 +1,91 @@ +package mServer.crawler.sender.ard.json; + +import com.google.gson.JsonDeserializationContext; +import com.google.gson.JsonDeserializer; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import mServer.crawler.sender.ard.ArdConstants; +import mServer.crawler.sender.ard.PaginationUrlDto; +import mServer.crawler.sender.base.CrawlerUrlDTO; +import mServer.crawler.sender.base.JsonUtils; + +import java.lang.reflect.Type; +import java.util.HashSet; +import java.util.Optional; +import java.util.Set; + +public class ArdTopicsLetterDeserializer implements JsonDeserializer { + + private static final String ELEMENT_TEASERS = "teasers"; + private static final String ELEMENT_LINKS = "links"; + private static final String ELEMENT_TARGET = "target"; + private static final String ELEMENT_PAGE_NUMBER = "pageNumber"; + private static final String ELEMENT_TOTAL_ELEMENTS = "totalElements"; + private static final String ELEMENT_PAGE_SIZE = "pageSize"; + private static final 
String ELEMENT_PAGINATION = "pagination"; + + private static final String ATTRIBUTE_ID = "id"; + + @Override + public PaginationUrlDto deserialize( + final JsonElement jsonElement, final Type type, final JsonDeserializationContext context) { + final PaginationUrlDto results = new PaginationUrlDto(); + + if (!jsonElement.getAsJsonObject().has(ELEMENT_TEASERS) + || !jsonElement.getAsJsonObject().get(ELEMENT_TEASERS).isJsonArray() + || jsonElement.getAsJsonObject().getAsJsonArray(ELEMENT_TEASERS).isEmpty()) { + return results; + } + + jsonElement.getAsJsonObject().getAsJsonArray(ELEMENT_TEASERS).forEach(teaser -> results.addAll(parseTeaser(teaser.getAsJsonObject()))); + + final JsonElement paginationElement = jsonElement.getAsJsonObject().get(ELEMENT_PAGINATION); + results.setActualPage(getChildElementAsIntOrNullIfNotExist(paginationElement, ELEMENT_PAGE_NUMBER)); + final int totalElements = getChildElementAsIntOrNullIfNotExist(paginationElement, ELEMENT_TOTAL_ELEMENTS); + final int pageSize = getChildElementAsIntOrNullIfNotExist(paginationElement, ELEMENT_PAGE_SIZE); + int maxPageSize = pageSize == 0 ? 
0 : + (totalElements+pageSize-1)/pageSize; + results.setMaxPages(maxPageSize); + + return results; + } + + private int getChildElementAsIntOrNullIfNotExist( + final JsonElement parentElement, final String childElementName) { + if (parentElement == null || parentElement.isJsonNull()) { + return 0; + } + return getJsonElementAsIntOrNullIfNotExist( + parentElement.getAsJsonObject().get(childElementName)); + } + + private int getJsonElementAsIntOrNullIfNotExist(final JsonElement element) { /* returns 0 when the element is absent or JSON null, otherwise its int value */ + if (element == null || element.isJsonNull()) { /* JsonObject.get yields null for a missing member */ + return 0; + } + return element.getAsInt(); + } + + private Set parseTeaser(final JsonObject teaserObject) { + final Set results = new HashSet<>(); + + final Optional id; + + if (JsonUtils.checkTreePath(teaserObject, ELEMENT_LINKS, ELEMENT_TARGET)) { + final JsonObject targetObject = + teaserObject.get(ELEMENT_LINKS).getAsJsonObject().get(ELEMENT_TARGET).getAsJsonObject(); + id = JsonUtils.getAttributeAsString(targetObject, ATTRIBUTE_ID); + } else { + id = JsonUtils.getAttributeAsString(teaserObject, ATTRIBUTE_ID); + } + + id.ifPresent( + nonNullId -> + results.add( + new CrawlerUrlDTO( + String.format( + ArdConstants.TOPIC_URL, nonNullId, ArdConstants.TOPIC_PAGE_SIZE)))); + + return results; + } +} diff --git a/src/main/java/mServer/crawler/sender/ard/json/ArdTopicsOverviewDeserializer.java b/src/main/java/mServer/crawler/sender/ard/json/ArdTopicsOverviewDeserializer.java deleted file mode 100644 index 8c35dcae..00000000 --- a/src/main/java/mServer/crawler/sender/ard/json/ArdTopicsOverviewDeserializer.java +++ /dev/null @@ -1,104 +0,0 @@ -package mServer.crawler.sender.ard.json; - -import com.google.gson.JsonDeserializationContext; -import com.google.gson.JsonDeserializer; -import com.google.gson.JsonElement; -import com.google.gson.JsonObject; -import java.lang.reflect.Type; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Map.Entry; -import java.util.Optional; -import java.util.Set; -import 
mServer.crawler.sender.ard.ArdConstants; -import mServer.crawler.sender.base.CrawlerUrlDTO; -import mServer.crawler.sender.base.JsonUtils; - -public class ArdTopicsOverviewDeserializer implements JsonDeserializer> { - - private static final String ELEMENT_COMPILATIONS = "compilations"; - private static final String ELEMENT_PUBLICATION_SERVICE = "publicationService"; - private static final String ELEMENT_TEASERS = "teasers"; - private static final String ELEMENT_LINKS = "links"; - private static final String ELEMENT_TARGET = "target"; - private static final String ELEMENT_WIDGETS = "widgets"; - - private static final String ATTRIBUTE_ID = "id"; - private static final String ATTRIBUTE_NAME = "name"; - - private static final String[] IGNORED_SENDER = new String[] {"zdf", "kika", "3sat", "arte"}; - - @Override - public Set deserialize(JsonElement jsonElement, Type type, - JsonDeserializationContext context) { - Set results = new HashSet<>(); - - if (!jsonElement.getAsJsonObject().has(ELEMENT_WIDGETS) - || !jsonElement.getAsJsonObject().get(ELEMENT_WIDGETS).isJsonArray() - || jsonElement.getAsJsonObject().getAsJsonArray(ELEMENT_WIDGETS).size() == 0 - || !jsonElement - .getAsJsonObject() - .getAsJsonArray(ELEMENT_WIDGETS) - .get(0) - .getAsJsonObject() - .has(ELEMENT_COMPILATIONS)) { - return results; - } - - final JsonObject compilationObject = jsonElement.getAsJsonObject() - .getAsJsonArray(ELEMENT_WIDGETS).get(0) - .getAsJsonObject().get(ELEMENT_COMPILATIONS).getAsJsonObject(); - - for (Entry letterEntry : compilationObject.entrySet()) { - results.addAll(parseLetter(letterEntry.getValue().getAsJsonObject())); - } - - return results; - } - - private Set parseLetter(final JsonObject letterObject) { - Set results = new HashSet<>(); - - if (!letterObject.getAsJsonObject().has(ELEMENT_TEASERS) - || !letterObject.getAsJsonObject().get(ELEMENT_TEASERS).isJsonArray()) { - return results; - } - - for (JsonElement teaserElement : - 
letterObject.getAsJsonObject().getAsJsonArray(ELEMENT_TEASERS)) { - JsonObject teaserObject = teaserElement.getAsJsonObject(); - Optional id; - if (JsonUtils.checkTreePath(teaserObject, ELEMENT_LINKS, ELEMENT_TARGET)) { - JsonObject targetObject = - teaserObject.get(ELEMENT_LINKS).getAsJsonObject().get(ELEMENT_TARGET).getAsJsonObject(); - id = JsonUtils.getAttributeAsString(targetObject, ATTRIBUTE_ID); - } else { - id = JsonUtils.getAttributeAsString(teaserObject, ATTRIBUTE_ID); - } - - if (isRelevant(teaserObject)) { - id.ifPresent(s -> results.add(new CrawlerUrlDTO( - (ArdConstants.TOPIC_URL).formatted(s, ArdConstants.TOPIC_PAGE_SIZE)))); - } - } - - return results; - } - - - private boolean isRelevant(final JsonObject teaserObject) { - if (teaserObject.has(ELEMENT_PUBLICATION_SERVICE)) { - final JsonObject publicationService = - teaserObject.get(ELEMENT_PUBLICATION_SERVICE).getAsJsonObject(); - final Optional attributeAsString = - JsonUtils.getAttributeAsString(publicationService, ATTRIBUTE_NAME); - if (attributeAsString.isPresent()) { - - return !Arrays.stream(IGNORED_SENDER) - .anyMatch(sender -> sender.equalsIgnoreCase(attributeAsString.get())); - } - } - - return true; - } -} diff --git a/src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicsLetterTask.java b/src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicsLetterTask.java new file mode 100644 index 00000000..233eb884 --- /dev/null +++ b/src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicsLetterTask.java @@ -0,0 +1,80 @@ +package mServer.crawler.sender.ard.tasks; + +import com.google.gson.reflect.TypeToken; +import jakarta.ws.rs.client.WebTarget; +import mServer.crawler.sender.MediathekReader; +import mServer.crawler.sender.ard.PaginationUrlDto; +import mServer.crawler.sender.ard.json.ArdTopicsLetterDeserializer; +import mServer.crawler.sender.base.AbstractRecursivConverterTask; +import mServer.crawler.sender.base.CrawlerUrlDTO; +import org.apache.logging.log4j.LogManager; +import 
org.apache.logging.log4j.Logger; + +import java.lang.reflect.Type; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +public class ArdTopicsLetterTask extends ArdTaskBase { + private static final Logger LOG = LogManager.getLogger(ArdTopicsLetterTask.class); + private static final String PAGE_NUMBER = "pageNumber"; + private static final String URL_PAGE_NUMBER_REPLACE_REGEX = PAGE_NUMBER + "=\\d+"; + private static final String PAGE_NUMBER_URL_ENCODED = PAGE_NUMBER + "="; + private static final Type PAGINATION_URL_DTO_TYPE_TOKEN = + new TypeToken() { + }.getType(); + private final String sender; + + public ArdTopicsLetterTask( + final MediathekReader crawler, + final String sender, + final ConcurrentLinkedQueue urlToCrawlDtos) { + super(crawler, urlToCrawlDtos); + this.sender = sender; + registerJsonDeserializer(PAGINATION_URL_DTO_TYPE_TOKEN, new ArdTopicsLetterDeserializer()); + } + + @Override + protected AbstractRecursivConverterTask createNewOwnInstance( + final ConcurrentLinkedQueue aElementsToProcess) { + return new ArdTopicsLetterTask(crawler, sender, aElementsToProcess); + } + + @Override + protected void processRestTarget(final CrawlerUrlDTO aDTO, final WebTarget aTarget) { + final PaginationUrlDto results = deserialize(aTarget, PAGINATION_URL_DTO_TYPE_TOKEN); + LOG.debug("Found {} shows for {}.", results.getUrls().size(), sender); + taskResults.addAll(results.getUrls()); + + if (results.getActualPage() == 0 && results.getMaxPages() > 1) { + final ConcurrentLinkedQueue subpages = createSubPageUrls(aTarget, results.getMaxPages()); + if (!subpages.isEmpty()) { + taskResults.addAll(createNewOwnInstance(subpages).fork().join()); + } + } + } + + private ConcurrentLinkedQueue createSubPageUrls(final WebTarget aTarget, final int maxPages) { + + return IntStream.range(1, maxPages) + .mapToObj(subpageNumber -> changePageNumber(aTarget, subpageNumber)) + .map(CrawlerUrlDTO::new) + 
.distinct() + .collect(Collectors.toCollection(ConcurrentLinkedQueue::new)); + } + + private String changePageNumber(final WebTarget aTarget, final int newPageNumber) { + return aTarget.getUri().toString().contains(PAGE_NUMBER) + ? aTarget + .getUriBuilder() + .replaceQuery( + aTarget + .getUri() + .getRawQuery() + .replaceAll( + URL_PAGE_NUMBER_REPLACE_REGEX, PAGE_NUMBER_URL_ENCODED + newPageNumber)) + .build() + .toString() + : aTarget.queryParam(PAGE_NUMBER, newPageNumber).getUri().toString(); + } +} diff --git a/src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicsOverviewTask.java b/src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicsTask.java similarity index 53% rename from src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicsOverviewTask.java rename to src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicsTask.java index cc0191a0..ed99a5e7 100644 --- a/src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicsOverviewTask.java +++ b/src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicsTask.java @@ -1,41 +1,41 @@ package mServer.crawler.sender.ard.tasks; import com.google.gson.reflect.TypeToken; -import de.mediathekview.mlib.Config; -import java.lang.reflect.Type; -import java.util.Set; -import java.util.concurrent.ConcurrentLinkedQueue; import jakarta.ws.rs.client.WebTarget; import mServer.crawler.sender.MediathekReader; -import mServer.crawler.sender.ard.json.ArdTopicsOverviewDeserializer; -import mServer.crawler.sender.base.CrawlerUrlDTO; +import mServer.crawler.sender.ard.json.ArdTopicsDeserializer; import mServer.crawler.sender.base.AbstractRecursivConverterTask; +import mServer.crawler.sender.base.CrawlerUrlDTO; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; -public class ArdTopicsOverviewTask extends ArdTaskBase { +import java.lang.reflect.Type; +import java.util.Set; +import java.util.concurrent.ConcurrentLinkedQueue; +public class ArdTopicsTask extends ArdTaskBase { + private static final Logger 
LOG = LogManager.getLogger(ArdTopicsTask.class); private static final Type SET_CRAWLER_URL_TYPE_TOKEN = new TypeToken>() { }.getType(); + private final String sender; - public ArdTopicsOverviewTask(MediathekReader aCrawler, - ConcurrentLinkedQueue aUrlToCrawlDtos) { - super(aCrawler, aUrlToCrawlDtos); - - registerJsonDeserializer(SET_CRAWLER_URL_TYPE_TOKEN, new ArdTopicsOverviewDeserializer()); + public ArdTopicsTask(MediathekReader aCrawler, String sender, ConcurrentLinkedQueue urlToCrawlDTOs) { + super(aCrawler, urlToCrawlDTOs); + this.sender = sender; + registerJsonDeserializer(SET_CRAWLER_URL_TYPE_TOKEN, new ArdTopicsDeserializer(sender)); } @Override protected AbstractRecursivConverterTask createNewOwnInstance( ConcurrentLinkedQueue aElementsToProcess) { - return new ArdTopicsOverviewTask(crawler, aElementsToProcess); + return new ArdTopicsTask(this.crawler, sender, aElementsToProcess); } @Override protected void processRestTarget(CrawlerUrlDTO aDTO, WebTarget aTarget) { - if (Config.getStop()) { - return; - } - - Set results = deserialize(aTarget, SET_CRAWLER_URL_TYPE_TOKEN); + final Set results = deserialize(aTarget, SET_CRAWLER_URL_TYPE_TOKEN); + LOG.debug("Found {} topics for {}.", results.size(), sender); taskResults.addAll(results); + } } From f9576395361ca512ecfe3637286dbddc5a75ab10 Mon Sep 17 00:00:00 2001 From: pidoubleyou Date: Sun, 11 Feb 2024 16:07:12 +0100 Subject: [PATCH 3/6] arte: use recent list instead of categories --- .../ArteCategoryFilmListDeserializer.java | 36 +- .../sender/arte/ArteCategoryFilmsDTO.java | 11 +- .../sender/arte/ArteListBaseDeserializer.java | 55 ++ .../sender/arte/ArteSubPageDeserializer.java | 28 + .../crawler/sender/arte/MediathekArte.java | 40 +- .../ArteCategoryFilmListDeserializerTest.java | 8 +- .../arte/ArteSubPageDeserializerTest.java | 55 ++ .../resources/arte/arte_video_list1.json | 482 ++++++++++++++++++ .../resources/arte/arte_video_list2.json | 353 +++++++++++++ .../resources/arte/arte_video_list_last.json | 
213 ++++++++ 10 files changed, 1245 insertions(+), 36 deletions(-) create mode 100644 src/main/java/mServer/crawler/sender/arte/ArteListBaseDeserializer.java create mode 100644 src/main/java/mServer/crawler/sender/arte/ArteSubPageDeserializer.java create mode 100644 src/test/developTest/java/mServer/crawler/sender/arte/ArteSubPageDeserializerTest.java create mode 100644 src/test/developTest/resources/arte/arte_video_list1.json create mode 100644 src/test/developTest/resources/arte/arte_video_list2.json create mode 100644 src/test/developTest/resources/arte/arte_video_list_last.json diff --git a/src/main/java/mServer/crawler/sender/arte/ArteCategoryFilmListDeserializer.java b/src/main/java/mServer/crawler/sender/arte/ArteCategoryFilmListDeserializer.java index 7484c964..bec62c5c 100644 --- a/src/main/java/mServer/crawler/sender/arte/ArteCategoryFilmListDeserializer.java +++ b/src/main/java/mServer/crawler/sender/arte/ArteCategoryFilmListDeserializer.java @@ -1,24 +1,19 @@ package mServer.crawler.sender.arte; -import com.google.gson.JsonDeserializationContext; -import com.google.gson.JsonDeserializer; -import com.google.gson.JsonElement; -import com.google.gson.JsonObject; -import com.google.gson.JsonParseException; +import com.google.gson.*; import de.mediathekview.mlib.tool.Log; import java.lang.reflect.Type; +import java.util.Optional; /** * Deserialisiert Ergebnisse der Anfrage den Filmen einer Kategorie. 
* Beispiel-URL: * https://www.arte.tv/guide/api/api/zones/de/web/videos_subcategory_CMG/?page=1&limit=100 */ -public class ArteCategoryFilmListDeserializer implements JsonDeserializer { +public class ArteCategoryFilmListDeserializer extends ArteListBaseDeserializer implements JsonDeserializer { private static final String JSON_ELEMENT_CONTENT = "content"; - private static final String JSON_ELEMENT_DATA = "data"; - private static final String JSON_ELEMENT_PROGRAMID = "programId"; private static final String JSON_ELEMENT_VALUE = "value"; private static final String JSON_ELEMENT_ZONES = "zones"; @@ -39,30 +34,13 @@ public ArteCategoryFilmsDTO deserialize(JsonElement aJsonElement, Type aType, Js for (JsonElement jsonElement : zoneElement.getAsJsonArray()) { if(jsonElement.getAsJsonObject().has(JSON_ELEMENT_CONTENT)) { final JsonObject contentObject = jsonElement.getAsJsonObject().get(JSON_ELEMENT_CONTENT).getAsJsonObject(); - if (contentObject.has(JSON_ELEMENT_DATA)) { - for(JsonElement dataElement : contentObject.get(JSON_ELEMENT_DATA).getAsJsonArray()) { - if (!dataElement.getAsJsonObject().get(JSON_ELEMENT_PROGRAMID).isJsonNull()) { - String programId = dataElement.getAsJsonObject().get(JSON_ELEMENT_PROGRAMID).getAsString(); - if (programId != null) { - if (programId.startsWith("RC-")) { - try { - long collectionId = Long.parseLong(programId.replace("RC-", "")); - dto.addCollection(String.format("RC-%06d", collectionId)); - } catch (NumberFormatException e) { - Log.errorLog(12834939, "Invalid collection id: " + programId); - } - } else { - dto.addProgramId(programId); - } - } - } - } - } + extractProgramIdFromData(contentObject, dto); + + Optional url = parsePagination(contentObject); + url.ifPresent(dto::setNextPageUrl); } } - dto.setNextPage(false); - return dto; } } diff --git a/src/main/java/mServer/crawler/sender/arte/ArteCategoryFilmsDTO.java b/src/main/java/mServer/crawler/sender/arte/ArteCategoryFilmsDTO.java index 7cd40a62..70b2aeb7 100644 --- 
a/src/main/java/mServer/crawler/sender/arte/ArteCategoryFilmsDTO.java +++ b/src/main/java/mServer/crawler/sender/arte/ArteCategoryFilmsDTO.java @@ -8,7 +8,7 @@ public class ArteCategoryFilmsDTO { private final Set programIds = new HashSet<>(); private final Set collectionIds = new HashSet<>(); - private boolean hasNextPage; + private String nextPageUrl; public void addProgramId(String aProgramId) { programIds.add(aProgramId); @@ -25,10 +25,13 @@ public Set getCollectionIds() { } public boolean hasNextPage() { - return hasNextPage; + return nextPageUrl != null && !nextPageUrl.isEmpty(); } - public void setNextPage(boolean aNextPage) { - hasNextPage = aNextPage; + public String getNextPageUrl() { + return nextPageUrl; + } + public void setNextPageUrl(String url) { + nextPageUrl = url; } } diff --git a/src/main/java/mServer/crawler/sender/arte/ArteListBaseDeserializer.java b/src/main/java/mServer/crawler/sender/arte/ArteListBaseDeserializer.java new file mode 100644 index 00000000..ea844c34 --- /dev/null +++ b/src/main/java/mServer/crawler/sender/arte/ArteListBaseDeserializer.java @@ -0,0 +1,55 @@ +package mServer.crawler.sender.arte; + +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import de.mediathekview.mlib.tool.Log; +import mServer.crawler.sender.base.JsonUtils; +import mServer.crawler.sender.base.UrlUtils; + +import java.util.Optional; + +public abstract class ArteListBaseDeserializer { + + private static final String JSON_ELEMENT_DATA = "data"; + private static final String JSON_ELEMENT_PROGRAMID = "programId"; + private static final String JSON_ELEMENT_PAGINATION = "pagination"; + private static final String JSON_ELEMENT_LINKS = "links"; + private static final String JSON_ELEMENT_NEXT = "next"; + + protected Optional parsePagination(JsonObject jsonObject) { + if (jsonObject.has(JSON_ELEMENT_PAGINATION) && !jsonObject.get(JSON_ELEMENT_PAGINATION).isJsonNull()) { + final JsonObject pagionationObject = 
jsonObject.get(JSON_ELEMENT_PAGINATION).getAsJsonObject(); + if(pagionationObject.has(JSON_ELEMENT_LINKS)) { + final JsonObject linksObject = pagionationObject.get(JSON_ELEMENT_LINKS).getAsJsonObject(); + final Optional nextUrl = JsonUtils.getAttributeAsString(linksObject, JSON_ELEMENT_NEXT); + if (nextUrl.isPresent()) { + return Optional.of(UrlUtils.addDomainIfMissing(nextUrl.get().replace("/api/emac/", "/api/rproxy/emac/"), "https://www.arte.tv")); + } + } + } + return Optional.empty(); + } + + + protected void extractProgramIdFromData(JsonObject jsonObectWithData, ArteCategoryFilmsDTO dto) { /* adds every programId found in the "data" array to dto; ids starting with "RC-" are added as collections */ + if (jsonObectWithData.has(JSON_ELEMENT_DATA)) { + for(JsonElement dataElement : jsonObectWithData.get(JSON_ELEMENT_DATA).getAsJsonArray()) { + if (dataElement.getAsJsonObject().has(JSON_ELEMENT_PROGRAMID) && !dataElement.getAsJsonObject().get(JSON_ELEMENT_PROGRAMID).isJsonNull()) { /* has(): get() yields null for a missing member */ + Optional programId = JsonUtils.getAttributeAsString(dataElement.getAsJsonObject(), JSON_ELEMENT_PROGRAMID); + if (programId.isPresent()) { + if (programId.get().startsWith("RC-")) { + try { + long collectionId = Long.parseLong(programId.get().replace("RC-", "")); + dto.addCollection(String.format("RC-%06d", collectionId)); + } catch (NumberFormatException e) { + Log.errorLog(12834939, "Invalid collection id: " + programId.get()); /* log the id itself, not the Optional wrapper */ + } + } else { + dto.addProgramId(programId.get()); + } + } + } + } + } + } +} diff --git a/src/main/java/mServer/crawler/sender/arte/ArteSubPageDeserializer.java b/src/main/java/mServer/crawler/sender/arte/ArteSubPageDeserializer.java new file mode 100644 index 00000000..7dd96c3a --- /dev/null +++ b/src/main/java/mServer/crawler/sender/arte/ArteSubPageDeserializer.java @@ -0,0 +1,28 @@ +package mServer.crawler.sender.arte; + +import com.google.gson.*; + +import java.lang.reflect.Type; +import java.util.Optional; + +public class ArteSubPageDeserializer extends ArteListBaseDeserializer implements JsonDeserializer { + private static final String JSON_ELEMENT_VALUE = "value"; + + @Override + public ArteCategoryFilmsDTO 
deserialize(JsonElement aJsonElement, Type aType, JsonDeserializationContext aContext) throws JsonParseException { + final ArteCategoryFilmsDTO dto = new ArteCategoryFilmsDTO(); + + JsonElement rootElement = aJsonElement; + if (aJsonElement.getAsJsonObject().has(JSON_ELEMENT_VALUE)) { + rootElement = aJsonElement.getAsJsonObject().get(JSON_ELEMENT_VALUE); + } + + JsonObject rootObject = rootElement.getAsJsonObject(); + extractProgramIdFromData(rootObject, dto); + + Optional<String> url = parsePagination(rootObject); + url.ifPresent(dto::setNextPageUrl); + + return dto; + } +} diff --git a/src/main/java/mServer/crawler/sender/arte/MediathekArte.java b/src/main/java/mServer/crawler/sender/arte/MediathekArte.java index d8d382ad..73a81d0e 100644 --- a/src/main/java/mServer/crawler/sender/arte/MediathekArte.java +++ b/src/main/java/mServer/crawler/sender/arte/MediathekArte.java @@ -121,7 +121,8 @@ public void addToList() { meldungThreadUndFertig(); } else { if (CrawlerTool.loadLongMax()) { - addCategories(); + addRecentList(); + //addCategories(); meldungAddMax(listeThemen.size()); for (int t = 0; t < getMaxThreadLaufen(); ++t) { @@ -151,6 +152,13 @@ private void addCategories() { }); } + private void addRecentList() { + senderLanguages.forEach((sender, langCode) -> { + String u = String.format("https://www.arte.tv/api/rproxy/emac/v4/%s/web/pages/MOST_RECENT/", langCode); + listeThemen.add(new String[]{sender, langCode, "recent", u}); + }); + } + private void addTage() { senderLanguages.forEach((sender, langCode) -> { // http://www.arte.tv/guide/de/plus7/videos?day=-2&page=1&isLoading=true&sort=newest&country=DE @@ -203,6 +211,7 @@ private void addFilmeForTag(String sender, String aUrl) { */ class CategoryLoader extends Thread { + private int subPage = 0; @Override public void run() { @@ -229,6 +238,8 @@ private void loadCategory(String sender, String langCode, String aCategory, Stri Gson gsonCollectionChild = new GsonBuilder() .registerTypeAdapter(ArteCategoryFilmsDTO.class, 
new ArteCollectionChildDeserializer()) .create(); + Gson gsonNextPage = new GsonBuilder() + .registerTypeAdapter(ArteCategoryFilmsDTO.class, new ArteSubPageDeserializer()).create(); ArteCategoryFilmsDTO dto = loadSubCategoryPage(gson, sender, aUrl); if (dto != null) { @@ -238,9 +249,36 @@ private void loadCategory(String sender, String langCode, String aCategory, Stri ListeFilme loadedFilme = loadPrograms(sender, langCode, dto); loadedFilme.forEach(film -> addFilm(film)); Log.sysLog(String.format("%s: category %s: %d Filme", sender, aCategory, loadedFilme.size())); + if (dto.hasNextPage()) { + loadNextPage(sender, langCode, aCategory, dto.getNextPageUrl(), gsonCollectionParent, gsonCollectionChild, gsonNextPage); + } } } + private void loadNextPage(String sender, String langCode, String aCategory, String url, Gson gsonCollectionParent, Gson gsonCollectionChild, Gson gsonNextPage) { + subPage++; + ArteCategoryFilmsDTO dto = loadSubCategoryPage(gsonNextPage, sender, url); + if (dto != null) { + loadCollections(sender, langCode, gsonCollectionParent, gsonCollectionChild, dto); + Log.sysLog(String.format("%s: category %s: %d programs, %d collections", sender, aCategory, dto.getProgramIds().size(), dto.getCollectionIds().size())); + // process all programIds + ListeFilme loadedFilme = loadPrograms(sender, langCode, dto); + loadedFilme.forEach(film -> addFilm(film)); + Log.sysLog(String.format("%s: category %s - page %d: %d Filme", sender, aCategory, subPage, loadedFilme.size())); + if (dto.hasNextPage() && shouldLoadNextPage(sender)) { + loadNextPage(sender, langCode, aCategory, dto.getNextPageUrl(), gsonCollectionParent, gsonCollectionChild, gsonNextPage); + } + } + } + + private boolean shouldLoadNextPage(String sender) { + if (Const.ARTE_DE.equals(sender)) { + return true; + } + + return subPage < 5; + } + private void loadCollections(String sender, String langCode, Gson gsonParent, Gson gsonChild, ArteCategoryFilmsDTO dto) { 
dto.getCollectionIds().forEach(collectionId -> { final String url = String.format(COLLECTION_URL, langCode, collectionId); diff --git a/src/test/developTest/java/mServer/crawler/sender/arte/ArteCategoryFilmListDeserializerTest.java b/src/test/developTest/java/mServer/crawler/sender/arte/ArteCategoryFilmListDeserializerTest.java index f2c21956..b545e407 100644 --- a/src/test/developTest/java/mServer/crawler/sender/arte/ArteCategoryFilmListDeserializerTest.java +++ b/src/test/developTest/java/mServer/crawler/sender/arte/ArteCategoryFilmListDeserializerTest.java @@ -21,18 +21,21 @@ public class ArteCategoryFilmListDeserializerTest { private final String jsonFile; private final String[] expectedProgramIds; private final boolean expectedHasNextPage; + private final String expectedNextPageUrl; private final ArteCategoryFilmListDeserializer target; - public ArteCategoryFilmListDeserializerTest(String aJsonFile, String[] aProgramIds, boolean aNextPage) { + public ArteCategoryFilmListDeserializerTest(String aJsonFile, String[] aProgramIds, boolean aNextPage, String nextPageUrl) { jsonFile = aJsonFile; expectedProgramIds = aProgramIds; expectedHasNextPage = aNextPage; + expectedNextPageUrl = nextPageUrl; this.target = new ArteCategoryFilmListDeserializer(); } @Parameterized.Parameters public static Collection data() { return Arrays.asList(new Object[][]{ - {"/arte/arte_category.json", new String[]{"112511-000-A", "047389-000-A", "109066-000-A", "082669-000-A", "003982-000-A", "021109-000-A"}, false}, + {"/arte/arte_category.json", new String[]{"112511-000-A", "047389-000-A", "109066-000-A", "082669-000-A", "003982-000-A", "021109-000-A"}, false, null}, + {"/arte/arte_video_list1.json", new String[]{"033559-000-A","078154-000-A", "101398-000-A", "109332-000-A", "111063-000-A"}, true, "https://www.arte.tv/api/rproxy/emac/v4/de/web/zones/daeadc71-4306-411a-8590-1c1f484ef5aa/content?abv=B&authorizedCountry=DE&page=2&pageId=MOST_RECENT&zoneIndexInPage=0"} }); } @@ -47,5 +50,6 @@ 
public void testDeserialize() { assertThat(actual.hasNextPage(), equalTo(expectedHasNextPage)); Set actualProgramIds = actual.getProgramIds(); assertThat(actualProgramIds, Matchers.containsInAnyOrder(expectedProgramIds)); + assertThat(actual.getNextPageUrl(), equalTo(expectedNextPageUrl)); } } diff --git a/src/test/developTest/java/mServer/crawler/sender/arte/ArteSubPageDeserializerTest.java b/src/test/developTest/java/mServer/crawler/sender/arte/ArteSubPageDeserializerTest.java new file mode 100644 index 00000000..90aa9608 --- /dev/null +++ b/src/test/developTest/java/mServer/crawler/sender/arte/ArteSubPageDeserializerTest.java @@ -0,0 +1,55 @@ +package mServer.crawler.sender.arte; + +import com.google.gson.JsonObject; +import mServer.test.JsonFileReader; +import org.hamcrest.Matchers; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Set; + +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.notNullValue; +import static org.junit.Assert.assertThat; + +@RunWith(Parameterized.class) +public class ArteSubPageDeserializerTest { + + private final String jsonFile; + private final String[] expectedProgramIds; + private final boolean expectedHasNextPage; + private final String expectedNextPageUrl; + private final ArteSubPageDeserializer target; + public ArteSubPageDeserializerTest(String aJsonFile, String[] aProgramIds, boolean aNextPage, String nextPageUrl) { + jsonFile = aJsonFile; + expectedProgramIds = aProgramIds; + expectedHasNextPage = aNextPage; + expectedNextPageUrl = nextPageUrl; + this.target = new ArteSubPageDeserializer(); + } + + @Parameterized.Parameters + public static Collection data() { + return Arrays.asList(new Object[][]{ + {"/arte/arte_video_list2.json", new String[]{"099708-000-A", "098846-000-A", "111648-001-A", "112235-000-A", "113043-139-A"}, true, 
"https://www.arte.tv/api/rproxy/emac/v4/de/web/zones/82b597d7-a83b-4dd8-bea8-ad71675fdf23/content?abv=A&authorizedCountry=DE&page=3&pageId=MOST_VIEWED&zoneIndexInPage=0"}, + {"/arte/arte_video_list_last.json", new String[]{"102805-000-A","104017-000-A", "106273-006-A"}, false, null} + }); + } + + @Test + public void testDeserialize() { + + JsonObject jsonObject = JsonFileReader.readJson(jsonFile); + + ArteCategoryFilmsDTO actual = target.deserialize(jsonObject, ArteCategoryFilmsDTO.class, null); + + assertThat(actual, notNullValue()); + assertThat(actual.hasNextPage(), equalTo(expectedHasNextPage)); + Set actualProgramIds = actual.getProgramIds(); + assertThat(actualProgramIds, Matchers.containsInAnyOrder(expectedProgramIds)); + assertThat(actual.getNextPageUrl(), equalTo(expectedNextPageUrl)); + } +} diff --git a/src/test/developTest/resources/arte/arte_video_list1.json b/src/test/developTest/resources/arte/arte_video_list1.json new file mode 100644 index 00000000..c47a2496 --- /dev/null +++ b/src/test/developTest/resources/arte/arte_video_list1.json @@ -0,0 +1,482 @@ +{ + "tag":"Ok", + "value":{ + "code":"MOST_RECENT", + "language":"de", + "support":"web", + "type":"videoType", + "level":0, + "parent":null, + "alternativeLanguages":[ + { + "code":"fr", + "label":"Français", + "page":"MOST_RECENT", + "url":"/fr/videos/plus-recentes/", + "title":"Les plus récentes" + }, + { + "code":"de", + "label":"Deutsch", + "page":"MOST_RECENT", + "url":"/de/videos/neueste-videos/", + "title":"Neueste Videos" + }, + { + "code":"en", + "label":"English", + "page":"MOST_RECENT", + "url":"/en/videos/most-recent/", + "title":"Most recent" + }, + { + "code":"es", + "label":"Español", + "page":"MOST_RECENT", + "url":"/es/videos/los-mas-recientes/", + "title":"Los más recientes" + }, + { + "code":"pl", + "label":"Polski", + "page":"MOST_RECENT", + "url":"/pl/videos/najnowsze/", + "title":"Najnowsze" + }, + { + "code":"it", + "label":"Italiano", + "page":"MOST_RECENT", + 
"url":"/it/videos/piu-recenti/", + "title":"I più recenti" + } + ], + "url":"/de/videos/neueste-videos/", + "deeplink":"arte://programs/recent", + "slug":"neueste-videos", + "stats":{ + "xiti":{ + "page_name":"Most_recent", + "chapter1":"Looking_for", + "chapter2":null, + "chapter3":null, + "x1":"de", + "x2":"Search", + "x4":"A", + "s2":4, + "siteId":"582046", + "env_work":"prod", + "search_keywords":null + }, + "serverSideTracking":{ + "page":{ + "id":"MOST_RECENT", + "language":"de", + "url":"/de/videos/neueste-videos/", + "abv":"A", + "query":null, + "category":null, + "subcategories":null + }, + "content":null + } + }, + "metadata":{ + "title":"Neueste Videos", + "description":"Filme, Dokus, Sendungen, Serien … Entdecken Sie die Programme, die der ARTE Mediathek neu hinzugefügt wurden.", + "seo":{ + "title":"Neueste Videos | ARTE", + "description":null, + "canonical":"/de/videos/neueste-videos/" + }, + "og":{ + "image":{ + "url":"/img/SHARE_DEFAULT.jpg", + "width":1200, + "height":630 + } + }, + "twitter":{ + "image":{ + "url":"/img/SHARE_DEFAULT.jpg" + }, + "site":"@ARTEde" + } + }, + "zones":[ + { + "id":"daeadc71-4306-411a-8590-1c1f484ef5aa", + "code":"listing_MOST_RECENT", + "title":"Video Listing (MOST_RECENT)", + "slug":null, + "description":null, + "displayOptions":{ + "template":"vertical-landscape", + "theme":null, + "showZoneTitle":true, + "showItemTitle":true + }, + "link":null, + "authenticatedContent":null, + "groupedZonesName":null, + "content":{ + "data":[ + { + "id":"033559-000-A_de", + "type":"teaser", + "kind":{ + "code":"SHOW", + "label":"Programm", + "isCollection":false + }, + "deeplink":"arte://program/033559-000-A", + "title":"Ein süßer Fratz", + "subtitle":null, + "shortDescription":"Eine junge Buchhändlerin wird von einem Fotografen entdeckt, der sie zum erfolgreichen Cover-Girl eines Modemagazins macht. Die unscheinbare, graue Maus verwandelt sich in eine attraktive Frau. 
- Audrey Hepburn und Fred Astaire spielen die Hauptrollen in diesem amüsanten Filmmusical (1957), das an Originalschauplätzen in Paris realisiert wurde.", + "mainImage":{ + "caption":null, + "url":"https://api-cdn.arte.tv/img/v2/image/jj9S3FSnvKh4KYTTpW9B3Y/__SIZE__?type=TEXT" + }, + "stickers":[ + { + "code":"PLAYABLE", + "label":"PLAYABLE" + }, + { + "code":"FULL_VIDEO", + "label":"Das Programm sehen" + } + ], + "trackingPixel":"/ct/?abv=B&em=033559-000-A&language=de&pageid=MOST_RECENT&position=1&support=web&teaserid=033559-000-A_de&teasertitle=Ein%20s%C3%BC%C3%9Fer%20Fratz&zoneCode=listing_MOST_RECENT&zoneIndexInPage=0&zoneTemplate=vertical_landscape&zoneid=listing_MOST_RECENT&zonename=Video%20Listing%20%28MOST_RECENT%29", + "url":"/de/videos/033559-000-A/ein-suesser-fratz/", + "programId":"033559-000-A", + "teaserText":"Audrey Hepburn wird an der Seite von Fred Astaire vom hässlichen Entlein zum schönen Schwan.", + "duration":5940, + "durationLabel":"99 Min.", + "geoblocking":{ + "code":"DE_FR", + "label":"Verfügbar in Deutschland und Frankreich", + "inclusion":[ + + ], + "exclusion":[ + + ] + }, + "genre":{ + "label":"Filme", + "url":"/de/videos/kino/", + "deeplink":"arte://emac/CIN" + }, + "audioVersions":[ + { + "code":"AD", + "label":"Audiodeskription" + }, + { + "code":"DE", + "label":"Synchronisation" + }, + { + "code":"STM", + "label":"Untertitel für Gehörlose" + } + ], + "availability":{ + "type":"VOD", + "start":"2024-02-11T04:00:00Z", + "end":"2024-02-18T04:00:00Z", + "upcomingDate":"2024-02-11T04:00:00Z", + "label":"Verfügbar vom 11/02/2024 bis 17/02/2024" + }, + "ageRating":0, + "callToAction":"Abspielen", + "clip":null, + "trailer":null, + "childrenCount":null + }, + { + "id":"078154-000-A_de", + "type":"teaser", + "kind":{ + "code":"SHOW", + "label":"Programm", + "isCollection":false + }, + "deeplink":"arte://program/078154-000-A", + "title":"Australiens unbekanntes Paradies", + "subtitle":"Die Inseln der Torres-Straße", + "shortDescription":"274 
Inseln mit weißen Korallensandstränden erheben sich aus der Torres-Straße, der Meerenge zwischen Australien und Papua-Neuguinea. Die rund 20 bewohnten Inseln liegen oft viele Kilometer voneinander entfernt, der dortige Alltag ist von Isolation und Ressourcenknappheit geprägt. Die Dokumentation bietet überraschende Einblicke in die Lebenswelt der Torres-Straßen-Insulaner.", + "mainImage":{ + "caption":"Die idyllische Inselgruppe der Torres-Straße liegt in einer Meerenge zwischen Papua-Neuguinea und Australien.", + "url":"https://api-cdn.arte.tv/img/v2/image/hfer4Uf8bRCCbjLDtDbyB8/__SIZE__?type=TEXT" + }, + "stickers":[ + { + "code":"PLAYABLE", + "label":"PLAYABLE" + }, + { + "code":"FULL_VIDEO", + "label":"Das Programm sehen" + } + ], + "trackingPixel":"/ct/?abv=B&em=078154-000-A&language=de&pageid=MOST_RECENT&position=2&support=web&teaserid=078154-000-A_de&teasertitle=Australiens%20unbekanntes%20Paradies&zoneCode=listing_MOST_RECENT&zoneIndexInPage=0&zoneTemplate=vertical_landscape&zoneid=listing_MOST_RECENT&zonename=Video%20Listing%20%28MOST_RECENT%29", + "url":"/de/videos/078154-000-A/australiens-unbekanntes-paradies/", + "programId":"078154-000-A", + "teaserText":"Die Dokumentation bietet überraschende Einblicke in die einzigartige Lebenswelt der Torres-Straßen-Insulaner.", + "duration":2606, + "durationLabel":"44 Min.", + "geoblocking":{ + "code":"ALL", + "label":"Weltweit verfügbar", + "inclusion":[ + + ], + "exclusion":[ + + ] + }, + "genre":{ + "label":"Dokus und Reportagen", + "url":"/de/videos/dokumentationen-und-reportagen/", + "deeplink":"arte://emac/DOR" + }, + "audioVersions":[ + { + "code":"STM", + "label":"Untertitel für Gehörlose" + } + ], + "availability":{ + "type":"VOD", + "start":"2024-02-11T04:00:00Z", + "end":"2024-03-04T04:00:00Z", + "upcomingDate":"2024-02-11T04:00:00Z", + "label":"Verfügbar vom 11/02/2024 bis 03/03/2024" + }, + "ageRating":0, + "callToAction":"Abspielen", + "clip":null, + "trailer":null, + "childrenCount":null + }, + { + 
"id":"101398-000-A_de", + "type":"teaser", + "kind":{ + "code":"SHOW", + "label":"Programm", + "isCollection":false + }, + "deeplink":"arte://program/101398-000-A", + "title":"Albrecht Dürer - Ein Meister des Selbstporträts", + "subtitle":null, + "shortDescription":"Albrecht Dürer, einer der größten Maler der Renaissance, begründete an der Schwelle vom 15. zum 16. Jahrhundert einen neuen Künstlerstatus, der ihm Unabhängigkeit und Anerkennung bescherte. Seine zwölf, bis heute erhaltenen, Selbstporträts stellen ein wertvolles Erbe dar. Dieses Erbe zeugt von einem gesellschaftlichen Umbruch in Nordeuropa.", + "mainImage":{ + "caption":"Albrecht Dürer", + "url":"https://api-cdn.arte.tv/img/v2/image/EfwnqtdkLvHSfpPjSYuhr9/__SIZE__?type=TEXT" + }, + "stickers":[ + { + "code":"PLAYABLE", + "label":"PLAYABLE" + }, + { + "code":"FULL_VIDEO", + "label":"Das Programm sehen" + } + ], + "trackingPixel":"/ct/?abv=B&em=101398-000-A&language=de&pageid=MOST_RECENT&position=3&support=web&teaserid=101398-000-A_de&teasertitle=Albrecht%20D%C3%BCrer%20-%20Ein%20Meister%20des%20Selbstportr%C3%A4ts&zoneCode=listing_MOST_RECENT&zoneIndexInPage=0&zoneTemplate=vertical_landscape&zoneid=listing_MOST_RECENT&zonename=Video%20Listing%20%28MOST_RECENT%29", + "url":"/de/videos/101398-000-A/albrecht-duerer-ein-meister-des-selbstportraets/", + "programId":"101398-000-A", + "teaserText":"Albrecht Dürers zwölf, bis heute erhaltenen Selbstporträts zeugen von einem gesellschaftlichen Umbruch in Nordeuropa.", + "duration":3141, + "durationLabel":"53 Min.", + "geoblocking":{ + "code":"SAT", + "label":"Verfügbar in Europa", + "inclusion":[ + + ], + "exclusion":[ + + ] + }, + "genre":{ + "label":"Dokus und Reportagen", + "url":"/de/videos/dokumentationen-und-reportagen/", + "deeplink":"arte://emac/DOR" + }, + "audioVersions":[ + { + "code":"DE", + "label":"Synchronisation" + } + ], + "availability":{ + "type":"VOD", + "start":"2024-02-11T04:00:00Z", + "end":"2024-05-18T03:00:00Z", + 
"upcomingDate":"2024-02-11T04:00:00Z", + "label":"Verfügbar vom 11/02/2024 bis 17/05/2024" + }, + "ageRating":0, + "callToAction":"Abspielen", + "clip":null, + "trailer":null, + "childrenCount":null + }, + { + "id":"109332-000-A_de", + "type":"teaser", + "kind":{ + "code":"SHOW", + "label":"Programm", + "isCollection":false + }, + "deeplink":"arte://program/109332-000-A", + "title":"Dance! – von Latin bis Voguing", + "subtitle":"Mit Sylvia Camarda, Oumi Janta u.v.m.", + "shortDescription":"Ein Feuerwerk der Bewegung: Die Tänzerin und Choreografin Sylvia Camarda und weitere Tänzerinnen sowie Tänzer zeigen verschiedene Tanzstile, von Modern und Urban Dance über Latin Show Dance bis zu Voguing.", + "mainImage":{ + "caption":"Im Mittelpunkt der Sendung steht der Tänzer Ahmad Joudeh.", + "url":"https://api-cdn.arte.tv/img/v2/image/7U5ZRZ3duP8jEifekUvWSm/__SIZE__?type=TEXT" + }, + "stickers":[ + { + "code":"PLAYABLE", + "label":"PLAYABLE" + }, + { + "code":"FULL_VIDEO", + "label":"Das Programm sehen" + }, + { + "code":"ARTE_CONCERT", + "label":"ARTE Concert" + } + ], + "trackingPixel":"/ct/?abv=B&em=109332-000-A&language=de&pageid=MOST_RECENT&position=4&support=web&teaserid=109332-000-A_de&teasertitle=Dance%21%20%E2%80%93%20von%20Latin%20bis%20Voguing&zoneCode=listing_MOST_RECENT&zoneIndexInPage=0&zoneTemplate=vertical_landscape&zoneid=listing_MOST_RECENT&zonename=Video%20Listing%20%28MOST_RECENT%29", + "url":"/de/videos/109332-000-A/dance-von-latin-bis-voguing/", + "programId":"109332-000-A", + "teaserText":"Ein Feuerwerk der Bewegung: Die Tänzerin und Choreografin Sylvia Camarda zeigt Tanzstile aus allen Zeiten.", + "duration":2779, + "durationLabel":"47 Min.", + "geoblocking":{ + "code":"ALL", + "label":"Weltweit verfügbar", + "inclusion":[ + + ], + "exclusion":[ + + ] + }, + "genre":{ + "label":"Bühne und Konzert", + "url":"/de/arte-concert/", + "deeplink":"arte://emac/ARTE_CONCERT" + }, + "audioVersions":[ + + ], + "availability":{ + "type":"VOD", + 
"start":"2024-02-11T04:00:00Z", + "end":"2024-05-11T03:00:00Z", + "upcomingDate":"2024-02-11T04:00:00Z", + "label":"Verfügbar vom 11/02/2024 bis 10/05/2024" + }, + "ageRating":0, + "callToAction":"Abspielen", + "clip":null, + "trailer":null, + "childrenCount":null + }, + { + "id":"111063-000-A_de", + "type":"teaser", + "kind":{ + "code":"SHOW", + "label":"Programm", + "isCollection":false + }, + "deeplink":"arte://program/111063-000-A", + "title":"Händel: Giulio Cesare", + "subtitle":"Julie Fuchs in Amsterdam", + "shortDescription":"Die berühmte Händel-Oper \"Giulio Cesare\" in Amsterdam unter der musikalischen Leitung der französischen Dirigentin Emmanuelle Haïm und in einer Inszenierung von Calixto Bieito: Christophe Dumaux und Julie Fuchs sind Cäsar und Kleopatra. Um die grenzenlose Machtgier der superreichen Eliten bloßzustellen, überträgt Calixto Bieito die Handlung in eine namenlose Hightech-Oligarchie.", + "mainImage":{ + "caption":null, + "url":"https://api-cdn.arte.tv/img/v2/image/Harb4PQNnBVSRh6Liz2NJP/__SIZE__?type=TEXT" + }, + "stickers":[ + { + "code":"PLAYABLE", + "label":"PLAYABLE" + }, + { + "code":"FULL_VIDEO", + "label":"Das Programm sehen" + }, + { + "code":"ARTE_CONCERT", + "label":"ARTE Concert" + } + ], + "trackingPixel":"/ct/?abv=B&em=111063-000-A&language=de&pageid=MOST_RECENT&position=5&support=web&teaserid=111063-000-A_de&teasertitle=H%C3%A4ndel%3A%20Giulio%20Cesare&zoneCode=listing_MOST_RECENT&zoneIndexInPage=0&zoneTemplate=vertical_landscape&zoneid=listing_MOST_RECENT&zonename=Video%20Listing%20%28MOST_RECENT%29", + "url":"/de/videos/111063-000-A/haendel-giulio-cesare/", + "programId":"111063-000-A", + "teaserText":"Die berühmte Händel-Oper unter der musikalischen Leitung von Dirigentin Emmanuelle Haïm und inszeniert von Calixto Bieito", + "duration":11499, + "durationLabel":"192 Min.", + "geoblocking":{ + "code":"SAT", + "label":"Verfügbar in Europa", + "inclusion":[ + + ], + "exclusion":[ + + ] + }, + "genre":{ + "label":"Bühne und 
Konzert", + "url":"/de/arte-concert/", + "deeplink":"arte://emac/ARTE_CONCERT" + }, + "audioVersions":[ + { + "code":"OmU", + "label":"Untertitel" + } + ], + "availability":{ + "type":"VOD", + "start":"2024-02-11T04:00:00Z", + "end":"2024-03-19T04:00:00Z", + "upcomingDate":"2024-02-11T04:00:00Z", + "label":"Verfügbar vom 11/02/2024 bis 18/03/2024" + }, + "ageRating":0, + "callToAction":"Abspielen", + "clip":null, + "trailer":null, + "childrenCount":null + } + ], + "pagination":{ + "page":1, + "pages":100, + "totalCount":2000, + "links":{ + "first":"/api/emac/v4/de/web/zones/daeadc71-4306-411a-8590-1c1f484ef5aa/content?abv=B&authorizedCountry=DE&page=1&pageId=MOST_RECENT&zoneIndexInPage=0", + "next":"/api/emac/v4/de/web/zones/daeadc71-4306-411a-8590-1c1f484ef5aa/content?abv=B&authorizedCountry=DE&page=2&pageId=MOST_RECENT&zoneIndexInPage=0", + "last":"/api/emac/v4/de/web/zones/daeadc71-4306-411a-8590-1c1f484ef5aa/content?abv=B&authorizedCountry=DE&page=100&pageId=MOST_RECENT&zoneIndexInPage=0" + } + } + } + } + ] + } +} diff --git a/src/test/developTest/resources/arte/arte_video_list2.json b/src/test/developTest/resources/arte/arte_video_list2.json new file mode 100644 index 00000000..17396b30 --- /dev/null +++ b/src/test/developTest/resources/arte/arte_video_list2.json @@ -0,0 +1,353 @@ +{ + "tag":"Ok", + "value":{ + "data":[ + { + "id":"099708-000-A_de", + "type":"teaser", + "kind":{ + "code":"SHOW", + "label":"Programm", + "isCollection":false + }, + "deeplink":"arte://program/099708-000-A", + "title":"USA gegen Hitler", + "subtitle":"Wie ein Spion den Nazis den Krieg erklärte", + "shortDescription":"Die packende Geschichte des britischen Geheimagenten William Stephenson, der die USA an der Seite Großbritanniens in den Zweiten Weltkrieg trieb: Archivmaterial und Interviews, u.a. 
mit Timothy Naftali, einer der wenigen lebenden Historiker, der Einblick in die streng geheimen Akten hatte, ordnen die Geschehnisse ein und ziehen Parallelen zu heutigen Ereignissen.", + "mainImage":{ + "caption":"William Stephenson (James Loye, li.) zeigt William Donovan (Eric Davis, re.) eine gefälschte Karte mit Hitlers angeblichen Eroberungsplänen Amerikas ...", + "url":"https://api-cdn.arte.tv/img/v2/image/Dr7Nj2X9Uk6UzKYoFK4GCm/__SIZE__?type=TEXT" + }, + "stickers":[ + { + "code":"PLAYABLE", + "label":"PLAYABLE" + }, + { + "code":"FULL_VIDEO", + "label":"Das Programm sehen" + } + ], + "trackingPixel":"/ct/?abv=A&em=099708-000-A&language=de&pageid=MOST_VIEWED&position=1&support=web&teaserid=099708-000-A_de&teasertitle=USA%20gegen%20Hitler&zoneCode=listing_MOST_VIEWED&zoneIndexInPage=0&zoneTemplate=vertical_landscape&zoneid=listing_MOST_VIEWED&zonename=Video%20Listing%20%28MOST_VIEWED%29", + "url":"/de/videos/099708-000-A/usa-gegen-hitler/", + "programId":"099708-000-A", + "teaserText":"1939 wurde ein britisch-kanadischer Geschäftsmann zu einem der größten Spione des 20. 
Jahrhunderts.", + "duration":5488, + "durationLabel":"92 Min.", + "geoblocking":{ + "code":"DE_FR", + "label":"Verfügbar in Deutschland und Frankreich", + "inclusion":[ + + ], + "exclusion":[ + + ] + }, + "genre":{ + "label":"Dokus und Reportagen", + "url":"/de/videos/dokumentationen-und-reportagen/", + "deeplink":"arte://emac/DOR" + }, + "audioVersions":[ + { + "code":"DE", + "label":"Synchronisation" + }, + { + "code":"STM", + "label":"Untertitel für Gehörlose" + } + ], + "availability":{ + "type":"VOD", + "start":"2024-01-29T04:00:00Z", + "end":"2024-04-29T03:00:00Z", + "upcomingDate":"2024-01-29T04:00:00Z", + "label":"Verfügbar vom 29/01/2024 bis 28/04/2024" + }, + "ageRating":0, + "callToAction":"Abspielen", + "clip":null, + "trailer":null, + "childrenCount":null + }, + { + "id":"098846-000-A_de", + "type":"teaser", + "kind":{ + "code":"SHOW", + "label":"Programm", + "isCollection":false + }, + "deeplink":"arte://program/098846-000-A", + "title":"Die Normannen - Herrscher von Sizilien", + "subtitle":null, + "shortDescription":"Sie kamen aus dem hohen Norden, waren als wilde Krieger verschrien und schufen auf Sizilien das reichste und fortschrittlichste Königreich Europas: die Normannen. Der Dokumentarfilm macht diese facettenreiche Geschichte aus dem Mittelalter lebendig und präsentiert sie so bunt und vielfältig, wie Sizilien heute ist.", + "mainImage":{ + "caption":"Königin Konstanze von Sizilien (Alessia Guerrieri, Mi.) 
in Gegnerschaft zu Walter von Pagliara (Alessandro Cremona, 2.v.l.), einem Gefolgsmann ihres soeben verstorbenen Gatten Heinrich VI.", + "url":"https://api-cdn.arte.tv/img/v2/image/Y3p4LbePETFeFSqtWppdUN/__SIZE__?type=TEXT" + }, + "stickers":[ + { + "code":"PLAYABLE", + "label":"PLAYABLE" + }, + { + "code":"FULL_VIDEO", + "label":"Das Programm sehen" + } + ], + "trackingPixel":"/ct/?abv=A&em=098846-000-A&language=de&pageid=MOST_VIEWED&position=2&support=web&teaserid=098846-000-A_de&teasertitle=Die%20Normannen%20-%20Herrscher%20von%20Sizilien&zoneCode=listing_MOST_VIEWED&zoneIndexInPage=0&zoneTemplate=vertical_landscape&zoneid=listing_MOST_VIEWED&zonename=Video%20Listing%20%28MOST_VIEWED%29", + "url":"/de/videos/098846-000-A/die-normannen-herrscher-von-sizilien/", + "programId":"098846-000-A", + "teaserText":"Sie waren als wilde Krieger verschrien und schufen das fortschrittlichste Königreich jener Zeit: die Normannen.", + "duration":5219, + "durationLabel":"87 Min.", + "geoblocking":{ + "code":"DE_FR", + "label":"Verfügbar in Deutschland und Frankreich", + "inclusion":[ + + ], + "exclusion":[ + + ] + }, + "genre":{ + "label":"Dokus und Reportagen", + "url":"/de/videos/dokumentationen-und-reportagen/", + "deeplink":"arte://emac/DOR" + }, + "audioVersions":[ + { + "code":"STM", + "label":"Untertitel für Gehörlose" + } + ], + "availability":{ + "type":"VOD", + "start":"2024-01-26T04:00:00Z", + "end":"2024-02-26T04:00:00Z", + "upcomingDate":"2024-01-26T04:00:00Z", + "label":"Verfügbar vom 26/01/2024 bis 25/02/2024" + }, + "ageRating":0, + "callToAction":"Abspielen", + "clip":null, + "trailer":null, + "childrenCount":null + }, + { + "id":"111648-001-A_de", + "type":"teaser", + "kind":{ + "code":"SHOW", + "label":"Programm", + "isCollection":false + }, + "deeplink":"arte://program/111648-001-A", + "title":"Südafrikas Garden Route", + "subtitle":"Eine legendäre Küstenstraße", + "shortDescription":"Am südlichsten Zipfel des afrikanischen Kontinents treffen der raue 
Atlantische und der milde Indische Ozean aufeinander. Entlang dieser Küstenregion verläuft eine der spektakulärsten Panoramastraßen der Welt: Südafrikas Garden Route. In der ersten Episode der zweiteiligen Doku fahren wir von Kapstadt bis in die Lagunenstadt Knysna.", + "mainImage":{ + "caption":"Kap Agulhas ist das Ende des afrikanischen Kontinents: Danach kommt nichts als Wasser und ein paar Tausend Kilometer weiter die Antarktis.", + "url":"https://api-cdn.arte.tv/img/v2/image/sbV6S8JqYSJSvpEQPjYa5W/__SIZE__?type=TEXT" + }, + "stickers":[ + { + "code":"PLAYABLE", + "label":"PLAYABLE" + }, + { + "code":"FULL_VIDEO", + "label":"Das Programm sehen" + } + ], + "trackingPixel":"/ct/?abv=A&em=111648-001-A&language=de&pageid=MOST_VIEWED&position=3&support=web&teaserid=111648-001-A_de&teasertitle=S%C3%BCdafrikas%20Garden%20Route&zoneCode=listing_MOST_VIEWED&zoneIndexInPage=0&zoneTemplate=vertical_landscape&zoneid=listing_MOST_VIEWED&zonename=Video%20Listing%20%28MOST_VIEWED%29", + "url":"/de/videos/111648-001-A/suedafrikas-garden-route/", + "programId":"111648-001-A", + "teaserText":"Porträt einer der spektakulärsten Panoramastraßen der Welt: Südafrikas Garden Route.", + "duration":2610, + "durationLabel":"44 Min.", + "geoblocking":{ + "code":"DE_FR", + "label":"Verfügbar in Deutschland und Frankreich", + "inclusion":[ + + ], + "exclusion":[ + + ] + }, + "genre":{ + "label":"Dokus und Reportagen", + "url":"/de/videos/dokumentationen-und-reportagen/", + "deeplink":"arte://emac/DOR" + }, + "audioVersions":[ + + ], + "availability":{ + "type":"VOD", + "start":"2024-02-08T04:00:00Z", + "end":"2025-02-07T04:00:00Z", + "upcomingDate":"2024-02-08T04:00:00Z", + "label":"Verfügbar vom 08/02/2024 bis 06/02/2025" + }, + "ageRating":0, + "callToAction":"Abspielen", + "clip":null, + "trailer":null, + "childrenCount":null + }, + { + "id":"112235-000-A_de", + "type":"teaser", + "kind":{ + "code":"SHOW", + "label":"Programm", + "isCollection":false + }, + 
"deeplink":"arte://program/112235-000-A", + "title":"Enthüllung einer Staatsaffäre", + "subtitle":null, + "shortDescription":"Paris 2015: Der Zoll beschlagnahmt mehr als sieben Tonnen Cannabis. Kurz darauf kontaktiert der ehemalige Agent der Drogenfahndung, Hubert Antoine, den Journalisten Stéphane Vilner. Antoine gibt an über Beweise zu haben, die den Chef der zentralen Drogenfahndung schwer belasten ... - Politthriller (2021), inspiriert von dem Buch \"L'Infiltré\" von Hubert Avoine und Emmanuel Fansten", + "mainImage":{ + "caption":"Der junge Journalist Stéphane Vilner (Pio Marmaï, re.) blickt auf Entwürfe seiner Titelstory für die „Libération“.", + "url":"https://api-cdn.arte.tv/img/v2/image/qbtv9ATfr4wyzGXm3F9zSV/__SIZE__?type=TEXT" + }, + "stickers":[ + { + "code":"PLAYABLE", + "label":"PLAYABLE" + }, + { + "code":"FULL_VIDEO", + "label":"Das Programm sehen" + } + ], + "trackingPixel":"/ct/?abv=A&em=112235-000-A&language=de&pageid=MOST_VIEWED&position=4&support=web&teaserid=112235-000-A_de&teasertitle=Enth%C3%BCllung%20einer%20Staatsaff%C3%A4re&zoneCode=listing_MOST_VIEWED&zoneIndexInPage=0&zoneTemplate=vertical_landscape&zoneid=listing_MOST_VIEWED&zonename=Video%20Listing%20%28MOST_VIEWED%29", + "url":"/de/videos/112235-000-A/enthuellung-einer-staatsaffaere/", + "programId":"112235-000-A", + "teaserText":"Paris 2015: Der Zoll beschlagnahmt mehr als sieben Tonnen Cannabis. 
Steckt die Drogenfahndung selbst dahinter?", + "duration":6924, + "durationLabel":"116 Min.", + "geoblocking":{ + "code":"DE_FR", + "label":"Verfügbar in Deutschland und Frankreich", + "inclusion":[ + + ], + "exclusion":[ + + ] + }, + "genre":{ + "label":"Filme", + "url":"/de/videos/kino/", + "deeplink":"arte://emac/CIN" + }, + "audioVersions":[ + { + "code":"AD", + "label":"Audiodeskription" + }, + { + "code":"DE", + "label":"Synchronisation" + }, + { + "code":"STM", + "label":"Untertitel für Gehörlose" + } + ], + "availability":{ + "type":"VOD", + "start":"2024-01-17T04:00:00Z", + "end":"2024-02-16T04:00:00Z", + "upcomingDate":"2024-01-17T04:00:00Z", + "label":"Verfügbar vom 17/01/2024 bis 15/02/2024" + }, + "ageRating":0, + "callToAction":"Abspielen", + "clip":null, + "trailer":null, + "childrenCount":null + }, + { + "id":"113043-139-A_de", + "type":"teaser", + "kind":{ + "code":"SHOW", + "label":"Programm", + "isCollection":false + }, + "deeplink":"arte://program/113043-139-A", + "title":"Taylor Swift: Ein politisches Schwergewicht?", + "subtitle":"ARTE Info Expresso", + "shortDescription":"Sie hat 16 Mal so viele Follower wie der US-Präsident, mobilisiert junge Wähler, kurbelt mit ihren Konzerten die Wirtschaft an und wurde vom Time Magazine zur einflussreichsten Person des Jahres gekürt: Wie viel Einfluss könnte Mega-Star Taylor Swift auf die Politik der USA und die bevorstehenden Präsidentschaftwahlen nehmen? 
Und warum reißen sich führende Politiker weltweit um ihren Besuch?", + "mainImage":{ + "caption":null, + "url":"https://api-cdn.arte.tv/img/v2/image/y43c6rZXcFXVaxNCfE89Zg/__SIZE__?type=TEXT" + }, + "stickers":[ + { + "code":"PLAYABLE", + "label":"PLAYABLE" + }, + { + "code":"FULL_VIDEO", + "label":"Das Programm sehen" + } + ], + "trackingPixel":"/ct/?abv=A&em=113043-139-A&language=de&pageid=MOST_VIEWED&position=5&support=web&teaserid=113043-139-A_de&teasertitle=Taylor%20Swift%3A%20Ein%20politisches%20Schwergewicht%3F&zoneCode=listing_MOST_VIEWED&zoneIndexInPage=0&zoneTemplate=vertical_landscape&zoneid=listing_MOST_VIEWED&zonename=Video%20Listing%20%28MOST_VIEWED%29", + "url":"/de/videos/113043-139-A/taylor-swift-ein-politisches-schwergewicht/", + "programId":"113043-139-A", + "teaserText":"Die Sängerin hat 16 Mal so viele Follower wie US-Präsident Joe Biden und mobilisiert junge Wähler.", + "duration":132, + "durationLabel":"3 Min.", + "geoblocking":{ + "code":"ALL", + "label":"Weltweit verfügbar", + "inclusion":[ + + ], + "exclusion":[ + + ] + }, + "genre":{ + "label":"Dokus und Reportagen", + "url":"/de/videos/dokumentationen-und-reportagen/", + "deeplink":"arte://emac/DOR" + }, + "audioVersions":[ + + ], + "availability":{ + "type":"VOD", + "start":"2023-12-13T11:41:00Z", + "end":"2024-12-14T23:00:00Z", + "upcomingDate":"2023-12-13T11:41:00Z", + "label":"Verfügbar vom 13/12/2023 bis 14/12/2024" + }, + "ageRating":0, + "callToAction":"Abspielen", + "clip":null, + "trailer":null, + "childrenCount":null + } + ], + "pagination":{ + "page":2, + "pages":100, + "totalCount":2000, + "links":{ + "first":"/api/emac/v4/de/web/zones/82b597d7-a83b-4dd8-bea8-ad71675fdf23/content?abv=A&authorizedCountry=DE&page=1&pageId=MOST_VIEWED&zoneIndexInPage=0", + "next":"/api/emac/v4/de/web/zones/82b597d7-a83b-4dd8-bea8-ad71675fdf23/content?abv=A&authorizedCountry=DE&page=3&pageId=MOST_VIEWED&zoneIndexInPage=0", + 
"last":"/api/emac/v4/de/web/zones/82b597d7-a83b-4dd8-bea8-ad71675fdf23/content?abv=A&authorizedCountry=DE&page=100&pageId=MOST_VIEWED&zoneIndexInPage=0" + } + } + } +} \ No newline at end of file diff --git a/src/test/developTest/resources/arte/arte_video_list_last.json b/src/test/developTest/resources/arte/arte_video_list_last.json new file mode 100644 index 00000000..17a32518 --- /dev/null +++ b/src/test/developTest/resources/arte/arte_video_list_last.json @@ -0,0 +1,213 @@ +{ + "tag":"Ok", + "value":{ + "data":[ + { + "id":"102805-000-A_de", + "type":"teaser", + "kind":{ + "code":"SHOW", + "label":"Programm", + "isCollection":false + }, + "deeplink":"arte://program/102805-000-A", + "title":"Modeselektor presents: Work", + "subtitle":"Ein Film von Corey Scott-Gilbert, Krsn Brasko und Tobias Staab", + "shortDescription":"Am 9. April 2021 erscheint das fünfte Album Extended des Berliner Duos Modeselektor auf ihrem Label Monkeytown Records. In Form eines Visual Albums präsentiert sich die Musik des 27 Stücke umfassenden Mixtapes in Verbindung mit dem eigens dafür produzierten Film Work, der die Energie der Musik in eine radikale Performance des Ausnahmetänzers Corey Scott-Gilbert übersetzt. 
Work entstand unter Mithilfe von Corey Scott-Gilbert (artistic director, performance), Krsn Brasko (film & creative director), Tobias Staab (creative director) und Modeselektor (music).\n\nAufzeichnung von April 2021", + "mainImage":{ + "caption":"Modeselektor", + "url":"https://api-cdn.arte.tv/img/v2/image/oRN9BuXWBguqK9zn7vfSph/__SIZE__?type=TEXT" + }, + "stickers":[ + { + "code":"PLAYABLE", + "label":"PLAYABLE" + }, + { + "code":"FULL_VIDEO", + "label":"Das Programm sehen" + }, + { + "code":"ARTE_CONCERT", + "label":"ARTE Concert" + } + ], + "trackingPixel":"/ct/?abv=A&em=102805-000-A&language=de&pageid=MOST_VIEWED&position=1&support=web&teaserid=102805-000-A_de&teasertitle=Modeselektor%20presents%3A%20Work&zoneCode=listing_MOST_VIEWED&zoneIndexInPage=0&zoneTemplate=vertical_landscape&zoneid=listing_MOST_VIEWED&zonename=Video%20Listing%20%28MOST_VIEWED%29", + "url":"/de/videos/102805-000-A/modeselektor-presents-work/", + "programId":"102805-000-A", + "teaserText":"Ein filmischer Trip mit Corey Scott-Gilbert zur Musik von Modeselektor. 
", + "duration":3600, + "durationLabel":"60 Min.", + "geoblocking":{ + "code":"ALL", + "label":"Weltweit verfügbar", + "inclusion":[ + + ], + "exclusion":[ + + ] + }, + "genre":{ + "label":"Bühne und Konzert", + "url":"/de/arte-concert/", + "deeplink":"arte://emac/ARTE_CONCERT" + }, + "audioVersions":[ + + ], + "availability":{ + "type":"VOD", + "start":"2023-03-05T09:00:00Z", + "end":"2033-03-04T09:00:00Z", + "upcomingDate":"2023-03-05T09:00:00Z", + "label":"Verfügbar vom 05/03/2023 bis 04/03/2033" + }, + "ageRating":0, + "callToAction":"Abspielen", + "clip":null, + "trailer":null, + "childrenCount":null + }, + { + "id":"104017-000-A_de", + "type":"teaser", + "kind":{ + "code":"SHOW", + "label":"Programm", + "isCollection":false + }, + "deeplink":"arte://program/104017-000-A", + "title":"Sisters with Transistors: Die verkannten Heldinnen der elektronischen Musik", + "subtitle":null, + "shortDescription":"Die Dokumentation erzählt die verblüffende Geschichte der Pionierinnen der elektronischen Musik. In einer virtuosen Mischung aus Archivmaterial, Interviews und visionärer Musik entsteht ein unterhaltsames Zeit- und Sittenbild von den Kriegsjahren bis heute, das von der befreienden Kraft neuer Technologien erzählt. 
Erzählerin ist die Musik-Ikone Laurie Anderson.", + "mainImage":{ + "caption":"Sisters with Transistors", + "url":"https://api-cdn.arte.tv/img/v2/image/NeLcWu6wFMEyZmVAsVhfPP/__SIZE__?type=TEXT" + }, + "stickers":[ + { + "code":"PLAYABLE", + "label":"PLAYABLE" + }, + { + "code":"FULL_VIDEO", + "label":"Das Programm sehen" + } + ], + "trackingPixel":"/ct/?abv=A&em=104017-000-A&language=de&pageid=MOST_VIEWED&position=2&support=web&teaserid=104017-000-A_de&teasertitle=Sisters%20with%20Transistors%3A%20Die%20verkannten%20Heldinnen%20der%20elektronischen%20Musik&zoneCode=listing_MOST_VIEWED&zoneIndexInPage=0&zoneTemplate=vertical_landscape&zoneid=listing_MOST_VIEWED&zonename=Video%20Listing%20%28MOST_VIEWED%29", + "url":"/de/videos/104017-000-A/sisters-with-transistors-die-verkannten-heldinnen-der-elektronischen-musik/", + "programId":"104017-000-A", + "teaserText":"Die verblüffende Geschichte der Pionierinnen der elektronischen Musik", + "duration":3180, + "durationLabel":"53 Min.", + "geoblocking":{ + "code":"EUR_DE_FR", + "label":"Verfügbar in den deutsch- und französischsprachigen Ländern Europas", + "inclusion":[ + + ], + "exclusion":[ + + ] + }, + "genre":{ + "label":"Dokus und Reportagen", + "url":"/de/videos/dokumentationen-und-reportagen/", + "deeplink":"arte://emac/DOR" + }, + "audioVersions":[ + { + "code":"OmU", + "label":"Untertitel" + } + ], + "availability":{ + "type":"VOD", + "start":"2022-01-05T09:56:19Z", + "end":"2025-03-31T03:00:00Z", + "upcomingDate":"2022-01-05T09:56:19Z", + "label":"Verfügbar vom 05/01/2022 bis 30/03/2025" + }, + "ageRating":0, + "callToAction":"Abspielen", + "clip":null, + "trailer":null, + "childrenCount":null + }, + { + "id":"106273-006-A_de", + "type":"teaser", + "kind":{ + "code":"SHOW", + "label":"Programm", + "isCollection":false + }, + "deeplink":"arte://program/106273-006-A", + "title":"Reminded – Maria Montessori", + "subtitle":"Lernen ohne Noten", + "shortDescription":"Facebook, Amazon, Google, Microsoft: Einige Gründer 
großer Tech-Firmen waren auf Montessori-Schulen. Das pädagogische Konzept von Maria Montessori ist über 100 Jahre alt und wird heute weltweit angewandt. Aber es ist nicht unumstritten. Malcolm besucht eine Montessori-Schule und fragt eine Erziehungswissenschaftlerin: Ist diese Schulform für jedes Kind geeignet?", + "mainImage":{ + "caption":"Maria Montessori", + "url":"https://api-cdn.arte.tv/img/v2/image/hKBgcjWs9ZuJYFc7uHxGiV/__SIZE__?type=TEXT" + }, + "stickers":[ + { + "code":"PLAYABLE", + "label":"PLAYABLE" + }, + { + "code":"FULL_VIDEO", + "label":"Das Programm sehen" + } + ], + "trackingPixel":"/ct/?abv=A&em=106273-006-A&language=de&pageid=MOST_VIEWED&position=3&support=web&teaserid=106273-006-A_de&teasertitle=Reminded%20%E2%80%93%20Maria%20Montessori&zoneCode=listing_MOST_VIEWED&zoneIndexInPage=0&zoneTemplate=vertical_landscape&zoneid=listing_MOST_VIEWED&zonename=Video%20Listing%20%28MOST_VIEWED%29", + "url":"/de/videos/106273-006-A/reminded-maria-montessori/", + "programId":"106273-006-A", + "teaserText":"Einige Gründer großer Firmen waren auf Montessori-Schulen. 
Wie aktuell ist dieses Konzept nach 100 Jahren?", + "duration":1045, + "durationLabel":"18 Min.", + "geoblocking":{ + "code":"ALL", + "label":"Weltweit verfügbar", + "inclusion":[ + + ], + "exclusion":[ + + ] + }, + "genre":{ + "label":"Dokus und Reportagen", + "url":"/de/videos/dokumentationen-und-reportagen/", + "deeplink":"arte://emac/DOR" + }, + "audioVersions":[ + + ], + "availability":{ + "type":"VOD", + "start":"2023-04-12T03:00:00Z", + "end":"2024-04-12T03:00:00Z", + "upcomingDate":"2023-04-12T03:00:00Z", + "label":"Verfügbar vom 12/04/2023 bis 11/04/2024" + }, + "ageRating":0, + "callToAction":"Abspielen", + "clip":null, + "trailer":null, + "childrenCount":null + } + ], + "pagination":{ + "page":100, + "pages":100, + "totalCount":2000, + "links":{ + "first":"/api/emac/v4/de/web/zones/82b597d7-a83b-4dd8-bea8-ad71675fdf23/content?abv=A&authorizedCountry=DE&page=1&pageId=MOST_VIEWED&zoneIndexInPage=0", + "next":null, + "last":"/api/emac/v4/de/web/zones/82b597d7-a83b-4dd8-bea8-ad71675fdf23/content?abv=A&authorizedCountry=DE&page=100&pageId=MOST_VIEWED&zoneIndexInPage=0" + } + } + } +} \ No newline at end of file From 5c44e998c3493077616a1c875456ec8f1e90e953 Mon Sep 17 00:00:00 2001 From: pidoubleyou Date: Sun, 11 Feb 2024 16:11:04 +0100 Subject: [PATCH 4/6] extend deep search --- .../mServer/crawler/sender/ard/tasks/ArdTopicPageTask.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicPageTask.java b/src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicPageTask.java index b735be93..73cefe03 100644 --- a/src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicPageTask.java +++ b/src/main/java/mServer/crawler/sender/ard/tasks/ArdTopicPageTask.java @@ -7,6 +7,7 @@ import mServer.crawler.sender.ard.ArdFilmInfoDto; import mServer.crawler.sender.ard.ArdTopicInfoDto; import mServer.crawler.sender.ard.json.ArdTopicPageDeserializer; + import mServer.crawler.sender.base.AbstractRecursivConverterTask; import 
mServer.crawler.sender.base.CrawlerUrlDTO; import org.apache.logging.log4j.LogManager; @@ -79,6 +80,10 @@ public class ArdTopicPageTask extends ArdTaskBase TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL2JyLmRlL2Jyb2FkY2FzdFNlcmllcy92ZXJydWVja3QtbmFjaC1tZWVy"); // Dahoam is dahoam TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL2JyLmRlL2Jyb2FkY2FzdFNlcmllcy9icm9hZGNhc3RTZXJpZXM6L2JyZGUvZmVybnNlaGVuL2JheWVyaXNjaGVzLWZlcm5zZWhlbi9zZW5kdW5nZW4vZGFob2FtLWlzLWRhaG9hbQ"); + // Rote Rosen + TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL3dkci5kZS9vbmUvcm90ZXJvc2Vu"); + // Heimatflimmern + TOPICS_LOAD_ALL_PAGES.add("Y3JpZDovL3dkci5kZS9oZWltYXRmbGltbWVybg"); } public ArdTopicPageTask(MediathekReader aCrawler, From cb6ebadc573a55ef71b9626c31f928ee5f6b820b Mon Sep 17 00:00:00 2001 From: pidoubleyou Date: Sun, 11 Feb 2024 16:13:32 +0100 Subject: [PATCH 5/6] fix sonar --- src/main/java/mServer/crawler/sender/arte/MediathekArte.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/mServer/crawler/sender/arte/MediathekArte.java b/src/main/java/mServer/crawler/sender/arte/MediathekArte.java index 73a81d0e..6a560d18 100644 --- a/src/main/java/mServer/crawler/sender/arte/MediathekArte.java +++ b/src/main/java/mServer/crawler/sender/arte/MediathekArte.java @@ -272,7 +272,7 @@ private void loadNextPage(String sender, String langCode, String aCategory, Stri } private boolean shouldLoadNextPage(String sender) { - if (sender == Const.ARTE_DE) { + if (Const.ARTE_DE.equals(sender)) { return true; } From e7365c37007ce3a3451b8ed84ef9c16ee5d43516 Mon Sep 17 00:00:00 2001 From: Alexander F Date: Wed, 14 Feb 2024 11:07:17 +0100 Subject: [PATCH 6/6] Version auf 3.1.228 angehoben. 
--- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index 96986f6d..438aae77 100644 --- a/build.gradle +++ b/build.gradle @@ -27,7 +27,7 @@ sourceCompatibility = JavaVersion.VERSION_17 targetCompatibility = JavaVersion.VERSION_17 group = 'de.mediathekview' archivesBaseName = "MServer" -version = '3.1.227' +version = '3.1.228' def jarName = 'MServer.jar' def mainClass = 'mServer.Main'