// Patch overview (free text placed before the first "diff --git" header; patch tools skip leading text).
//
// Summary of the changes visible in this patch:
//  * ArdConstants: DAY_PAGE_URL no longer builds on API_URL + "/page-gateway/compilations/..."; it is now
//    the absolute URL "https://programm-api.ard.de/program/api/program?day=%s&channelIds=%s&mode=channel".
//    The placeholders change from four (%s, %s, %s, %d) to two (%s, %s), and DAY_PAGE_SIZE is deleted.
//  * ArdCrawler: MAX_DAYS_PAST goes from 2 to 7, MAX_DAYS_PAST_AVAILABLE from 6 to 7, and a new
//    MAX_DAYS_FUTURE = 7 is introduced. createDayUrlsToCrawl() gains a block that runs only when
//    CrawlerTool.loadLongMax() is true and calls addDayUrls(..., now.plusDays(i)) for
//    i = 0 .. MAX_DAYS_FUTURE - 1. addDayUrls() now formats DAY_PAGE_URL with (formattedDay, client)
//    instead of (client, formattedDay, formattedDay, DAY_PAGE_SIZE).
//  * ArdDayPageDeserializer: stops extending ArdTeasersDeserializer. deserialize() treats the root as a
//    JSON object and, if it has "channels", walks channels -> "timeSlots" -> each time slot (itself an
//    array) -> entry. The id is taken from links.target.urlId when that path exists, otherwise from
//    urlId on the entry itself; every id found becomes
//    new ArdFilmInfoDto(id, String.format(ArdConstants.ITEM_URL, id), 1).
//
// NOTE(review) - points to confirm before merging:
//  * The new loop starts at i = 0, so now.plusDays(0) is enqueued. Only the tail of the preceding loop
//    (addDayUrls(dayUrlsToCrawl, now.minusDays(i))) is visible here; if that loop also covers offset 0,
//    today's URLs are offered to the queue twice. Starting at 1 would avoid that - verify against the
//    full method.
//  * deserialize() calls jsonElement.getAsJsonObject() with no type check, whereas the removed code
//    returned an empty set when the root was not an array. Presumably a non-object response now surfaces
//    as an exception instead of an empty result - confirm that callers tolerate this.
//  * parseChannels() calls channel.getAsJsonObject().get(ELEMENT_TIMESLOTS).getAsJsonArray() without a
//    has(...) check, unlike the "channels" lookup in deserialize(). A channel without "timeSlots" would
//    presumably fail here - confirm against real API responses.
//  * As stored, this patch has all line breaks folded into three lines and generic type arguments look
//    stripped (e.g. "JsonDeserializer>", "Set deserialize", "Optional id"). It probably needs to be
//    regenerated from the repository before it can be applied.
//
diff --git a/src/main/java/mServer/crawler/sender/ard/ArdConstants.java b/src/main/java/mServer/crawler/sender/ard/ArdConstants.java index b7691f2ff..9960b4c44 100644 --- a/src/main/java/mServer/crawler/sender/ard/ArdConstants.java +++ b/src/main/java/mServer/crawler/sender/ard/ArdConstants.java @@ -10,9 +10,8 @@ public class ArdConstants { public static final String TOPICS_URL = API_URL + "/page-gateway/pages/%s/editorial/experiment-a-z?embedded=false"; public static final String TOPICS_COMPILATION_URL = API_URL + "/page-gateway/widgets/%s/editorials/%s?pageNumber=0&pageSize=%s"; public static final String TOPIC_URL = API_URL + "/page-gateway/widgets/ard/asset/%s?pageSize=%d"; - public static final String DAY_PAGE_URL = API_URL + "/page-gateway/compilations/%s/pastbroadcasts?startDateTime=%sT00:00:00.000Z&endDateTime=%sT23:59:59.000Z&pageNumber=0&pageSize=%d"; + public static final String DAY_PAGE_URL = "https://programm-api.ard.de/program/api/program?day=%s&channelIds=%s&mode=channel"; - public static final int DAY_PAGE_SIZE = 100; public static final int TOPICS_COMPILATION_PAGE_SIZE = 200; public static final int TOPIC_PAGE_SIZE = 50; diff --git a/src/main/java/mServer/crawler/sender/ard/ArdCrawler.java b/src/main/java/mServer/crawler/sender/ard/ArdCrawler.java index 4eec58513..a2c2d9683 100644 --- a/src/main/java/mServer/crawler/sender/ard/ArdCrawler.java +++ b/src/main/java/mServer/crawler/sender/ard/ArdCrawler.java @@ -20,8 +20,9 @@ public class ArdCrawler extends MediathekCrawler { public static final String SENDERNAME = Const.ARD; - private static final int MAX_DAYS_PAST = 2; - private static final int MAX_DAYS_PAST_AVAILABLE = 6; + private static final int MAX_DAYS_PAST = 7; + private static final int MAX_DAYS_FUTURE = 7; + private static final int MAX_DAYS_PAST_AVAILABLE = 7; private static final DateTimeFormatter DAY_PAGE_DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd"); @@ -71,6 +72,12 @@ private ConcurrentLinkedQueue createDayUrlsToCrawl() { 
addDayUrls(dayUrlsToCrawl, now.minusDays(i)); } + if (CrawlerTool.loadLongMax()) { + for (int i = 0; i < MAX_DAYS_FUTURE; i++) { + addDayUrls(dayUrlsToCrawl, now.plusDays(i)); + } + } + addSpecialDays(dayUrlsToCrawl); return dayUrlsToCrawl; @@ -79,7 +86,7 @@ private ConcurrentLinkedQueue createDayUrlsToCrawl() { private void addDayUrls(ConcurrentLinkedQueue dayUrlsToCrawl, LocalDateTime day) { final String formattedDay = day.format(DAY_PAGE_DATE_FORMATTER); for (String client : ArdConstants.CLIENTS) { - final String url = String.format(ArdConstants.DAY_PAGE_URL, client, formattedDay, formattedDay, ArdConstants.DAY_PAGE_SIZE); + final String url = String.format(ArdConstants.DAY_PAGE_URL, formattedDay, client); dayUrlsToCrawl.offer(new CrawlerUrlDTO(url)); } } diff --git a/src/main/java/mServer/crawler/sender/ard/json/ArdDayPageDeserializer.java b/src/main/java/mServer/crawler/sender/ard/json/ArdDayPageDeserializer.java index 5ef875609..c489ec270 100644 --- a/src/main/java/mServer/crawler/sender/ard/json/ArdDayPageDeserializer.java +++ b/src/main/java/mServer/crawler/sender/ard/json/ArdDayPageDeserializer.java @@ -2,33 +2,63 @@ import com.google.gson.*; +import mServer.crawler.sender.ard.ArdConstants; import mServer.crawler.sender.ard.ArdFilmInfoDto; +import mServer.crawler.sender.base.JsonUtils; import java.lang.reflect.Type; import java.util.HashSet; +import java.util.Optional; import java.util.Set; -public class ArdDayPageDeserializer extends ArdTeasersDeserializer - implements JsonDeserializer> { +public class ArdDayPageDeserializer implements JsonDeserializer> { - private static final String ELEMENT_TEASERS = "teasers"; + private static final String ELEMENT_CHANNELS = "channels"; + private static final String ELEMENT_LINKS = "links"; + private static final String ELEMENT_TARGET = "target"; + private static final String ELEMENT_TIMESLOTS = "timeSlots"; + private static final String ATTRIBUTE_URL_ID = "urlId"; @Override public Set deserialize( final JsonElement 
jsonElement, final Type type, final JsonDeserializationContext context) { final Set results = new HashSet<>(); - if (!jsonElement.isJsonArray()) { - return results; + final JsonObject jsonObject = jsonElement.getAsJsonObject(); + if (jsonObject.has(ELEMENT_CHANNELS)) { + final JsonArray channels = jsonObject.get(ELEMENT_CHANNELS).getAsJsonArray(); + results.addAll(parseChannels(channels)); } - final JsonObject firstElement = jsonElement.getAsJsonArray().get(0).getAsJsonObject(); + return results; + } - if (firstElement.has(ELEMENT_TEASERS)) { - final JsonArray teasers = firstElement.get(ELEMENT_TEASERS).getAsJsonArray(); - results.addAll(parseTeasers(teasers)); + private Set parseChannels(JsonArray channels) { + Set entries = new HashSet<>(); + for (JsonElement channel : channels) { + final JsonArray timeSlots = channel.getAsJsonObject().get(ELEMENT_TIMESLOTS).getAsJsonArray(); + for (JsonElement timeSlot : timeSlots) { + for (JsonElement entry : timeSlot.getAsJsonArray()) { + final JsonObject entryObject = entry.getAsJsonObject(); + final Optional id = toId(entryObject); + id.ifPresent(s -> entries.add(createFilmInfo(s, 1))); + } + } } + return entries; + } - return results; + private ArdFilmInfoDto createFilmInfo(final String id, final int numberOfClips) { + final String url = String.format(ArdConstants.ITEM_URL, id); + return new ArdFilmInfoDto(id, url, numberOfClips); + } + + private Optional toId(final JsonObject teaserObject) { + if (JsonUtils.checkTreePath(teaserObject, ELEMENT_LINKS, ELEMENT_TARGET)) { + final JsonObject targetObject = + teaserObject.get(ELEMENT_LINKS).getAsJsonObject().get(ELEMENT_TARGET).getAsJsonObject(); + return JsonUtils.getAttributeAsString(targetObject, ATTRIBUTE_URL_ID); + } + return JsonUtils.getAttributeAsString(teaserObject, ATTRIBUTE_URL_ID); } }