diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/ArdTopicInfoDto.java b/src/main/java/de/mediathekview/mserver/crawler/ard/ArdTopicInfoDto.java index c60ae7a2f..a36b4da79 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/ArdTopicInfoDto.java +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/ArdTopicInfoDto.java @@ -5,34 +5,22 @@ public class ArdTopicInfoDto { private final Set filmInfos; - private int subPageNumber; - private int maxSubPageNumber; + private int pageNumber; + private int pageSize; + private int totalElements; + public ArdTopicInfoDto(final Set filmInfos) { this.filmInfos = filmInfos; - subPageNumber = 0; - maxSubPageNumber = 0; + setPageNumber(0); + setPageSize(0); + setTotalElements(0); } public Set getFilmInfos() { return filmInfos; } - public int getSubPageNumber() { - return subPageNumber; - } - - public void setSubPageNumber(final int subPageNumber) { - this.subPageNumber = subPageNumber; - } - - public int getMaxSubPageNumber() { - return maxSubPageNumber; - } - - public void setMaxSubPageNumber(final int maxSubPageNumber) { - this.maxSubPageNumber = maxSubPageNumber; - } @Override public boolean equals(final Object o) { @@ -42,13 +30,38 @@ public boolean equals(final Object o) { if (!(o instanceof final ArdTopicInfoDto that)) { return false; } - return getSubPageNumber() == that.getSubPageNumber() - && getMaxSubPageNumber() == that.getMaxSubPageNumber() + return getPageNumber() == that.getPageNumber() + && getPageSize() == that.getPageSize() + && getTotalElements() == that.getTotalElements() && Objects.equals(filmInfos, that.filmInfos); } @Override public int hashCode() { - return Objects.hash(filmInfos, getSubPageNumber(), getMaxSubPageNumber()); + return Objects.hash(filmInfos, getPageNumber(), getPageSize(), getTotalElements()); + } + + public int getPageNumber() { + return pageNumber; + } + + public void setPageNumber(int pageNumber) { + this.pageNumber = pageNumber; + } + + public int getPageSize() { + return pageSize; + } + + public void setPageSize(int pageSize) { + this.pageSize = pageSize; + } + + public int getTotalElements() { + return totalElements; + } + + public void setTotalElements(int totalElements) { + this.totalElements = totalElements; } } diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdTopicPageDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdTopicPageDeserializer.java index 7bcc9ba54..b4f166811 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdTopicPageDeserializer.java +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/json/ArdTopicPageDeserializer.java @@ -30,13 +30,12 @@ public ArdTopicInfoDto deserialize( } final JsonElement paginationElement = showPageObject.get(ELEMENT_PAGINATION); - ardTopicInfoDto.setSubPageNumber( - getChildElementAsIntOrNullIfNotExist(paginationElement, ELEMENT_PAGE_NUMBER)); + final int pageNumber = getChildElementAsIntOrNullIfNotExist(paginationElement, ELEMENT_PAGE_NUMBER); final int totalElements = getChildElementAsIntOrNullIfNotExist(paginationElement, ELEMENT_TOTAL_ELEMENTS); final int pageSize = getChildElementAsIntOrNullIfNotExist(paginationElement, ELEMENT_PAGE_SIZE); - ardTopicInfoDto.setMaxSubPageNumber(pageSize == 0 ? 0 : - (totalElements+pageSize-1)/pageSize); - + ardTopicInfoDto.setPageNumber(pageNumber); + ardTopicInfoDto.setPageSize(pageSize); + ardTopicInfoDto.setTotalElements(totalElements); return ardTopicInfoDto; } diff --git a/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTopicPageTask.java b/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTopicPageTask.java index 805ef3152..53472d506 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTopicPageTask.java +++ b/src/main/java/de/mediathekview/mserver/crawler/ard/tasks/ArdTopicPageTask.java @@ -14,8 +14,7 @@ import java.lang.reflect.Type; import java.util.Queue; import java.util.concurrent.ConcurrentLinkedQueue; -import java.util.stream.Collectors; -import java.util.stream.IntStream; + public class ArdTopicPageTask extends ArdTaskBase { private static final Logger LOG = LogManager.getLogger(ArdTopicPageTask.class); @@ -52,35 +51,21 @@ protected void processRestTarget(final CrawlerUrlDTO aDTO, final WebTarget aTarg private Queue createSubPageUrls( final WebTarget aTarget, final ArdTopicInfoDto topicInfo) { final Queue subpages = new ConcurrentLinkedQueue<>(); - - final int actualSubPageNumber = topicInfo.getSubPageNumber(); - final Integer maximumAllowedSubpages = crawler.getCrawlerConfig().getMaximumSubpages(); - if (actualSubPageNumber != 0) { - LOG.debug("Sub page {} is already the maximum allowed sub page.", actualSubPageNumber); + if (topicInfo.getTotalElements() < topicInfo.getPageSize() || topicInfo.getPageNumber() > 0) { return subpages; } - - final int maxSubPageNumber = topicInfo.getMaxSubPageNumber(); - subpages.addAll( - IntStream.range( - actualSubPageNumber + 1, - (maximumAllowedSubpages >= maxSubPageNumber - ? maxSubPageNumber - : maximumAllowedSubpages) - + 1) - .parallel() - .mapToObj(subpageNumber -> changePageNumber(aTarget, subpageNumber)) - .map(CrawlerUrlDTO::new) - .collect(Collectors.toSet())); - - if (LOG.isDebugEnabled() && maxSubPageNumber > maximumAllowedSubpages) { - LOG.debug( - "Found {} sub pages, these are {} more then the allowed {} to crawl. Added {} and skipped the rest.", - maxSubPageNumber, - maxSubPageNumber - maximumAllowedSubpages, - maximumAllowedSubpages, - subpages.size()); + // + final Integer maximumAllowedSubpages = crawler.getCrawlerConfig().getMaximumSubpages(); + int index = 0; + while ((topicInfo.getPageSize() + (index * topicInfo.getPageSize())) < topicInfo.getTotalElements()) { + subpages.add(new CrawlerUrlDTO(changePageNumber(aTarget, index + 1))); + index++; + if (index >= maximumAllowedSubpages) { + LOG.debug("ignore more subpage due to limit of {} pages but found {}", maximumAllowedSubpages, Integer.valueOf(topicInfo.getTotalElements() / topicInfo.getPageSize())); + break; + } } + LOG.debug("Found {} subpage", subpages.size()); return subpages; } diff --git a/src/test/java/de/mediathekview/mserver/crawler/ard/json/ArdTopicPageDeserializerPaginationTest.java b/src/test/java/de/mediathekview/mserver/crawler/ard/json/ArdTopicPageDeserializerPaginationTest.java index b8ec457b4..8c3f23051 100644 --- a/src/test/java/de/mediathekview/mserver/crawler/ard/json/ArdTopicPageDeserializerPaginationTest.java +++ b/src/test/java/de/mediathekview/mserver/crawler/ard/json/ArdTopicPageDeserializerPaginationTest.java @@ -17,7 +17,8 @@ public void testDeserializePagination() { final ArdTopicInfoDto ardTopicInfoDto = instance.deserialize(jsonElement, null, null); - assertThat(ardTopicInfoDto.getSubPageNumber(), is(0)); - assertThat(ardTopicInfoDto.getMaxSubPageNumber(), is(5)); + assertThat(ardTopicInfoDto.getPageNumber(), is(0)); + assertThat(ardTopicInfoDto.getPageSize(), is(50)); + assertThat(ardTopicInfoDto.getTotalElements(), is(204)); } }