From 9f0487d03aef0b9fb7bcdba4fa9c0fef5ba41bb1 Mon Sep 17 00:00:00 2001 From: CodingPF Date: Fri, 31 May 2024 20:50:33 +0200 Subject: [PATCH 1/4] ZDF Film to Partner logic --- MServer-Config.yaml | 11 +- .../base/utils/FilmlistDebugHelper.java | 111 ++++++++++++ .../crawler/dreisat/DreiSatCrawler.java | 2 +- .../crawler/dreisat/DreisatConstants.java | 11 ++ .../crawler/phoenix/PhoenixConstants.java | 12 ++ .../phoenix/tasks/PhoenixFilmDetailTask.java | 2 +- .../crawler/zdf/AbstractZdfCrawler.java | 10 +- .../mserver/crawler/zdf/ZdfConstants.java | 15 ++ .../mserver/crawler/zdf/ZdfCrawler.java | 2 +- .../zdf/json/ZdfFilmDetailDeserializer.java | 17 +- .../crawler/zdf/tasks/ZdfFilmDetailTask.java | 14 +- .../mserver/crawler/CompareFilmlistsTest.java | 160 ++++++++++++++++++ .../json/ZdfFilmDetailDeserializerTest.java | 16 +- ...dfFilmDetailTaskMultipleLanguagesTest.java | 12 +- .../zdf/tasks/ZdfFilmDetailTaskTest.java | 14 +- 15 files changed, 384 insertions(+), 25 deletions(-) create mode 100644 src/main/java/de/mediathekview/mserver/base/utils/FilmlistDebugHelper.java create mode 100644 src/test/java/de/mediathekview/mserver/crawler/CompareFilmlistsTest.java diff --git a/MServer-Config.yaml b/MServer-Config.yaml index 758fe625f..5fc58be27 100644 --- a/MServer-Config.yaml +++ b/MServer-Config.yaml @@ -1,7 +1,7 @@ #### Server configurations #### # The maximum amount of cpu threads to be used. -maximumCpuThreads: 1 +maximumCpuThreads: 10 # The maximum duration in minutes the server should run.
# If set to 0 the server runs without a time limit. @@ -28,11 +28,11 @@ senderIncluded: #- FUNK #- KIKA # - DW - - ORF + # - ORF #- PHOENIX #- SRF #- SR - #- ZDF + - ZDF #SRF,SR,PHONIX,ORF,KIKA,DW,3SAT< @@ -130,7 +130,7 @@ maximumCrawlDurationInMinutes: 120 # Enables the topics search # maximumSubpages limits the depth of the topics search -topicsSearchEnabled: true +topicsSearchEnabled: false # The maximum amount of sub pages to be crawled.
# Example: If a Sendung overview side has 10 pages with videos for this Sendung and @@ -176,7 +176,8 @@ senderConfigurations: maximumSubpages: 2 maximumRequestsPerSecond: 8.0 ZDF: - maximumRequestsPerSecond: 10.0 + maximumDaysForSendungVerpasstSection: 21 + maximumRequestsPerSecond: 20 FUNK: maximumUrlsPerTask: 99 DREISAT: diff --git a/src/main/java/de/mediathekview/mserver/base/utils/FilmlistDebugHelper.java b/src/main/java/de/mediathekview/mserver/base/utils/FilmlistDebugHelper.java new file mode 100644 index 000000000..92bbae310 --- /dev/null +++ b/src/main/java/de/mediathekview/mserver/base/utils/FilmlistDebugHelper.java @@ -0,0 +1,111 @@ +package de.mediathekview.mserver.base.utils; + +import java.util.ArrayList; +import java.util.Set; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import de.mediathekview.mlib.daten.Film; +import de.mediathekview.mlib.daten.Filmlist; +import de.mediathekview.mlib.daten.Resolution; +import de.mediathekview.mserver.crawler.kika.KikaApiCrawler; + +public class FilmlistDebugHelper { + private static final Logger LOG = LogManager.getLogger(FilmlistDebugHelper.class); + + + + public static Film getFilmFromSet(Set base, Film searchFilm) { + for (Film e : base) { + if (e.equals(searchFilm)) { + return e; + } + } + return null; + } + + public static void printFilmlistForSender(Filmlist list, String sender, boolean printFullDetails) { + list.getFilms().values().stream().forEach( e -> { + if (e.getSenderName().equalsIgnoreCase(sender)) { + if (printFullDetails) { + LOG.debug("{} {} {}", e.getTitel(), e.getThema(), e); + } else { + LOG.debug("{} {}", e.getTitel(), e.getThema()); + } + } + }); + } + + public static void printFilmlistForThema(Filmlist list, String thema, boolean printFullDetails) { + list.getFilms().values().stream().forEach( e -> { + if (e.getThema().equalsIgnoreCase(thema)) { + if (printFullDetails) { + LOG.debug("{} {} {} {}", e.getSenderName(), e.getTitel(), e.getThema(), e); + } else { + LOG.debug("{} {} {}", e.getSenderName(), e.getTitel(), e.getThema()); + } + } + }); + } + + + + + public static void compareFilmlist(Filmlist aFilmlist, Filmlist bFilmlist) { + ArrayList bFilms = new ArrayList<>(bFilmlist.getFilms().values()); + aFilmlist.getFilms().values().forEach( f -> { + if (bFilms.indexOf(f) == -1) { + LOG.info("Missing Film in source list"); + LOG.info(f.toString()); + } else { + Film expectedFilm = bFilms.get(bFilms.indexOf(f)); + compare(f, expectedFilm); + } + }); + ArrayList aFilms = new ArrayList<>(aFilmlist.getFilms().values()); + bFilms.forEach( f -> { + if (aFilms.indexOf(f) == -1) { + LOG.info("Missing Film in target list"); + LOG.info(f.toString()); + } + }); + } + + private static void compare(Film aFilm, Film bFilm) { + String error = ""; + if (!aFilm.getSenderName().equalsIgnoreCase(bFilm.getSenderName())) { + error = "Incorrect Sender"; + } else if (!aFilm.getTitel().equalsIgnoreCase(bFilm.getTitel())){ + error = "Incorrect Title"; + } else if (!aFilm.getThema().equalsIgnoreCase(bFilm.getThema())){ + error = "Incorrect Topic"; + } else if (!aFilm.getDuration().equals(bFilm.getDuration())){ + error = "Incorrect Duration"; + } else if (!aFilm.getBeschreibung().equalsIgnoreCase(bFilm.getBeschreibung())){ + error = "Incorrect Description"; + } else if (!aFilm.getWebsite().toString().equalsIgnoreCase(bFilm.getWebsite().toString())){ + error = "Incorrect website"; + } else if (!aFilm.getTime().equals(bFilm.getTime())){ + error = "Incorrect Time"; + } else if (!aFilm.getSubtitles().equals(bFilm.getSubtitles())){ + error = "Incorrect subtitle"; + } else if (bFilm.getUrl(Resolution.SMALL) != null && + !aFilm.getUrl(Resolution.SMALL).toString().equalsIgnoreCase(bFilm.getUrl(Resolution.SMALL).toString())) { + error = "URL SMALL"; + } else if (bFilm.getUrl(Resolution.NORMAL) != null && + !aFilm.getUrl(Resolution.NORMAL).toString().equalsIgnoreCase(bFilm.getUrl(Resolution.NORMAL).toString())) { + error = "URL NORMAL"; + } else if (bFilm.getUrl(Resolution.HD) != null && + !aFilm.getUrl(Resolution.HD).toString().equalsIgnoreCase(bFilm.getUrl(Resolution.HD).toString())) { + error = "URL HD"; + } + // + if (error != "") { + LOG.info(error); + LOG.info(aFilm.toString()); + LOG.info(bFilm.toString()); + } + } + +} diff --git a/src/main/java/de/mediathekview/mserver/crawler/dreisat/DreiSatCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/dreisat/DreiSatCrawler.java index 7efcc8366..e17afde86 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/dreisat/DreiSatCrawler.java +++ b/src/main/java/de/mediathekview/mserver/crawler/dreisat/DreiSatCrawler.java @@ -26,7 +26,7 @@ public DreiSatCrawler( final Collection aMessageListeners, final Collection aProgressListeners, final MServerConfigManager rootConfig) { - super(aForkJoinPool, aMessageListeners, aProgressListeners, rootConfig); + super(aForkJoinPool, aMessageListeners, aProgressListeners, rootConfig, DreisatConstants.PARTNER_TO_SENDER); } /** diff --git a/src/main/java/de/mediathekview/mserver/crawler/dreisat/DreisatConstants.java b/src/main/java/de/mediathekview/mserver/crawler/dreisat/DreisatConstants.java index 2134634b5..edd6a21ba 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/dreisat/DreisatConstants.java +++ b/src/main/java/de/mediathekview/mserver/crawler/dreisat/DreisatConstants.java @@ -1,5 +1,10 @@ package de.mediathekview.mserver.crawler.dreisat; +import java.util.HashMap; +import java.util.Map; + +import de.mediathekview.mlib.daten.Sender; + public final class DreisatConstants { /** Base url of the 3Sat website. */ @@ -13,5 +18,11 @@ public final class DreisatConstants { public static final String URL_HTML_DAY = URL_BASE + "/programm?airtimeDate=%s"; + public static final Map PARTNER_TO_SENDER = new HashMap<>(); + + static { + PARTNER_TO_SENDER.put("3sat", Sender.DREISAT); + } + private DreisatConstants() {} } diff --git a/src/main/java/de/mediathekview/mserver/crawler/phoenix/PhoenixConstants.java b/src/main/java/de/mediathekview/mserver/crawler/phoenix/PhoenixConstants.java index 079261d41..d0c4f2d4a 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/phoenix/PhoenixConstants.java +++ b/src/main/java/de/mediathekview/mserver/crawler/phoenix/PhoenixConstants.java @@ -1,5 +1,10 @@ package de.mediathekview.mserver.crawler.phoenix; +import java.util.HashMap; +import java.util.Map; + +import de.mediathekview.mlib.daten.Sender; + public final class PhoenixConstants { private PhoenixConstants() {} @@ -10,4 +15,11 @@ private PhoenixConstants() {} public static final String URL_FILM_DETAIL_JSON = "/response/id/"; public static final String URL_VIDEO_DETAILS = "%s/php/mediaplayer/data/beitrags_details.php?id=%s"; + + public static final Map PARTNER_TO_SENDER = new HashMap<>(); + + static { + PARTNER_TO_SENDER.put("Phoenix", Sender.PHOENIX); + } + } diff --git a/src/main/java/de/mediathekview/mserver/crawler/phoenix/tasks/PhoenixFilmDetailTask.java b/src/main/java/de/mediathekview/mserver/crawler/phoenix/tasks/PhoenixFilmDetailTask.java index a10280c21..775fd5ba3 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/phoenix/tasks/PhoenixFilmDetailTask.java +++ b/src/main/java/de/mediathekview/mserver/crawler/phoenix/tasks/PhoenixFilmDetailTask.java @@ -72,7 +72,7 @@ protected void processRestTarget(final CrawlerUrlDTO aDTO, final WebTarget aTarg this.filmDetailHost, filmDetailDto.getBaseName()))); final ZdfFilmDetailTask zdfFilmDetailTask = - new ZdfFilmDetailTask(this.crawler, "", shows, null); + new ZdfFilmDetailTask(this.crawler, "", shows, null, PhoenixConstants.PARTNER_TO_SENDER); final Set films = zdfFilmDetailTask.invoke(); films.forEach( film -> { diff --git a/src/main/java/de/mediathekview/mserver/crawler/zdf/AbstractZdfCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/zdf/AbstractZdfCrawler.java index db2220ead..816fdafe0 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/zdf/AbstractZdfCrawler.java +++ b/src/main/java/de/mediathekview/mserver/crawler/zdf/AbstractZdfCrawler.java @@ -1,6 +1,7 @@ package de.mediathekview.mserver.crawler.zdf; import de.mediathekview.mlib.daten.Film; +import de.mediathekview.mlib.daten.Sender; import de.mediathekview.mlib.messages.listener.MessageListener; import de.mediathekview.mserver.base.config.MServerConfigManager; import de.mediathekview.mserver.base.messages.ServerMessages; @@ -19,6 +20,7 @@ import java.time.temporal.ChronoUnit; import java.util.Collection; import java.util.HashSet; +import java.util.Map; import java.util.Queue; import java.util.Set; import java.util.concurrent.ConcurrentLinkedQueue; @@ -29,13 +31,16 @@ public abstract class AbstractZdfCrawler extends AbstractCrawler { private static final Logger LOG = LogManager.getLogger(AbstractZdfCrawler.class); + private final Map partner2Sender; protected AbstractZdfCrawler( final ForkJoinPool aForkJoinPool, final Collection aMessageListeners, final Collection aProgressListeners, - final MServerConfigManager rootConfig) { + final MServerConfigManager rootConfig, + final Map partner2Sender) { super(aForkJoinPool, aMessageListeners, aProgressListeners, rootConfig); + this.partner2Sender = partner2Sender; } @Override @@ -60,7 +65,8 @@ protected RecursiveTask> createCrawlerTask() { this, getApiUrlBase(), new ConcurrentLinkedQueue<>(shows), - configuration.getVideoAuthKey().orElse(null)); + configuration.getVideoAuthKey().orElse(null), + partner2Sender); } catch (final InterruptedException ex) { LOG.debug("{} crawler interrupted.", getSender().getName(), ex); Thread.currentThread().interrupt(); diff --git a/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfConstants.java b/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfConstants.java index 96245c95d..32e20fb41 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfConstants.java +++ b/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfConstants.java @@ -1,5 +1,10 @@ package de.mediathekview.mserver.crawler.zdf; +import java.util.HashMap; +import java.util.Map; + +import de.mediathekview.mlib.daten.Sender; + public final class ZdfConstants { /** Name of the header required for authentification. */ @@ -27,6 +32,16 @@ public final class ZdfConstants { /** The language key of german audio description. */ public static final String LANGUAGE_GERMAN_AD = LANGUAGE_GERMAN + LANGUAGE_SUFFIX_AD; public static final String LANGUAGE_GERMAN_DGS = LANGUAGE_GERMAN + LANGUAGE_SUFFIX_DGS; + + public static final Map PARTNER_TO_SENDER = new HashMap<>(); + + static { + PARTNER_TO_SENDER.put("ZDFinfo", Sender.ZDF); + PARTNER_TO_SENDER.put("ZDFneo", Sender.ZDF); + PARTNER_TO_SENDER.put("ZDF", Sender.ZDF); + PARTNER_TO_SENDER.put("EMPTY", Sender.ZDF); + // IGNORED Sender [KI.KA, WDR, PHOENIX, one, HR, 3sat, SWR, arte, BR, RBB, ARD, daserste, alpha, MDR, radiobremen, funk, ZDF, NDR, SR] + } private ZdfConstants() {} } diff --git a/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfCrawler.java index 8bb6342d3..d0bf1261a 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfCrawler.java +++ b/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfCrawler.java @@ -30,7 +30,7 @@ public ZdfCrawler( final Collection aMessageListeners, final Collection aProgressListeners, final MServerConfigManager rootConfig) { - super(aForkJoinPool, aMessageListeners, aProgressListeners, rootConfig); + super(aForkJoinPool, aMessageListeners, aProgressListeners, rootConfig, ZdfConstants.PARTNER_TO_SENDER); } @Override diff --git a/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfFilmDetailDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfFilmDetailDeserializer.java index bfabf95e2..88f878d83 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfFilmDetailDeserializer.java +++ b/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfFilmDetailDeserializer.java @@ -44,6 +44,7 @@ public class ZdfFilmDetailDeserializer implements JsonDeserializer partner2Sender; - public ZdfFilmDetailDeserializer(final String apiUrlBase, final Sender sender) { + public ZdfFilmDetailDeserializer(final String apiUrlBase, Map partner2Sender) { this.apiUrlBase = apiUrlBase; - this.sender = sender; + this.partner2Sender= partner2Sender; } @Override @@ -93,7 +94,12 @@ public Optional deserialize( mainVideoTarget = mainVideo.get(JSON_ELEMENT_TARGET).getAsJsonObject(); } } - + final Optional tvService = JsonUtils.getElementValueAsString(aJsonObject, JSON_ELEMENT_TVSERVICE); + //System.out.println(tvService + " " + partner2Sender.get(tvService.orElse("EMPTY"))); + if (!partner2Sender.containsKey(tvService.orElse("EMPTY"))) { + return Optional.empty(); + } + final Optional title = parseTitle(rootNode, programItemTarget); final Optional topic = parseTopic(rootNode); final Optional description = parseDescription(rootNode); @@ -106,7 +112,7 @@ public Optional deserialize( if (title.isPresent()) { final Optional film = - createFilm(topic, title.get(), description, website, time, duration); + createFilm(partner2Sender.get(tvService.orElse("EMPTY")), topic, title.get(), description, website, time, duration); if (film.isPresent() && downloadUrl.containsKey(DOWNLOAD_URL_DEFAULT)) { return Optional.of(new ZdfFilmDto(film.get(), downloadUrl.get(DOWNLOAD_URL_DEFAULT), downloadUrl.get(DOWNLOAD_URL_DGS))); @@ -157,6 +163,7 @@ private String finalizeDownloadUrl(final String url) { } private Optional createFilm( + final Sender sender, final Optional aTopic, final String aTitle, final Optional aDescription, diff --git a/src/main/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfFilmDetailTask.java b/src/main/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfFilmDetailTask.java index ab3d60c1a..1b04a1eb7 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfFilmDetailTask.java +++ b/src/main/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfFilmDetailTask.java @@ -5,6 +5,7 @@ import de.mediathekview.mlib.daten.FilmUrl; import de.mediathekview.mlib.daten.GeoLocations; import de.mediathekview.mlib.daten.Resolution; +import de.mediathekview.mlib.daten.Sender; import de.mediathekview.mserver.crawler.basic.AbstractCrawler; import de.mediathekview.mserver.crawler.basic.AbstractRecursiveConverterTask; import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO; @@ -33,17 +34,19 @@ public class ZdfFilmDetailTask extends ZdfTaskBase { private final transient ZdfVideoUrlOptimizer optimizer = new ZdfVideoUrlOptimizer(crawler); private final String apiUrlBase; + private final Map partner2Sender; public ZdfFilmDetailTask( final AbstractCrawler aCrawler, final String aApiUrlBase, final Queue aUrlToCrawlDtos, - final String authKey) { + final String authKey, + final Map partner2Sender) { super(aCrawler, aUrlToCrawlDtos, authKey); apiUrlBase = aApiUrlBase; - + this.partner2Sender = partner2Sender; registerJsonDeserializer( - OPTIONAL_FILM_TYPE_TOKEN, new ZdfFilmDetailDeserializer(apiUrlBase, aCrawler.getSender())); + OPTIONAL_FILM_TYPE_TOKEN, new ZdfFilmDetailDeserializer(apiUrlBase, partner2Sender)); registerJsonDeserializer(OPTIONAL_DOWNLOAD_DTO_TYPE_TOKEN, new ZdfDownloadDtoDeserializer()); } @@ -113,9 +116,6 @@ protected void processRestTarget(final CrawlerUrlDTO aDto, final WebTarget aTarg crawler.incrementAndGetErrorCount(); crawler.updateProgress(); } - } else { - crawler.incrementAndGetErrorCount(); - crawler.updateProgress(); } } @@ -140,7 +140,7 @@ private void appendSignLanguage(DownloadDto downloadDto, Optional urlSig protected AbstractRecursiveConverterTask createNewOwnInstance( final Queue aElementsToProcess) { return new ZdfFilmDetailTask( - crawler, apiUrlBase, aElementsToProcess, getAuthKey().orElse(null)); + crawler, apiUrlBase, aElementsToProcess, getAuthKey().orElse(null), partner2Sender); } private void addFilm(final DownloadDto downloadDto, final Film result) diff --git a/src/test/java/de/mediathekview/mserver/crawler/CompareFilmlistsTest.java b/src/test/java/de/mediathekview/mserver/crawler/CompareFilmlistsTest.java new file mode 100644 index 000000000..321d3b98f --- /dev/null +++ b/src/test/java/de/mediathekview/mserver/crawler/CompareFilmlistsTest.java @@ -0,0 +1,160 @@ +package de.mediathekview.mserver.crawler; + +import de.mediathekview.mlib.daten.Film; +import de.mediathekview.mlib.daten.FilmUrl; +import de.mediathekview.mlib.daten.Filmlist; +import de.mediathekview.mlib.daten.Resolution; +import de.mediathekview.mlib.filmlisten.reader.FilmlistOldFormatReader; +import de.mediathekview.mlib.filmlisten.writer.FilmlistOldFormatWriter; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.junit.jupiter.api.Test; + +import com.google.common.base.Objects; + +import static org.junit.jupiter.api.Assertions.assertTrue; + + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Optional; + + + +public class CompareFilmlistsTest { + private static final Logger LOG = LogManager.getLogger(CompareFilmlistsTest.class); + private int fullmatch = 0; + private int missingLeft = 0; + private int missingRight = 0; + private int diff = 0; + + + @Test + void readFilmlistOldFormatIncludingBrokenRecords() + throws IOException { + // + LOG.info("Start"); + if (false) { + return; + } + + String aList = "C:/tmp/filme-old.json"; + String bList = "C:/tmp/filme-new.json"; + ClassLoader classLoader = getClass().getClassLoader(); + final Path aListPath = new File(aList).toPath(); + Optional aFilmlist = new FilmlistOldFormatReader().read(new FileInputStream(aListPath.toString())); + assertTrue(aFilmlist.isPresent()); + final Path bListPath = new File(bList).toPath(); + Optional bFilmlist = new FilmlistOldFormatReader().read(new FileInputStream(bListPath.toString())); + assertTrue(bFilmlist.isPresent()); + // + if (true) { + ArrayList bFilms = new ArrayList<>(bFilmlist.get().getFilms().values()); + aFilmlist.get().getFilms().values().forEach( f -> { + Film target = index(bFilms, f); + if (target != null) { + compare(f, target); + } + }); + aFilmlist.get().getFilms().values().forEach( f -> { + if (index(bFilms, f) == null) { + LOG.info("Missing Film in "+bList+" list"); + LOG.info(f.toString()); + missingLeft++; + } + }); + ArrayList aFilms = new ArrayList<>(aFilmlist.get().getFilms().values()); + bFilms.forEach( f -> { + Film target = index(aFilms, f); + if (target == null) { + LOG.info("Missing Film in "+aList+" list"); + LOG.info(f.toString()); + missingRight++; + } + });} + new FilmlistOldFormatWriter().write(aFilmlist.get(), Path.of("c:/tmp/aFilmlist.json")); + new FilmlistOldFormatWriter().write(bFilmlist.get(), Path.of("c:/tmp/bFilmlist.json")); + // + LOG.info("Matching: {} MissingLeft({}): {} MissingRight({}): {} Diff: {}", fullmatch, bList, missingLeft, aList, missingRight, diff); + } + + private void compare(Film aFilm, Film bFilm) { + String error = ""; + //if (!aFilm.getSenderName().equalsIgnoreCase(bFilm.getSenderName())) { + // error += "Incorrect Sender"; + //} + if (!aFilm.getTitel().equalsIgnoreCase(bFilm.getTitel())){ + error += "Incorrect Title '" + aFilm.getTitel() + "' vs '" + bFilm.getTitel() + "'"; + } + if (!aFilm.getThema().equalsIgnoreCase(bFilm.getThema())){ + error += "Incorrect Topic '" + aFilm.getThema() + "' vs '" + bFilm.getThema() + "'"; + } + if (!aFilm.getDuration().equals(bFilm.getDuration())){ + error += "Incorrect Duration" + aFilm.getDuration() + "' vs '" + bFilm.getDuration() + "'"; + } + if (false && !aFilm.getBeschreibung().equalsIgnoreCase(bFilm.getBeschreibung())) { + // new parser cuts out last char + if (aFilm.getBeschreibung().length() > 70 && bFilm.getBeschreibung().length() > 70 && + !aFilm.getBeschreibung().substring(1,aFilm.getBeschreibung().length()-19).equalsIgnoreCase(bFilm.getBeschreibung().substring(1,bFilm.getBeschreibung().length()-18))){ + error += "Incorrect Description"; + } else if (aFilm.getBeschreibung().length() > 20 && bFilm.getBeschreibung().length() > 20 && + !aFilm.getBeschreibung().substring(1,20).equalsIgnoreCase(bFilm.getBeschreibung().substring(1,20))){ + error += "Incorrect Description"; + } + } + if (!aFilm.getWebsite().toString().equalsIgnoreCase(bFilm.getWebsite().toString())){ + error += "Incorrect website" + aFilm.getWebsite().toString() + "' vs '" + bFilm.getWebsite().toString() + "'"; + } + if (!aFilm.getTime().equals(bFilm.getTime())){ + error += "Incorrect Time " + aFilm.getTime() + "' vs '" + bFilm.getTime() + "'"; + } + //if (!aFilm.getSubtitles().equals(bFilm.getSubtitles())){ + // error += "Incorrect subtitle"; + //} + if (!compareFilmUrl(aFilm.getUrl(Resolution.SMALL), bFilm.getUrl(Resolution.SMALL))) { + error += "URL SMALL " + aFilm.getUrl(Resolution.SMALL) + "' vs '" + bFilm.getUrl(Resolution.SMALL) + "'"; + } + if (!compareFilmUrl(aFilm.getUrl(Resolution.NORMAL), bFilm.getUrl(Resolution.NORMAL))) { + error += "URL NORMAL " + aFilm.getUrl(Resolution.NORMAL) + "' vs '" + bFilm.getUrl(Resolution.NORMAL) + "'"; + } + if (!compareFilmUrl(aFilm.getUrl(Resolution.HD), bFilm.getUrl(Resolution.HD))) { + error += "URL HD "+ aFilm.getUrl(Resolution.HD) + "' vs '" + bFilm.getUrl(Resolution.HD) + "'"; + } + // + if (error != "") { + LOG.info(error); + LOG.info(aFilm.toString()); + LOG.info(bFilm.toString()); + diff++; + } else { + fullmatch++; + } + } + + private static boolean compareFilmUrl(FilmUrl a, FilmUrl b) { + if (a == null && b == null) { + return true; + } + if (a == null || b == null) { + return false; + } + return (a.getUrl().toString().equalsIgnoreCase(b.getUrl().toString())); + } + + private static Film index(ArrayList list, Film aFilm) { + for (Film e : list) { + if (e.getSenderName().equalsIgnoreCase(aFilm.getSenderName()) && + e.getTitel().equalsIgnoreCase(aFilm.getTitel()) && + e.getThema().equalsIgnoreCase(aFilm.getThema()) && + e.getTime().equals(aFilm.getTime())) { + return e; + } + } + return null; +} + +} diff --git a/src/test/java/de/mediathekview/mserver/crawler/zdf/json/ZdfFilmDetailDeserializerTest.java b/src/test/java/de/mediathekview/mserver/crawler/zdf/json/ZdfFilmDetailDeserializerTest.java index 2585d1c21..d3836cc4e 100644 --- a/src/test/java/de/mediathekview/mserver/crawler/zdf/json/ZdfFilmDetailDeserializerTest.java +++ b/src/test/java/de/mediathekview/mserver/crawler/zdf/json/ZdfFilmDetailDeserializerTest.java @@ -14,6 +14,8 @@ import java.time.LocalDateTime; import java.util.Arrays; import java.util.Collection; +import java.util.HashMap; +import java.util.Map; import java.util.Optional; import org.junit.Test; import org.junit.runner.RunWith; @@ -177,7 +179,7 @@ public static Collection data() { public void test() { final JsonObject json = JsonFileReader.readJson(jsonFile); final ZdfFilmDetailDeserializer target = - new ZdfFilmDetailDeserializer(ZdfConstants.URL_API_BASE, expectedSender); + new ZdfFilmDetailDeserializer(ZdfConstants.URL_API_BASE, createPartnerMap()); final Optional actual = target.deserialize(json, Film.class, null); @@ -195,4 +197,16 @@ public void test() { assertThat(actual.get().getUrl(), equalTo(expectedDownloadUrl)); assertThat(actual.get().getUrlSignLanguage(), equalTo(expectedDownloadUrlSignLanguage)); } + + private Map createPartnerMap() { + Map partnerMap = new HashMap<>(); + partnerMap.put("ZDFinfo", Sender.ZDF); + partnerMap.put("ZDFneo", Sender.ZDF); + partnerMap.put("ZDF", Sender.ZDF); + partnerMap.put("EMPTY", Sender.ZDF); + partnerMap.put("KI.KA", Sender.ZDF); // for testing only + partnerMap.put("3sat", Sender.DREISAT); // for testing only + // IGNORED Sender [KI.KA, WDR, PHOENIX, one, HR, 3sat, SWR, arte, BR, RBB, ARD, daserste, alpha, MDR, radiobremen, funk, ZDF, NDR, SR] + return partnerMap; + } } diff --git a/src/test/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfFilmDetailTaskMultipleLanguagesTest.java b/src/test/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfFilmDetailTaskMultipleLanguagesTest.java index a60d36553..3b54c854f 100644 --- a/src/test/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfFilmDetailTaskMultipleLanguagesTest.java +++ b/src/test/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfFilmDetailTaskMultipleLanguagesTest.java @@ -79,6 +79,16 @@ public void testGermanAndEnglish() { private Set executeTask(final String aDetailUrl) { final Queue urls = new ConcurrentLinkedQueue<>(); urls.add(new CrawlerUrlDTO(getWireMockBaseUrlSafe() + aDetailUrl)); - return new ZdfFilmDetailTask(createCrawler(), getWireMockBaseUrlSafe(), urls, null).invoke(); + return new ZdfFilmDetailTask(createCrawler(), getWireMockBaseUrlSafe(), urls, null, createPartnerMap()).invoke(); + } + + private Map createPartnerMap() { + Map partnerMap = new HashMap<>(); + partnerMap.put("ZDFinfo", Sender.ZDF); + partnerMap.put("ZDFneo", Sender.ZDF); + partnerMap.put("ZDF", Sender.ZDF); + partnerMap.put("EMPTY", Sender.ZDF); + // IGNORED Sender [KI.KA, WDR, PHOENIX, one, HR, 3sat, SWR, arte, BR, RBB, ARD, daserste, alpha, MDR, radiobremen, funk, ZDF, NDR, SR] + return partnerMap; } } diff --git a/src/test/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfFilmDetailTaskTest.java b/src/test/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfFilmDetailTaskTest.java index 54177037c..3a4f5f1ca 100644 --- a/src/test/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfFilmDetailTaskTest.java +++ b/src/test/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfFilmDetailTaskTest.java @@ -14,6 +14,8 @@ import java.time.LocalDateTime; import java.util.Arrays; import java.util.Collection; +import java.util.HashMap; +import java.util.Map; import java.util.Queue; import java.util.Set; import java.util.concurrent.ConcurrentLinkedQueue; @@ -255,6 +257,16 @@ public void test() { private Set executeTask(final String detailUrl) { final Queue urls = new ConcurrentLinkedQueue<>(); urls.add(new CrawlerUrlDTO(getWireMockBaseUrlSafe() + detailUrl)); - return new ZdfFilmDetailTask(createCrawler(), getWireMockBaseUrlSafe(), urls, null).invoke(); + return new ZdfFilmDetailTask(createCrawler(), getWireMockBaseUrlSafe(), urls, null, createPartnerMap()).invoke(); + } + + private Map createPartnerMap() { + Map partnerMap = new HashMap<>(); + partnerMap.put("ZDFinfo", Sender.ZDF); + partnerMap.put("ZDFneo", Sender.ZDF); + partnerMap.put("ZDF", Sender.ZDF); + partnerMap.put("EMPTY", Sender.ZDF); + // IGNORED Sender [KI.KA, WDR, PHOENIX, one, HR, 3sat, SWR, arte, BR, RBB, ARD, daserste, alpha, MDR, radiobremen, funk, ZDF, NDR, SR] + return partnerMap; } } From b4360ef2694032d5c780a9d9fc6dda1282054f9f Mon Sep 17 00:00:00 2001 From: CodingPF Date: Sun, 2 Jun 2024 16:36:19 +0200 Subject: [PATCH 2/4] remove debug source --- .../base/utils/FilmlistDebugHelper.java | 111 ------------ .../mserver/crawler/CompareFilmlistsTest.java | 160 ------------------ 2 files changed, 271 deletions(-) delete mode 100644 src/main/java/de/mediathekview/mserver/base/utils/FilmlistDebugHelper.java delete mode 100644 src/test/java/de/mediathekview/mserver/crawler/CompareFilmlistsTest.java diff --git a/src/main/java/de/mediathekview/mserver/base/utils/FilmlistDebugHelper.java b/src/main/java/de/mediathekview/mserver/base/utils/FilmlistDebugHelper.java deleted file mode 100644 index 92bbae310..000000000 --- a/src/main/java/de/mediathekview/mserver/base/utils/FilmlistDebugHelper.java +++ /dev/null @@ -1,111 +0,0 @@ -package de.mediathekview.mserver.base.utils; - -import java.util.ArrayList; -import java.util.Set; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -import de.mediathekview.mlib.daten.Film; -import de.mediathekview.mlib.daten.Filmlist; -import de.mediathekview.mlib.daten.Resolution; -import de.mediathekview.mserver.crawler.kika.KikaApiCrawler; - -public class FilmlistDebugHelper { - private static final Logger LOG = LogManager.getLogger(FilmlistDebugHelper.class); - - - - public static Film getFilmFromSet(Set base, Film searchFilm) { - for (Film e : base) { - if (e.equals(searchFilm)) { - return e; - } - } - return null; - } - - public static void printFilmlistForSender(Filmlist list, String sender, boolean printFullDetails) { - list.getFilms().values().stream().forEach( e -> { - if (e.getSenderName().equalsIgnoreCase(sender)) { - if (printFullDetails) { - LOG.debug("{} {} {}", e.getTitel(), e.getThema(), e); - } else { - LOG.debug("{} {}", e.getTitel(), e.getThema()); - } - } - }); - } - - public static void printFilmlistForThema(Filmlist list, String thema, boolean printFullDetails) { - list.getFilms().values().stream().forEach( e -> { - if (e.getThema().equalsIgnoreCase(thema)) { - if (printFullDetails) { - LOG.debug("{} {} {} {}", e.getSenderName(), e.getTitel(), e.getThema(), e); - } else { - LOG.debug("{} {} {}", e.getSenderName(), e.getTitel(), e.getThema()); - } - } - }); - } - - - - - public static void compareFilmlist(Filmlist aFilmlist, Filmlist bFilmlist) { - ArrayList bFilms = new ArrayList<>(bFilmlist.getFilms().values()); - aFilmlist.getFilms().values().forEach( f -> { - if (bFilms.indexOf(f) == -1) { - LOG.info("Missing Film in source list"); - LOG.info(f.toString()); - } else { - Film expectedFilm = bFilms.get(bFilms.indexOf(f)); - compare(f, expectedFilm); - } - }); - ArrayList aFilms = new ArrayList<>(aFilmlist.getFilms().values()); - bFilms.forEach( f -> { - if (aFilms.indexOf(f) == -1) { - LOG.info("Missing Film in target list"); - LOG.info(f.toString()); - } - }); - } - - private static void compare(Film aFilm, Film bFilm) { - String error = ""; - if (!aFilm.getSenderName().equalsIgnoreCase(bFilm.getSenderName())) { - error = "Incorrect Sender"; - } else if (!aFilm.getTitel().equalsIgnoreCase(bFilm.getTitel())){ - error = "Incorrect Title"; - } else if (!aFilm.getThema().equalsIgnoreCase(bFilm.getThema())){ - error = "Incorrect Topic"; - } else if (!aFilm.getDuration().equals(bFilm.getDuration())){ - error = "Incorrect Duration"; - } else if (!aFilm.getBeschreibung().equalsIgnoreCase(bFilm.getBeschreibung())){ - error = "Incorrect Description"; - } else if (!aFilm.getWebsite().toString().equalsIgnoreCase(bFilm.getWebsite().toString())){ - error = "Incorrect website"; - } else if (!aFilm.getTime().equals(bFilm.getTime())){ - error = "Incorrect Time"; - } else if (!aFilm.getSubtitles().equals(bFilm.getSubtitles())){ - error = "Incorrect subtitle"; - } else if (bFilm.getUrl(Resolution.SMALL) != null && - !aFilm.getUrl(Resolution.SMALL).toString().equalsIgnoreCase(bFilm.getUrl(Resolution.SMALL).toString())) { - error = "URL SMALL"; - } else if (bFilm.getUrl(Resolution.NORMAL) != null && - !aFilm.getUrl(Resolution.NORMAL).toString().equalsIgnoreCase(bFilm.getUrl(Resolution.NORMAL).toString())) { - error = "URL NORMAL"; - } else if (bFilm.getUrl(Resolution.HD) != null && - !aFilm.getUrl(Resolution.HD).toString().equalsIgnoreCase(bFilm.getUrl(Resolution.HD).toString())) { - error = "URL HD"; - } - // - if (error != "") { - LOG.info(error); - LOG.info(aFilm.toString()); - LOG.info(bFilm.toString()); - } - } - -} diff --git a/src/test/java/de/mediathekview/mserver/crawler/CompareFilmlistsTest.java b/src/test/java/de/mediathekview/mserver/crawler/CompareFilmlistsTest.java deleted file mode 100644 index 321d3b98f..000000000 --- a/src/test/java/de/mediathekview/mserver/crawler/CompareFilmlistsTest.java +++ /dev/null @@ -1,160 +0,0 @@ -package de.mediathekview.mserver.crawler; - -import de.mediathekview.mlib.daten.Film; -import de.mediathekview.mlib.daten.FilmUrl; -import de.mediathekview.mlib.daten.Filmlist; -import de.mediathekview.mlib.daten.Resolution; -import de.mediathekview.mlib.filmlisten.reader.FilmlistOldFormatReader; -import de.mediathekview.mlib.filmlisten.writer.FilmlistOldFormatWriter; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.junit.jupiter.api.Test; - -import com.google.common.base.Objects; - -import static org.junit.jupiter.api.Assertions.assertTrue; - - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.Optional; - - - -public class CompareFilmlistsTest { - private static final Logger LOG = LogManager.getLogger(CompareFilmlistsTest.class); - private int fullmatch = 0; - private int missingLeft = 0; - private int missingRight = 0; - private int diff = 0; - - - @Test - void readFilmlistOldFormatIncludingBrokenRecords() - throws IOException { - // - LOG.info("Start"); - if (false) { - return; - } - - String aList = "C:/tmp/filme-old.json"; - String bList = "C:/tmp/filme-new.json"; - ClassLoader classLoader = getClass().getClassLoader(); - final Path aListPath = new File(aList).toPath(); - Optional aFilmlist = new FilmlistOldFormatReader().read(new FileInputStream(aListPath.toString())); - assertTrue(aFilmlist.isPresent()); - final Path bListPath = new File(bList).toPath(); - Optional bFilmlist = new FilmlistOldFormatReader().read(new FileInputStream(bListPath.toString())); - assertTrue(bFilmlist.isPresent()); - // - if (true) { - ArrayList bFilms = new ArrayList<>(bFilmlist.get().getFilms().values()); - aFilmlist.get().getFilms().values().forEach( f -> { - Film target = index(bFilms, f); - if (target != null) { - compare(f, target); - } - }); - aFilmlist.get().getFilms().values().forEach( f -> { - if (index(bFilms, f) == null) { - LOG.info("Missing Film in "+bList+" list"); - LOG.info(f.toString()); - missingLeft++; - } - }); - ArrayList aFilms = new ArrayList<>(aFilmlist.get().getFilms().values()); - bFilms.forEach( f -> { - Film target = index(aFilms, f); - if (target == null) { - LOG.info("Missing Film in "+aList+" list"); - LOG.info(f.toString()); - missingRight++; - } - });} - new FilmlistOldFormatWriter().write(aFilmlist.get(), Path.of("c:/tmp/aFilmlist.json")); - new FilmlistOldFormatWriter().write(bFilmlist.get(), Path.of("c:/tmp/bFilmlist.json")); - // - LOG.info("Matching: {} MissingLeft({}): {} MissingRight({}): {} Diff: {}", fullmatch, bList, missingLeft, aList, missingRight, diff); - } - - private void compare(Film aFilm, Film bFilm) { - String error = ""; - //if (!aFilm.getSenderName().equalsIgnoreCase(bFilm.getSenderName())) { - // error += "Incorrect Sender"; - //} - if (!aFilm.getTitel().equalsIgnoreCase(bFilm.getTitel())){ - error += "Incorrect Title '" + aFilm.getTitel() + "' vs '" + bFilm.getTitel() + "'"; - } - if (!aFilm.getThema().equalsIgnoreCase(bFilm.getThema())){ - error += "Incorrect Topic '" + aFilm.getThema() + "' vs '" + bFilm.getThema() + "'"; - } - if (!aFilm.getDuration().equals(bFilm.getDuration())){ - error += "Incorrect Duration" + aFilm.getDuration() + "' vs '" + bFilm.getDuration() + "'"; - } - if (false && !aFilm.getBeschreibung().equalsIgnoreCase(bFilm.getBeschreibung())) { - // new parser cuts out last char - if (aFilm.getBeschreibung().length() > 70 && bFilm.getBeschreibung().length() > 70 && - !aFilm.getBeschreibung().substring(1,aFilm.getBeschreibung().length()-19).equalsIgnoreCase(bFilm.getBeschreibung().substring(1,bFilm.getBeschreibung().length()-18))){ - error += "Incorrect Description"; - } else if (aFilm.getBeschreibung().length() > 20 && bFilm.getBeschreibung().length() > 20 && - !aFilm.getBeschreibung().substring(1,20).equalsIgnoreCase(bFilm.getBeschreibung().substring(1,20))){ - error += "Incorrect Description"; - } - } - if (!aFilm.getWebsite().toString().equalsIgnoreCase(bFilm.getWebsite().toString())){ - error += "Incorrect website" + aFilm.getWebsite().toString() + "' vs '" + bFilm.getWebsite().toString() + "'"; - } - if (!aFilm.getTime().equals(bFilm.getTime())){ - error += "Incorrect Time " + aFilm.getTime() + "' vs '" + bFilm.getTime() + "'"; - } - //if (!aFilm.getSubtitles().equals(bFilm.getSubtitles())){ - // error += "Incorrect subtitle"; - //} - if (!compareFilmUrl(aFilm.getUrl(Resolution.SMALL), bFilm.getUrl(Resolution.SMALL))) { - error += "URL SMALL " + aFilm.getUrl(Resolution.SMALL) + "' vs '" + bFilm.getUrl(Resolution.SMALL) + "'"; - } - if (!compareFilmUrl(aFilm.getUrl(Resolution.NORMAL), bFilm.getUrl(Resolution.NORMAL))) { - error += "URL NORMAL " + aFilm.getUrl(Resolution.NORMAL) + "' vs '" + bFilm.getUrl(Resolution.NORMAL) + "'"; - } - if (!compareFilmUrl(aFilm.getUrl(Resolution.HD), bFilm.getUrl(Resolution.HD))) { - error += "URL HD "+ aFilm.getUrl(Resolution.HD) + "' vs '" + bFilm.getUrl(Resolution.HD) + "'"; - } - // - if (error != "") { - LOG.info(error); - LOG.info(aFilm.toString()); - LOG.info(bFilm.toString()); - diff++; - } else { - fullmatch++; - } - } - - private static boolean compareFilmUrl(FilmUrl a, FilmUrl b) { - if (a == null && b == null) { - return true; - } - if (a == null || b == null) { - return false; - } - return (a.getUrl().toString().equalsIgnoreCase(b.getUrl().toString())); - } - - private static Film index(ArrayList list, Film aFilm) { - for (Film e : list) { - if (e.getSenderName().equalsIgnoreCase(aFilm.getSenderName()) && - e.getTitel().equalsIgnoreCase(aFilm.getTitel()) && - e.getThema().equalsIgnoreCase(aFilm.getThema()) && - e.getTime().equals(aFilm.getTime())) { - return e; - } - } - return null; -} - -} From bd3cc4f471e200cbe42ef4596548c9eb9cdf6aee Mon Sep 17 00:00:00 2001 From: CodingPF Date: Mon, 3 Jun 2024 23:48:29 +0200 Subject: [PATCH 3/4] enable more debug info --- .../base/utils/CheckUrlAvailability.java | 24 +++++++---- .../mserver/crawler/zdf/ZdfFilmDto.java | 19 +++++--- .../zdf/json/ZdfFilmDetailDeserializer.java | 7 +-- .../crawler/zdf/tasks/ZdfFilmDetailTask.java | 43 +++++++++++-------- .../json/ZdfFilmDetailDeserializerTest.java | 2 +- 5 files changed, 56 insertions(+), 39 deletions(-) diff --git a/src/main/java/de/mediathekview/mserver/base/utils/CheckUrlAvailability.java b/src/main/java/de/mediathekview/mserver/base/utils/CheckUrlAvailability.java index 7a8df2725..0f83b3179 100644 --- a/src/main/java/de/mediathekview/mserver/base/utils/CheckUrlAvailability.java +++ b/src/main/java/de/mediathekview/mserver/base/utils/CheckUrlAvailability.java @@ -2,6 +2,8 @@ import java.util.concurrent.ForkJoinPool; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -19,9 +21,9 @@ public class CheckUrlAvailability { private final FileSizeDeterminer fsd; private int numberOfThreads = 10; private Long minFileSize = 2048L; - private int removedCounter = 0; + private AtomicInteger removedCounter = new AtomicInteger(0); private long timeoutInMS = 1*60*1000L; - private boolean timeout = false; + private AtomicBoolean timeout = new AtomicBoolean(false); private long start = 0; public CheckUrlAvailability(final long minFileSize, final long timeoutInSec, final int numberOfThreads) { @@ -45,13 +47,13 @@ public Filmlist getAvaiableFilmlist(final Filmlist importList) { .join(); customThreadPool.shutdown(); // - LOG.debug("checked {} urls and removed {} in {} sec and timeout was reached: {}", importList.getFilms().size(), removedCounter, ((System.currentTimeMillis()-start)/1000), timeout); + LOG.debug("checked {} urls and removed {} in {} sec and timeout was reached: {}", importList.getFilms().size(), removedCounter.get(), ((System.currentTimeMillis()-start)/1000), timeout.get()); return filteredFilmlist; } private boolean isAvailable(Film pFilm) { - if (timeout || System.currentTimeMillis() > (start+timeoutInMS)) { - timeout = true; + if (timeout.get() || System.currentTimeMillis() > (start+timeoutInMS)) { + timeout.set(true); return true; } @@ -61,21 +63,25 @@ private boolean isAvailable(Film pFilm) { if (pFilm.getThema().equalsIgnoreCase("Livestream")) { // do not remove livestreams return true; + } else if (ri == null) { + LOG.debug("Film response (null): {} # {} # {} # {} ", normalUrl, pFilm.getSender(), pFilm.getThema(), pFilm.getTitel()); + removedCounter.incrementAndGet(); + return false; } else if (!(ri.getCode() >= 200 && ri.getCode() < 300)) { LOG.debug("Film response ({}): {} # {} # {} # {} ", ri.getCode(), normalUrl, pFilm.getSender(), pFilm.getThema(), pFilm.getTitel()); - removedCounter++; + removedCounter.incrementAndGet(); return false; } else if (ri.getContentType().equalsIgnoreCase("text/html")) { LOG.debug("Film content type({}): {} # {} # {} # {} ", ri.getContentType(), normalUrl, pFilm.getSender(), pFilm.getThema(), pFilm.getTitel()); - removedCounter++; + removedCounter.incrementAndGet(); return false; } else if (ri.getSize() < minFileSize && !normalUrl.endsWith("m3u8")) { LOG.debug("Film small ({}): {} # {} # {} # {} ", ri.getSize() , normalUrl, pFilm.getSender(), pFilm.getThema(), pFilm.getTitel()); - removedCounter++; + removedCounter.incrementAndGet(); return false; } else if (removedVideo(pFilm, ri.getPath())) { LOG.debug("Film url ({}): {} # {} # {} # {} ", ri.getPath(), normalUrl, pFilm.getSender(), pFilm.getThema(), pFilm.getTitel()); - removedCounter++; + removedCounter.incrementAndGet(); return false; } return true; diff --git a/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfFilmDto.java b/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfFilmDto.java index 60719c578..27f43884a 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfFilmDto.java +++ b/src/main/java/de/mediathekview/mserver/crawler/zdf/ZdfFilmDto.java @@ -6,14 +6,19 @@ import java.util.Objects; import java.util.Optional; -public class ZdfFilmDto extends CrawlerUrlDTO { +public class ZdfFilmDto { - private final Film film; + private final Optional film; private final Optional urlSignLanguage; + private final Optional videoUrl; - public ZdfFilmDto(final Film film, final String videoUrl, String urlSignLanguage) { - super(videoUrl); + public ZdfFilmDto(final Optional film, final String videoUrl, String urlSignLanguage) { this.film = film; + if (videoUrl == null) { + this.videoUrl = Optional.empty(); + } else { + this.videoUrl = Optional.of(videoUrl); + } if (urlSignLanguage != null && !urlSignLanguage.isEmpty()) { this.urlSignLanguage = Optional.of(urlSignLanguage); @@ -22,7 +27,11 @@ public ZdfFilmDto(final Film film, final String videoUrl, String urlSignLanguage } } - public Film getFilm() { + public Optional getUrl() { + return videoUrl; + } + + public Optional getFilm() { return film; } diff --git a/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfFilmDetailDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfFilmDetailDeserializer.java index 88f878d83..79174d1c5 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfFilmDetailDeserializer.java +++ b/src/main/java/de/mediathekview/mserver/crawler/zdf/json/ZdfFilmDetailDeserializer.java @@ -95,7 +95,6 @@ public Optional deserialize( } } final Optional tvService = JsonUtils.getElementValueAsString(aJsonObject, JSON_ELEMENT_TVSERVICE); - //System.out.println(tvService + " " + partner2Sender.get(tvService.orElse("EMPTY"))); if (!partner2Sender.containsKey(tvService.orElse("EMPTY"))) { return Optional.empty(); } @@ -113,11 +112,7 @@ public Optional deserialize( if (title.isPresent()) { final Optional film = createFilm(partner2Sender.get(tvService.orElse("EMPTY")), topic, title.get(), description, website, time, duration); - - if (film.isPresent() && downloadUrl.containsKey(DOWNLOAD_URL_DEFAULT)) { - return Optional.of(new ZdfFilmDto(film.get(), downloadUrl.get(DOWNLOAD_URL_DEFAULT), downloadUrl.get(DOWNLOAD_URL_DGS))); - } - LOG.error("ZdfFilmDetailDeserializer: no film or downloadUrl: {}, {}", topic, title.get()); + return Optional.of(new ZdfFilmDto(film, downloadUrl.get(DOWNLOAD_URL_DEFAULT), downloadUrl.get(DOWNLOAD_URL_DGS))); } else { LOG.error("ZdfFilmDetailDeserializer: no title found"); } diff --git a/src/main/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfFilmDetailTask.java b/src/main/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfFilmDetailTask.java index 1b04a1eb7..c3c57edee 100644 --- a/src/main/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfFilmDetailTask.java +++ b/src/main/java/de/mediathekview/mserver/crawler/zdf/tasks/ZdfFilmDetailTask.java @@ -90,29 +90,36 @@ private static void updateTitle(final String aLanguage, final Film aFilm) { protected void processRestTarget(final CrawlerUrlDTO aDto, final WebTarget aTarget) { final Optional film = deserializeOptional(aTarget, OPTIONAL_FILM_TYPE_TOKEN); if (film.isPresent()) { - final Optional downloadDtoOptional = - deserializeOptional( - createWebTarget(film.get().getUrl()), OPTIONAL_DOWNLOAD_DTO_TYPE_TOKEN); - - if (downloadDtoOptional.isPresent()) { - final DownloadDto downloadDto = downloadDtoOptional.get(); - appendSignLanguage(downloadDto, film.get().getUrlSignLanguage()); - - try { - final Film result = film.get().getFilm(); - if (result.getDuration().isZero() && downloadDto.getDuration().isPresent()) { - result.setDuration(downloadDto.getDuration().get()); + if (film.get().getUrl().isPresent()) { + final Optional downloadDtoOptional = + deserializeOptional( + createWebTarget(film.get().getUrl().get()), OPTIONAL_DOWNLOAD_DTO_TYPE_TOKEN); + + if (downloadDtoOptional.isPresent()) { + final DownloadDto downloadDto = downloadDtoOptional.get(); + appendSignLanguage(downloadDto, film.get().getUrlSignLanguage()); + + try { + final Film result = film.get().getFilm().get(); + if (result.getDuration().isZero() && downloadDto.getDuration().isPresent()) { + result.setDuration(downloadDto.getDuration().get()); + } + addFilm(downloadDto, result); + + crawler.incrementAndGetActualCount(); + crawler.updateProgress(); + } catch (final MalformedURLException e) { + LOG.error("ZdfFilmDetailTask: url can't be parsed: ", e); + crawler.incrementAndGetErrorCount(); + crawler.updateProgress(); } - addFilm(downloadDto, result); - - crawler.incrementAndGetActualCount(); - crawler.updateProgress(); - } catch (final MalformedURLException e) { - LOG.error("ZdfFilmDetailTask: url can't be parsed: ", e); + } else { + LOG.error("ZdfFilmDetailTask: no video {} {} {} in {}",film.get().getFilm().get().getSenderName(), film.get().getFilm().get().getTitel(), film.get().getFilm().get().getThema() , aDto.toString()); crawler.incrementAndGetErrorCount(); crawler.updateProgress(); } } else { + LOG.error("ZdfFilmDetailTask: no film found in {}", aDto.toString()); crawler.incrementAndGetErrorCount(); crawler.updateProgress(); } diff --git a/src/test/java/de/mediathekview/mserver/crawler/zdf/json/ZdfFilmDetailDeserializerTest.java b/src/test/java/de/mediathekview/mserver/crawler/zdf/json/ZdfFilmDetailDeserializerTest.java index d3836cc4e..b93ed3bad 100644 --- a/src/test/java/de/mediathekview/mserver/crawler/zdf/json/ZdfFilmDetailDeserializerTest.java +++ b/src/test/java/de/mediathekview/mserver/crawler/zdf/json/ZdfFilmDetailDeserializerTest.java @@ -186,7 +186,7 @@ public void test() { assertThat(actual.isPresent(), equalTo(true)); AssertFilm.assertEquals( - actual.get().getFilm(), + actual.get().getFilm().get(), expectedSender, expectedTopic, expectedTitle, From e46f16f5417b0411526989a4029c3314bfde691e Mon Sep 17 00:00:00 2001 From: CodingPF Date: Fri, 7 Jun 2024 19:59:47 +0200 Subject: [PATCH 4/4] align unit test to optional --- .../mserver/crawler/zdf/json/ZdfFilmDetailDeserializerTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/de/mediathekview/mserver/crawler/zdf/json/ZdfFilmDetailDeserializerTest.java b/src/test/java/de/mediathekview/mserver/crawler/zdf/json/ZdfFilmDetailDeserializerTest.java index b93ed3bad..b91f60d6e 100644 --- a/src/test/java/de/mediathekview/mserver/crawler/zdf/json/ZdfFilmDetailDeserializerTest.java +++ b/src/test/java/de/mediathekview/mserver/crawler/zdf/json/ZdfFilmDetailDeserializerTest.java @@ -194,7 +194,7 @@ public void test() { expectedDuration, expectedDescription, expectedWebsite); - assertThat(actual.get().getUrl(), equalTo(expectedDownloadUrl)); + assertThat(actual.get().getUrl().get(), equalTo(expectedDownloadUrl)); assertThat(actual.get().getUrlSignLanguage(), equalTo(expectedDownloadUrlSignLanguage)); }