Skip to content

Commit

Permalink
videourls from playlist
Browse files Browse the repository at this point in the history
  • Loading branch information
codingPF committed Feb 28, 2024
1 parent 2a936d3 commit 6d67553
Show file tree
Hide file tree
Showing 6 changed files with 87 additions and 227 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,12 @@ private Queue<OrfOnBreadCrumsUrlDTO> createDayUrlsToCrawl() {
/**
 * Runs the A-Z crawl as three sequential fork/join stages and returns the video infos of the
 * last stage.
 *
 * <ol>
 *   <li>{@code OrfOnAZTask} is fed with {@code createAZUrlsToCrawl()} and yields a set of URL DTOs
 *       (the topics found per letter).
 *   <li>{@code OrfOnEpisodesTask} is fed with those topic DTOs and yields a set of URL DTOs
 *       (the episodes of each topic).
 *   <li>{@code OrfOnEpisodeTask} is fed with the episode DTOs and yields the
 *       {@code OrfOnVideoInfoDTO}s.
 * </ol>
 *
 * Each stage blocks on {@code get()} before the next one is submitted, because every stage
 * consumes the complete result of the previous one.
 *
 * @return the video infos produced by the final stage
 * @throws InterruptedException if the calling thread is interrupted while waiting for a stage
 * @throws ExecutionException if one of the submitted tasks terminated with an exception
 */
private Set<OrfOnVideoInfoDTO> processAZUrlsToCrawl() throws InterruptedException, ExecutionException {
  final ForkJoinTask<Set<OrfOnBreadCrumsUrlDTO>> letterTask =
      forkJoinPool.submit(new OrfOnAZTask(this, createAZUrlsToCrawl()));
  final Set<OrfOnBreadCrumsUrlDTO> letterTaskTopics = letterTask.get();

  final ForkJoinTask<Set<OrfOnBreadCrumsUrlDTO>> episodesFromTopicsTask =
      forkJoinPool.submit(new OrfOnEpisodesTask(this, new ConcurrentLinkedQueue<>(letterTaskTopics)));
  final Set<OrfOnBreadCrumsUrlDTO> episodesFromTopics = episodesFromTopicsTask.get();

  final ForkJoinTask<Set<OrfOnVideoInfoDTO>> videoEpisodeTask =
      forkJoinPool.submit(new OrfOnEpisodeTask(this, new ConcurrentLinkedQueue<>(episodesFromTopics)));
  return videoEpisodeTask.get();
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,12 @@ public class OrfOnEpisodeDeserializer implements JsonDeserializer<OrfOnVideoInfo
private static final String TAG_SHARE_BODY = "share_body";
private static final String TAG_RIGHT = "right";
private static final String TAG_VIDEO_TYPE ="video_type";
private static final String[] TAG_SEGMENTS = {"_links", "segments", "href"};
private static final String[] TAG_SUBTITLE = {"_links", "subtitle", "href"};
private static final String TAG_VIDEO = "sources";
private static final String TAG_VIDEO_QUALITY = "quality_key";
private static final String[] TAG_VIDEO_PATH_1 = {"_embedded","segments"};
private static final String[] TAG_VIDEO_PATH_2 = {"_embedded", "playlist", "sources"};
private static final String TAG_VIDEO_URL = "src";
//
private static final String TAG_VIDEO_CODEC = "delivery";
private static final String TAG_VIDEO_QUALITY = "quality";
private static final String[] TAG_SUBTITLE_SECTION = {"_embedded", "subtitle"};
private static final String TAG_SUBTITLE_SMI = "sami_url";
private static final String TAG_SUBTITLE_SRT = "srt_url";
Expand All @@ -57,6 +57,7 @@ public class OrfOnEpisodeDeserializer implements JsonDeserializer<OrfOnVideoInfo
private static final String TAG_SUBTITLE_XML = "xml_url";
//
private AbstractCrawler crawler = null;
private static final String[] PREFERED_CODEC = {"hls", "hds", "streaming", "progressive"};
//

public OrfOnEpisodeDeserializer(AbstractCrawler crawler) {
Expand Down Expand Up @@ -149,58 +150,49 @@ private Optional<URL> toURL(String aString) {
}

private Optional<Map<Resolution, FilmUrl>> parseUrl(JsonElement jsonElement) {

for (Map.Entry<String, JsonElement> entry : jsonElement.getAsJsonObject().getAsJsonObject(TAG_VIDEO).entrySet()) {

if (!"hlshdssmoothdashprogressive_download".contains(entry.getKey())) {
LOG.debug("unkown video type {} ", jsonElement);
Optional<JsonElement> videoPath1 = JsonUtils.getElement(jsonElement, TAG_VIDEO_PATH_1);
if (videoPath1.isEmpty() || !videoPath1.get().isJsonArray()) {
return Optional.empty();
}
Optional<JsonElement> videoPath2 = JsonUtils.getElement(videoPath1.get().getAsJsonArray().get(0), TAG_VIDEO_PATH_2);
if (videoPath2.isEmpty() || !videoPath2.get().isJsonArray()) {
return Optional.empty();
}
for (String key : PREFERED_CODEC) {
Optional<Map<Resolution,FilmUrl>> resultingVideos = readVideoForTargetCodec(videoPath2.get(),key);
if (resultingVideos.isPresent()) {
return resultingVideos;
}
}

Optional<Map<Resolution, FilmUrl>> urls = Optional.empty();
Optional<String> codec = Optional.empty(); //
if (jsonElement.getAsJsonObject().has(TAG_VIDEO) &&
jsonElement.getAsJsonObject().getAsJsonObject(TAG_VIDEO).has("progressive_download")) {
codec = Optional.of("progressive_download");
} else if (jsonElement.getAsJsonObject().has(TAG_VIDEO) &&
jsonElement.getAsJsonObject().getAsJsonObject(TAG_VIDEO).has("hls")) {
codec = Optional.of("hls");
} else if (jsonElement.getAsJsonObject().has(TAG_VIDEO) &&
jsonElement.getAsJsonObject().getAsJsonObject(TAG_VIDEO).has("hds")) {
codec = Optional.of("hds");
} else if (jsonElement.getAsJsonObject().has(TAG_VIDEO) &&
jsonElement.getAsJsonObject().getAsJsonObject(TAG_VIDEO).has("smooth")) {
codec = Optional.of("smooth");
} else if (jsonElement.getAsJsonObject().has(TAG_VIDEO) &&
jsonElement.getAsJsonObject().getAsJsonObject(TAG_VIDEO).has("dash")) {
codec = Optional.of("dash");
}
if (codec.isPresent()) {
urls = Optional.of(new EnumMap<>(Resolution.class));
for (JsonElement codecUrls : jsonElement.getAsJsonObject().getAsJsonObject(TAG_VIDEO).getAsJsonArray(codec.get())) {
return Optional.empty();
}

/**
 * Collects all video URLs from a sources array whose codec matches the requested one.
 *
 * <p>An array entry is used only if it carries a URL ({@code src}), a codec ({@code delivery})
 * and a quality ({@code quality}), and its codec equals {@code targetCodec} ignoring case.
 * If several matching entries map to the same {@code Resolution}, the later entry replaces
 * the earlier one.
 *
 * @param urlArray JSON array of source entries; must be a JSON array
 * @param targetCodec codec name to select, compared case-insensitively
 * @return the resolution-to-URL map for the codec, or an empty {@code Optional} when no entry
 *     matched, so that the caller can fall back to another codec
 */
private Optional<Map<Resolution, FilmUrl>> readVideoForTargetCodec(JsonElement urlArray, String targetCodec) {
  Map<Resolution, FilmUrl> urls = new EnumMap<>(Resolution.class);
  for (JsonElement videoElement : urlArray.getAsJsonArray()) {
    Optional<String> codec = JsonUtils.getElementValueAsString(videoElement, TAG_VIDEO_CODEC);
    Optional<String> quality = JsonUtils.getElementValueAsString(videoElement, TAG_VIDEO_QUALITY);
    Optional<String> url = JsonUtils.getElementValueAsString(videoElement, TAG_VIDEO_URL);
    if (url.isPresent() && codec.isPresent() && quality.isPresent() && targetCodec.equalsIgnoreCase(codec.get())) {
      try {
        long fileSize = crawler.determineFileSizeInKB(url.get());
        // NOTE(review): getQuality(..).get() is unchecked; if getQuality can come back empty for
        // an unknown quality string, the resulting exception is not caught below - confirm.
        urls.put(
            OrfOnEpisodeDeserializer.getQuality(quality.get()).get(),
            new FilmUrl(url.get(), fileSize)
        );
      } catch (MalformedURLException e) {
        // Skip this entry only; the remaining entries are still processed.
        LOG.error("Malformed video url {} {}", url.get(), e);
      }
    }
  }
  if (urls.isEmpty()) {
    // The original evaluated Optional.empty() without returning it, so an empty map was
    // reported as a present result.
    return Optional.empty();
  }
  return Optional.of(urls);
}


private Optional<URL> parseWebsite(Optional<String> text) {
Optional<URL> result = Optional.empty();
if (text.isPresent()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,35 +3,33 @@
import com.google.gson.*;

import de.mediathekview.mserver.base.utils.JsonUtils;
import de.mediathekview.mserver.crawler.basic.AbstractCrawler;
import de.mediathekview.mserver.crawler.basic.PagedElementListDTO;
import de.mediathekview.mserver.crawler.orfon.OrfOnVideoInfoDTO;
import de.mediathekview.mserver.crawler.orfon.OrfOnBreadCrumsUrlDTO;

import java.lang.reflect.Type;
import java.util.Optional;

public class OrfOnEpisodesDeserializer implements JsonDeserializer<PagedElementListDTO<OrfOnVideoInfoDTO>> {
public class OrfOnEpisodesDeserializer implements JsonDeserializer<PagedElementListDTO<OrfOnBreadCrumsUrlDTO>> {
private static final String[] TAG_NEXT_PAGE = {"_links", "next", "href"};
private static final String[] TAG_ITEMS = {"_embedded", "items"};
private OrfOnEpisodeDeserializer itemDeserializer = null;

public OrfOnEpisodesDeserializer(AbstractCrawler crawler) {
itemDeserializer = new OrfOnEpisodeDeserializer(crawler);
}
private static final String TAG_EPISODE_ID = "id";
private static final String[] TAG_EPISODE_LINK = { "_links", "self", "href"};

@Override
public PagedElementListDTO<OrfOnVideoInfoDTO> deserialize(
public PagedElementListDTO<OrfOnBreadCrumsUrlDTO> deserialize(
final JsonElement jsonElement, final Type typeOfT, final JsonDeserializationContext context)
throws JsonParseException {
JsonObject jsonPage = jsonElement.getAsJsonObject();
//
PagedElementListDTO<OrfOnVideoInfoDTO> page = new PagedElementListDTO<>();
PagedElementListDTO<OrfOnBreadCrumsUrlDTO> page = new PagedElementListDTO<>();
page.setNextPage(JsonUtils.getElementValueAsString(jsonElement, TAG_NEXT_PAGE));
//
final Optional<JsonElement> items = JsonUtils.getElement(jsonPage, TAG_ITEMS);
if (items.isPresent() && items.get().isJsonArray()) {
for (JsonElement item : items.get().getAsJsonArray()) {
page.addElement(itemDeserializer.deserialize(item, null, null));
Optional<String> episodeId = JsonUtils.getElementValueAsString(item, TAG_EPISODE_ID);
Optional<String> episodeLink = JsonUtils.getElementValueAsString(item, TAG_EPISODE_LINK);
episodeLink.ifPresent( link -> page.addElement(new OrfOnBreadCrumsUrlDTO(episodeId.orElse("EMPTY"), link)));
}
}
return page;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,103 +1,41 @@
package de.mediathekview.mserver.crawler.orfon.task;

import java.lang.reflect.Type;
import java.net.URI;
import java.util.Optional;
import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedQueue;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import com.google.gson.JsonDeserializer;
import com.google.gson.reflect.TypeToken;

import de.mediathekview.mserver.crawler.basic.AbstractCrawler;
import de.mediathekview.mserver.crawler.basic.AbstractJsonRestTask;
import de.mediathekview.mserver.crawler.basic.AbstractRecursiveConverterTask;
import de.mediathekview.mserver.crawler.basic.PagedElementListDTO;
import de.mediathekview.mserver.crawler.orfon.OrfOnBreadCrumsUrlDTO;
import de.mediathekview.mserver.crawler.orfon.OrfOnConstants;
import de.mediathekview.mserver.crawler.orfon.OrfOnVideoInfoDTO;
import de.mediathekview.mserver.crawler.orfon.json.OrfOnEpisodesDeserializer;
import jakarta.ws.rs.core.Response;

// <T, R, D extends CrawlerUrlDTO> extends AbstractRestTask<T, D>
// T: class returned by this task; R: class the response is deserialised into; D: DTO of the URL to research
public class OrfOnEpisodesTask extends AbstractJsonRestTask<OrfOnVideoInfoDTO, PagedElementListDTO<OrfOnVideoInfoDTO>, OrfOnBreadCrumsUrlDTO> {
public class OrfOnEpisodesTask extends OrfOnPagedTask {
private static final long serialVersionUID = 1L;
private static final Logger LOG = LogManager.getLogger(OrfOnEpisodesTask.class);

public OrfOnEpisodesTask(AbstractCrawler crawler, Queue<OrfOnBreadCrumsUrlDTO> urlToCrawlDTOs) {
super(crawler, urlToCrawlDTOs, OrfOnConstants.AUTH);
}

@Override
protected JsonDeserializer<PagedElementListDTO<OrfOnVideoInfoDTO>> getParser(OrfOnBreadCrumsUrlDTO aDTO) {
return new OrfOnEpisodesDeserializer(this.crawler);
super(crawler, urlToCrawlDTOs);
}

@Override
protected Type getType() {
return new TypeToken<PagedElementListDTO<OrfOnVideoInfoDTO>>() {}.getType();
public JsonDeserializer<PagedElementListDTO<OrfOnBreadCrumsUrlDTO>> getParser(OrfOnBreadCrumsUrlDTO aDTO) {
return new OrfOnEpisodesDeserializer();
}

@Override
protected void postProcessing(PagedElementListDTO<OrfOnVideoInfoDTO> aResponseObj, OrfOnBreadCrumsUrlDTO aDTO) {
final Optional<AbstractRecursiveConverterTask<OrfOnVideoInfoDTO, OrfOnBreadCrumsUrlDTO>> subpageCrawler;
if (aResponseObj.getNextPage().isPresent()) {
final Queue<OrfOnBreadCrumsUrlDTO> nextPageLinks = new ConcurrentLinkedQueue<>();
nextPageLinks.add(new OrfOnBreadCrumsUrlDTO("", aResponseObj.getNextPage().get()));
subpageCrawler = Optional.of(createNewOwnInstance(nextPageLinks));
subpageCrawler.get().fork();
LOG.debug("started paging to url {} for {}", aResponseObj.getNextPage().get(), aDTO.getUrl());
} else {
subpageCrawler = Optional.empty();
}
//
for (OrfOnVideoInfoDTO rs : aResponseObj.getElements()) {
if (rs.getTitle().isEmpty() && rs.getTitleWithDate().isEmpty()) {
LOG.warn("Missing title for {} in {}", rs.getId(), aDTO);
crawler.incrementAndGetErrorCount();
return;
}
if (rs.getTopic().isEmpty()) {
LOG.warn("Missing topic for {} in {}", rs.getId(), aDTO);
crawler.incrementAndGetErrorCount();
return;
}
if (rs.getVideoUrls().isEmpty()) {
LOG.warn("Missing videoUrls for {} in {}", rs.getId(), aDTO);
crawler.incrementAndGetErrorCount();
return;
}
if (rs.getDuration().isEmpty()) {
LOG.warn("Missing duration for {} in {}", rs.getId(), aDTO);
}
if (rs.getAired().isEmpty()) {
LOG.warn("Missing aired date for {} in {}", rs.getId(), aDTO);
}
if (rs.getWebsite().isEmpty()) {
LOG.warn("Missing website for {} in {}", rs.getId(), aDTO);
}
taskResults.add(rs);
}
subpageCrawler.ifPresent(paginationResults -> taskResults.addAll(paginationResults.join()));
public Type getType() {
return new TypeToken<PagedElementListDTO<OrfOnBreadCrumsUrlDTO>>() {}.getType();
}

@Override
protected AbstractRecursiveConverterTask<OrfOnVideoInfoDTO, OrfOnBreadCrumsUrlDTO> createNewOwnInstance(Queue<OrfOnBreadCrumsUrlDTO> aElementsToProcess) {
public AbstractRecursiveConverterTask<OrfOnBreadCrumsUrlDTO, OrfOnBreadCrumsUrlDTO> createNewOwnInstance(Queue<OrfOnBreadCrumsUrlDTO> aElementsToProcess) {
return new OrfOnEpisodesTask(crawler, aElementsToProcess);
}


@Override
protected void handleHttpError(OrfOnBreadCrumsUrlDTO dto, URI url, Response response) {
crawler.printErrorMessage();
LOG.fatal(
"A HTTP error {} occurred when getting REST information from: \"{}\".",
response.getStatus(),
url);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,12 @@ private Map<String, OrfOnVideoInfoDTO> generateExpectedResult() {
Optional.of(new URL("https://tvthek.orf.at/profile/Servus-Kasperl/3272601/Servus-Kasperl-Kasperl-Strolchi-Koko-und-Maximilian/14207792")),
Optional.of(List.of(GeoLocations.GEO_NONE)),
Optional.of(new URL("https://api-tvthek.orf.at/api/v4.3/subtitle/885340")),
Optional.of(Map.of(Resolution.NORMAL, new FilmUrl("https://apasfiis.sf.apa.at/ipad/cms-worldwide/2024-01-04_0707_tl_01_Servus-Kasperl-_____14207792__o__6332192865__s15543049_9__ORF1HD_07081012P_07300711P_QXB.mp4/playlist.m3u8", 0L))),
Optional.of(Map.of(
Resolution.HD, new FilmUrl("https://dapasfiis.sf.apa.at/ipad/cms-worldwide/2024-01-04_0707_tl_01_Servus-Kasperl-_____14207792__o__6332192865__s15543049_9__ORF1HD_07081012P_07300711P_Q8C.mp4/playlist.m3u8", 0L),
Resolution.NORMAL, new FilmUrl("https://apasfiis.sf.apa.at/ipad/cms-worldwide/2024-01-04_0707_tl_01_Servus-Kasperl-_____14207792__o__6332192865__s15543049_9__ORF1HD_07081012P_07300711P_QXB.mp4/playlist.m3u8", 0L),
Resolution.SMALL, new FilmUrl("https://apasfiis.sf.apa.at/ipad/cms-worldwide/2024-01-04_0707_tl_01_Servus-Kasperl-_____14207792__o__6332192865__s15543049_9__ORF1HD_07081012P_07300711P_Q4A.mp4/playlist.m3u8", 0L),
Resolution.VERY_SMALL, new FilmUrl("https://apasfiis.sf.apa.at/ipad/cms-worldwide/2024-01-04_0707_tl_01_Servus-Kasperl-_____14207792__o__6332192865__s15543049_9__ORF1HD_07081012P_07300711P_Q1A.3gp/playlist.m3u8", 0L)
)),
Optional.of(Set.of(
new URL("https://api-tvthek.orf.at/assets/subtitles/0166/92/07aead27b4c0b09b36750db54b8ce15ff9b8499c.ttml"),
new URL("https://api-tvthek.orf.at/assets/subtitles/0166/92/4dd6932d7cf6ceaad90a536c3e03981267e32941.vtt"),
Expand All @@ -92,7 +97,12 @@ private Map<String, OrfOnVideoInfoDTO> generateExpectedResult() {
Optional.of(new URL("https://tvthek.orf.at/profile/ABC-Baer/4611813/ABC-Baer/14207790")),
Optional.of(List.of(GeoLocations.GEO_AT)),
Optional.of(new URL("https://api-tvthek.orf.at/api/v4.3/subtitle/885332")),
Optional.of(Map.of(Resolution.NORMAL, new FilmUrl("https://apasfiis.sf.apa.at/ipad/cms-austria/2024-01-04_0645_tl_00_ABC-Baer_____14207790__o__4346842346__s15542921_1__KIDS1_06363007P_06500003P_QXB.mp4/playlist.m3u8", 0L))),
Optional.of(Map.of(
Resolution.HD, new FilmUrl("https://apasfiis.sf.apa.at/ipad/cms-austria/2024-01-04_0645_tl_00_ABC-Baer_____14207790__o__4346842346__s15542921_1__KIDS1_06363007P_06500003P_Q8C.mp4/playlist.m3u8", 0L),
Resolution.NORMAL, new FilmUrl("https://apasfiis.sf.apa.at/ipad/cms-austria/2024-01-04_0645_tl_00_ABC-Baer_____14207790__o__4346842346__s15542921_1__KIDS1_06363007P_06500003P_QXB.mp4/playlist.m3u8", 0L),
Resolution.SMALL, new FilmUrl("https://apasfiis.sf.apa.at/ipad/cms-austria/2024-01-04_0645_tl_00_ABC-Baer_____14207790__o__4346842346__s15542921_1__KIDS1_06363007P_06500003P_Q4A.mp4/playlist.m3u8", 0L),
Resolution.VERY_SMALL, new FilmUrl("https://apasfiis.sf.apa.at/ipad/cms-austria/2024-01-04_0645_tl_00_ABC-Baer_____14207790__o__4346842346__s15542921_1__KIDS1_06363007P_06500003P_Q1A.3gp/playlist.m3u8", 0L)
)),
Optional.of(Set.of(
new URL("https://api-tvthek.orf.at/assets/subtitles/0166/92/b682365a2a3fd1d45a2f029a597735a9df5b7524.ttml"),
new URL("https://api-tvthek.orf.at/assets/subtitles/0166/92/20c53ed98f58a5045da663191516bc7fbf09e3d2.srt"),
Expand Down
Loading

0 comments on commit 6d67553

Please sign in to comment.