-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
614 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
15 changes: 15 additions & 0 deletions
15
src/main/java/de/mediathekview/mserver/crawler/artem/ArteMConstants.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
package de.mediathekview.mserver.crawler.artem; | ||
|
||
/**
 * Constants used by the ARTE crawler of this package to talk to the ARTE REST API.
 *
 * <p>This class only holds constants and cannot be instantiated.
 */
public final class ArteMConstants {

  /** Value of the {@code limit} query parameter appended to {@link #ALL_VIDEOS}. */
  public static final int PAGE_LIMIT = 100;

  /** Scheme and host that all API URLs of this class start with. */
  public static final String HOST = "https://api.arte.tv";

  /**
   * URL of the video listing, requesting {@code language=de}, {@code sort=-lastModified} and
   * {@link #PAGE_LIMIT} entries per request.
   */
  public static final String ALL_VIDEOS =
      HOST + "/api/opa/v3/videos?language=de&sort=-lastModified&limit=" + PAGE_LIMIT;

  /**
   * Authorization value handed to the crawler tasks as their auth key.
   *
   * <p>NOTE(review): the token is hard-coded in source; confirm that it is a public client token
   * and not a private credential.
   */
  public static final String AUTH =
      "Bearer Nzc1Yjc1ZjJkYjk1NWFhN2I2MWEwMmRlMzAzNjI5NmU3NWU3ODg4ODJjOWMxNTMxYzEzZGRjYjg2ZGE4MmIwOA";

  private ArteMConstants() {
    // constants holder, never instantiated
  }
}
62 changes: 62 additions & 0 deletions
62
src/main/java/de/mediathekview/mserver/crawler/artem/ArteMCrawler.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
package de.mediathekview.mserver.crawler.artem; | ||
|
||
import de.mediathekview.mlib.daten.Film; | ||
import de.mediathekview.mlib.daten.Sender; | ||
import de.mediathekview.mlib.messages.listener.MessageListener; | ||
import de.mediathekview.mserver.base.config.MServerConfigManager; | ||
import de.mediathekview.mserver.base.messages.ServerMessages; | ||
import de.mediathekview.mserver.crawler.basic.AbstractCrawler; | ||
import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO; | ||
import de.mediathekview.mserver.crawler.basic.TopicUrlDTO; | ||
import de.mediathekview.mserver.crawler.kika.json.KikaApiFilmDto; | ||
import de.mediathekview.mserver.crawler.kika.tasks.*; | ||
import de.mediathekview.mserver.progress.listeners.SenderProgressListener; | ||
import org.apache.logging.log4j.LogManager; | ||
import org.apache.logging.log4j.Logger; | ||
|
||
import java.util.Collection; | ||
import java.util.Queue; | ||
import java.util.Set; | ||
import java.util.concurrent.ConcurrentLinkedQueue; | ||
import java.util.concurrent.ForkJoinPool; | ||
import java.util.concurrent.RecursiveTask; | ||
|
||
public class ArteMCrawler extends AbstractCrawler { | ||
private static final Logger LOG = LogManager.getLogger(ArteMCrawler.class); | ||
|
||
public ArteMCrawler( | ||
final ForkJoinPool aForkJoinPool, | ||
final Collection<MessageListener> aMessageListeners, | ||
final Collection<SenderProgressListener> aProgressListeners, | ||
final MServerConfigManager aRootConfig) { | ||
super(aForkJoinPool, aMessageListeners, aProgressListeners, aRootConfig); | ||
} | ||
|
||
@Override | ||
public Sender getSender() { | ||
return Sender.ARTE_DE; | ||
} | ||
|
||
@Override | ||
protected RecursiveTask<Set<Film>> createCrawlerTask() { | ||
|
||
try { | ||
// get all brands from json doc | ||
final Queue<CrawlerUrlDTO> root = new ConcurrentLinkedQueue<>(); | ||
root.add(new CrawlerUrlDTO(ArteMConstants.ALL_VIDEOS)); | ||
final ArteMVideoTask arteMVideoTask = new ArteMVideoTask(this, root, ArteMConstants.AUTH, 0); | ||
final Queue<ArteMVideoDto> videos = new ConcurrentLinkedQueue<>(); | ||
videos.addAll(arteMVideoTask.fork().join()); | ||
// | ||
printMessage(ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), videos.size()); | ||
getAndSetMaxCount(videos.size()); | ||
// | ||
return new ArteMStreamTask(this, videos, ArteMConstants.AUTH, 0); | ||
} catch (final Exception ex) { | ||
LOG.fatal("Exception in ARTE_DE crawler.", ex); | ||
} | ||
|
||
return null; | ||
} | ||
|
||
} |
65 changes: 65 additions & 0 deletions
65
src/main/java/de/mediathekview/mserver/crawler/artem/ArteMSreamDeserializer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
package de.mediathekview.mserver.crawler.artem; | ||
|
||
import com.google.gson.*; | ||
|
||
import de.mediathekview.mserver.base.utils.JsonUtils; | ||
import de.mediathekview.mserver.crawler.basic.PagedElementListDTO; | ||
import java.lang.reflect.Type; | ||
import java.util.HashMap; | ||
import java.util.Map; | ||
import java.util.Optional; | ||
|
||
|
||
public class ArteMSreamDeserializer implements JsonDeserializer<PagedElementListDTO<ArteMStreamDto>> { | ||
private static final String NEXT_PAGE[] = {"meta","videoStreams", "links", "next", "href"}; | ||
private static final String ELEMENT_STREAMS = "videoStreams"; | ||
private static final String ATTR_LANGUAGE = "language"; | ||
private static final String ATTR_QUALITY = "quality"; | ||
private static final String ATTR_MIMETYPE = "mimeType"; | ||
private static final String ATTR_AUDIOCODE = "audioCode"; | ||
private static final String ATTR_URL = "url"; | ||
private static final String ELEMENT_SUBTITLES = "subtitles"; | ||
private static final String ATTR_SUBTITLES_VERSION = "version"; | ||
private static final String ATTR_SUBTITLES_FILENAME = "filename"; | ||
|
||
@Override | ||
public PagedElementListDTO<ArteMStreamDto> deserialize( | ||
final JsonElement jsonElement, final Type typeOfT, final JsonDeserializationContext context) | ||
throws JsonParseException { | ||
// | ||
PagedElementListDTO<ArteMStreamDto> list = new PagedElementListDTO<>(); | ||
// | ||
list.setNextPage(JsonUtils.getElementValueAsString(jsonElement, NEXT_PAGE)); | ||
// | ||
Optional<JsonElement> videos = JsonUtils.getElement(jsonElement, ELEMENT_STREAMS); | ||
if (videos.isEmpty()) { | ||
return list; | ||
} | ||
Optional<JsonElement> subtitle = JsonUtils.getElement(jsonElement, ELEMENT_SUBTITLES); | ||
Optional<Map<String,String>> subtitleStreams = Optional.empty(); | ||
if (subtitle.isPresent()) { | ||
Map<String,String> subtitleEntries = new HashMap<>(); | ||
for (JsonElement sub : subtitle.get().getAsJsonArray()) { | ||
subtitleEntries.put( | ||
JsonUtils.getElementValueAsString(sub, ATTR_SUBTITLES_VERSION).get(), | ||
JsonUtils.getElementValueAsString(sub, ATTR_SUBTITLES_FILENAME).get() | ||
); | ||
} | ||
subtitleStreams = Optional.of(subtitleEntries); | ||
} | ||
|
||
for (JsonElement stream : videos.get().getAsJsonArray()) { | ||
list.addElement(new ArteMStreamDto( | ||
JsonUtils.getElementValueAsString(stream, ATTR_LANGUAGE), | ||
JsonUtils.getElementValueAsString(stream, ATTR_QUALITY), | ||
JsonUtils.getElementValueAsString(stream, ATTR_MIMETYPE), | ||
JsonUtils.getElementValueAsString(stream, ATTR_AUDIOCODE), | ||
JsonUtils.getElementValueAsString(stream, ATTR_URL), | ||
subtitleStreams)); | ||
} | ||
|
||
return list; | ||
} | ||
|
||
|
||
} |
44 changes: 44 additions & 0 deletions
44
src/main/java/de/mediathekview/mserver/crawler/artem/ArteMStreamDto.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
package de.mediathekview.mserver.crawler.artem; | ||
|
||
import java.util.Map; | ||
import java.util.Optional; | ||
|
||
/**
 * Value holder for one video stream entry together with the subtitles of its response.
 *
 * <p>All values are stored exactly as passed to the constructor and returned unchanged by the
 * getters; no validation or copying takes place.
 */
public class ArteMStreamDto {
  Optional<String> language;
  Optional<String> quality;
  Optional<String> mimeType;
  Optional<String> audioCode;
  Optional<String> url;
  /** Subtitle entries; the deserializer of this package fills it as version to filename. */
  Optional<Map<String, String>> subtitles;

  /**
   * Creates a DTO from the given values.
   *
   * @param language language of the stream
   * @param quality quality of the stream
   * @param mimeType MIME type of the stream
   * @param audioCode audio code of the stream
   * @param url URL of the stream
   * @param subtitles subtitle entries belonging to the stream
   */
  public ArteMStreamDto(
      Optional<String> language,
      Optional<String> quality,
      Optional<String> mimeType,
      Optional<String> audioCode,
      Optional<String> url,
      Optional<Map<String, String>> subtitles) {
    this.language = language;
    this.quality = quality;
    this.mimeType = mimeType;
    this.audioCode = audioCode;
    this.url = url;
    this.subtitles = subtitles;
  }

  /** @return the language as passed to the constructor */
  public Optional<String> getLanguage() {
    return language;
  }

  /** @return the quality as passed to the constructor */
  public Optional<String> getQuality() {
    return quality;
  }

  /** @return the MIME type as passed to the constructor */
  public Optional<String> getMimeType() {
    return mimeType;
  }

  /** @return the audio code as passed to the constructor */
  public Optional<String> getAudioCode() {
    return audioCode;
  }

  /** @return the URL as passed to the constructor */
  public Optional<String> getUrl() {
    return url;
  }

  /** @return the subtitle entries as passed to the constructor */
  public Optional<Map<String, String>> getSubtitles() {
    return subtitles;
  }
}
148 changes: 148 additions & 0 deletions
148
src/main/java/de/mediathekview/mserver/crawler/artem/ArteMStreamTask.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
package de.mediathekview.mserver.crawler.artem; | ||
|
||
import java.lang.reflect.Type; | ||
import java.net.URI; | ||
import java.net.URL; | ||
import java.time.Duration; | ||
import java.time.LocalDateTime; | ||
import java.time.temporal.TemporalUnit; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Optional; | ||
import java.util.Queue; | ||
import java.util.Set; | ||
import java.util.UUID; | ||
import java.util.concurrent.ConcurrentLinkedQueue; | ||
import java.util.concurrent.TimeUnit; | ||
|
||
import org.apache.logging.log4j.LogManager; | ||
import org.apache.logging.log4j.Logger; | ||
|
||
import com.google.gson.reflect.TypeToken; | ||
|
||
import de.mediathekview.mlib.daten.Film; | ||
import de.mediathekview.mlib.daten.GeoLocations; | ||
import de.mediathekview.mserver.crawler.basic.AbstractCrawler; | ||
import de.mediathekview.mserver.crawler.basic.AbstractJsonRestTask; | ||
import de.mediathekview.mserver.crawler.basic.AbstractRecursiveConverterTask; | ||
import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO; | ||
import de.mediathekview.mserver.crawler.basic.PagedElementListDTO; | ||
import jakarta.ws.rs.core.Response; | ||
|
||
// <T, R, D extends CrawlerUrlDTO> extends AbstractRestTask<T, D> | ||
// return T Class from this task, desirialisation of class R , D , Reasearch in this url | ||
public class ArteMStreamTask extends AbstractJsonRestTask<Film, PagedElementListDTO<ArteMStreamDto>, ArteMVideoDto> { | ||
private static final long serialVersionUID = 1L; | ||
private static final Logger LOG = LogManager.getLogger(ArteMStreamTask.class); | ||
private int subPageIndex = 0; | ||
|
||
protected ArteMStreamTask(AbstractCrawler crawler, Queue<ArteMVideoDto> urlToCrawlDTOs, String authKey, int subPageIndex) { | ||
super(crawler, urlToCrawlDTOs, authKey); | ||
this.subPageIndex = subPageIndex; | ||
} | ||
|
||
@Override | ||
protected Type getType() { | ||
return new TypeToken<List<ArteMVideoDto>>() {}.getType(); | ||
} | ||
|
||
@Override | ||
protected void handleHttpError(ArteMVideoDto dto, URI url, Response response) { | ||
crawler.printErrorMessage(); | ||
LOG.fatal( | ||
"A HTTP error {} occurred when getting REST information from: \"{}\".", | ||
response.getStatus(), | ||
url); | ||
} | ||
|
||
@Override | ||
protected void postProcessing(PagedElementListDTO<ArteMStreamDto> aResponseObj, ArteMVideoDto aDTO) { | ||
final Optional<AbstractRecursiveConverterTask<Film, ArteMVideoDto>> subpageCrawler; | ||
final Optional<String> nextPageLink = aResponseObj.getNextPage(); | ||
if (nextPageLink.isPresent() && config.getMaximumSubpages() > subPageIndex) { | ||
final Queue<ArteMVideoDto> nextPageLinks = new ConcurrentLinkedQueue<>(); | ||
ArteMVideoDto np = new ArteMVideoDto(aDTO); | ||
np.setUrl(nextPageLink.get()); | ||
nextPageLinks.add(np); | ||
subpageCrawler = Optional.of(createNewOwnInstance(nextPageLinks)); | ||
subpageCrawler.get().fork(); | ||
} else { | ||
subpageCrawler = Optional.empty(); | ||
} | ||
// Trailer | ||
if (!aDTO.getPlatform().orElse("").equalsIgnoreCase("EXTRAIT")) { | ||
Set<ArteMStreamDto> streams = aResponseObj.getElements(); | ||
taskResults.add(createFilm(aDTO, streams)); | ||
} | ||
// | ||
|
||
|
||
} | ||
|
||
@Override | ||
protected Object getParser(ArteMVideoDto aDTO) { | ||
return new ArteMSreamDeserializer(); | ||
} | ||
|
||
@Override | ||
protected AbstractRecursiveConverterTask<Film, ArteMVideoDto> createNewOwnInstance( | ||
Queue<ArteMVideoDto> aElementsToProcess) { | ||
return new ArteMStreamTask(crawler, aElementsToProcess, getAuthKey().orElse(""), subPageIndex+1); | ||
} | ||
|
||
private Film createFilm(ArteMVideoDto filmData, Set<ArteMStreamDto> streams) { | ||
Film film = new Film( | ||
UUID.randomUUID(), | ||
crawler.getSender(), | ||
filmData.getSubtitle().orElse(""), | ||
filmData.getTitle().get(), | ||
parseDate(filmData.getCreationDate().get()).get(), | ||
parseDuration(filmData.getDurationSeconds().get()).get() | ||
); | ||
film.setBeschreibung(filmData.getShortDescription().get()); | ||
film.setWebsite(parseWebsite(filmData.getWebsite().get()).get()); | ||
film.addGeolocation(parseGeo(filmData.getGeoblockingZone().get())); | ||
streams.stream().findAny().get().getSubtitles(); | ||
return film; | ||
} | ||
|
||
private Set<URL> parseSubtitle(Optional<Map<String, String>> data) { | ||
return null; | ||
} | ||
|
||
private GeoLocations parseGeo(String data) { | ||
switch(data) { | ||
case "ALL": | ||
return GeoLocations.GEO_NONE; | ||
} | ||
return GeoLocations.GEO_NONE; | ||
} | ||
|
||
private Optional<LocalDateTime> parseDate(String date) { | ||
try { | ||
return Optional.of(LocalDateTime.parse(date)); | ||
} catch (Exception e) { | ||
|
||
} | ||
return Optional.empty(); | ||
} | ||
|
||
private Optional<Duration> parseDuration(String data) { | ||
try { | ||
return Optional.of(Duration.ofSeconds(Long.parseLong(data))); | ||
} catch (Exception e) { | ||
|
||
} | ||
return Optional.empty(); | ||
} | ||
|
||
private Optional<URL> parseWebsite(String data) { | ||
try { | ||
return Optional.of(new URL(data)); | ||
} catch (Exception e) { | ||
|
||
} | ||
return Optional.empty(); | ||
} | ||
|
||
} |
Oops, something went wrong.