Skip to content

Commit

Permalink
ARTE: migrate to new category api
Browse files Browse the repository at this point in the history
  • Loading branch information
alex1702 committed Sep 14, 2023
2 parents 60eaf91 + 5ab1854 commit 0158f50
Show file tree
Hide file tree
Showing 8 changed files with 1,184 additions and 1,372 deletions.
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ sourceCompatibility = JavaVersion.VERSION_17
targetCompatibility = JavaVersion.VERSION_17
group = 'de.mediathekview'
archivesBaseName = "MServer"
version = '3.1.218'
version = '3.1.219'

def jarName = 'MServer.jar'
def mainClass = 'mServer.Main'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,11 @@
*/
public class ArteCategoryFilmListDeserializer implements JsonDeserializer<ArteCategoryFilmsDTO> {

private static final String JSON_ELEMENT_CONTENT = "content";
private static final String JSON_ELEMENT_DATA = "data";
private static final String JSON_ELEMENT_NEXTPAGE = "nextPage";
private static final String JSON_ELEMENT_PROGRAMID = "programId";
private static final String JSON_ELEMENT_VALUE = "value";
private static final String JSON_ELEMENT_ZONES = "zones";

@Override
public ArteCategoryFilmsDTO deserialize(JsonElement aJsonElement, Type aType, JsonDeserializationContext aContext) throws JsonParseException {
Expand All @@ -29,36 +30,37 @@ public ArteCategoryFilmsDTO deserialize(JsonElement aJsonElement, Type aType, Js
if(aJsonElement.getAsJsonObject().has(JSON_ELEMENT_VALUE)) {
rootElement = aJsonElement.getAsJsonObject().get(JSON_ELEMENT_VALUE);
}
final JsonElement dataElement = rootElement.getAsJsonObject().get(JSON_ELEMENT_DATA);
if (dataElement == null || dataElement.isJsonNull() || !dataElement.isJsonArray()) {
Log.errorLog(12834940, "data element not found");
final JsonElement zoneElement = rootElement.getAsJsonObject().get(JSON_ELEMENT_ZONES);
if (zoneElement == null || zoneElement.isJsonNull() || !zoneElement.isJsonArray()) {
Log.errorLog(12834940, "zones element not found");
return dto;
}

for (JsonElement jsonElement : dataElement.getAsJsonArray()) {
String programId = jsonElement.getAsJsonObject().get(JSON_ELEMENT_PROGRAMID).getAsString();
if (programId != null) {
if (programId.startsWith("RC-")) {
try {
long collectionId = Long.parseLong(programId.replace("RC-", ""));
dto.addCollection(String.format("RC-%06d", collectionId));
} catch (NumberFormatException e) {
Log.errorLog(12834939, "Invalid collection id: " + programId);
for (JsonElement jsonElement : zoneElement.getAsJsonArray()) {
if(jsonElement.getAsJsonObject().has(JSON_ELEMENT_CONTENT)) {
final JsonObject contentObject = jsonElement.getAsJsonObject().get(JSON_ELEMENT_CONTENT).getAsJsonObject();
if (contentObject.has(JSON_ELEMENT_DATA)) {
for(JsonElement dataElement : contentObject.get(JSON_ELEMENT_DATA).getAsJsonArray()) {
String programId = dataElement.getAsJsonObject().get(JSON_ELEMENT_PROGRAMID).getAsString();
if (programId != null) {
if (programId.startsWith("RC-")) {
try {
long collectionId = Long.parseLong(programId.replace("RC-", ""));
dto.addCollection(String.format("RC-%06d", collectionId));
} catch (NumberFormatException e) {
Log.errorLog(12834939, "Invalid collection id: " + programId);
}
} else {
dto.addProgramId(programId);
}
}
}
} else {
dto.addProgramId(programId);
}
}
}

dto.setNextPage(hasNextPage(rootElement.getAsJsonObject()));
dto.setNextPage(false);

return dto;
}

/**
 * Reports whether the given response object announces a further page.
 *
 * @param aJsonObject the JSON object that may carry the "nextPage" member
 * @return {@code true} if the member is present and not JSON null;
 *         {@code false} if it is missing or JSON null
 */
private static boolean hasNextPage(JsonObject aJsonObject) {
    // JsonObject.get yields null for an absent member; guard before
    // dereferencing so a response without the key means "no next page"
    // instead of failing with a NullPointerException.
    final JsonElement nextPageElement = aJsonObject.get(JSON_ELEMENT_NEXTPAGE);
    return nextPageElement != null && !nextPageElement.isJsonNull();
}
}
58 changes: 20 additions & 38 deletions src/main/java/mServer/crawler/sender/arte/MediathekArte.java
Original file line number Diff line number Diff line change
Expand Up @@ -45,25 +45,23 @@ public class MediathekArte extends MediathekReader {
private static final Logger LOG = LogManager.getLogger(MediathekArte.class);
private static final String ARTE_API_TAG_URL_PATTERN = "https://api.arte.tv/api/opa/v3/videos?channel=%s&arteSchedulingDay=%s";

private static final String URL_SUBCATEGORY
= "https://www.arte.tv/api/rproxy/emac/v3/%s/web/data/MOST_RECENT_SUBCATEGORY/?subCategoryCode=%s&page=%s&limit=50";

private static final String[] SUBCATEGORIES = new String[]{
"WEB", "AUT",
"AJO", "AUV", "KUL", "DCY", "ENQ", "JUN",
"ACC", "CMG", "FLM", "CMU", "MCL",
"CHU", "FIC", "SES",
"ART", "POP", "IDE",
"ADS", "BAR", "CLA", "JAZ", "MUA", "MUD", "OPE", "MUE", "HIP", "MET",
"ENB", "ENN", "SAN", "TEC",
"ATA", "EVA", "NEA", "VIA",
"CIV", "LGP", "XXE"
private static final String URL_CATEGORY = "https://www.arte.tv/api/rproxy/emac/v4/%s/web/pages/%s";

private static final String[] CATEGORIES = {
"ARS",
"DOR",
"CIN",
"SER",
"ACT",
"CPO",
"SCI",
"DEC",
"HIS"
};

private static final String COLLECTION_URL = "https://api.arte.tv/api/opa/v3/programs/%s/%s";

private static final DateTimeFormatter ARTE_API_DATEFORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd");
private static final boolean PARSE_SUBCATEGORY_SUB_PAGES = false; // Flag, ob Unterseiten der Unterkategorien verarbeitet werden soll

public static final String ARTE_EN = "ARTE.EN";
public static final String ARTE_ES = "ARTE.ES";
Expand All @@ -86,7 +84,7 @@ protected synchronized void meldungStart() {
super.meldungStart();

senderLanguages.put(Const.ARTE_DE, "de");
senderLanguages.put(Const.ARTE_FR, "fr");
// senderLanguages.put(Const.ARTE_FR, "fr");
/*if (LocalDate.now().getDayOfYear() % 2 == 0) {
senderLanguages.put(ARTE_EN, "en");
senderLanguages.put(ARTE_ES, "es");
Expand Down Expand Up @@ -146,9 +144,9 @@ public void addToList() {

/**
 * Queues one entry in listeThemen for every combination of configured
 * sender language and category code. Each entry is the array
 * {sender, langCode, code, url}, where the URL is formatted from the
 * lower-cased language code and the category code.
 */
private void addCategories() {
senderLanguages.forEach((sender, langCode) -> {
for (String subCategory : SUBCATEGORIES) {
String subCategoryUrl = String.format(URL_SUBCATEGORY, langCode.toLowerCase(), subCategory, 1);
listeThemen.add(new String[]{sender, langCode, subCategory, subCategoryUrl});
for (String category : CATEGORIES) {
String categoryUrl = String.format(URL_CATEGORY, langCode.toLowerCase(), category);
listeThemen.add(new String[]{sender, langCode, category, categoryUrl});
}
});
}
Expand Down Expand Up @@ -213,15 +211,15 @@ public void run() {
String[] link;
while (!Config.getStop() && (link = listeThemen.getListeThemen()) != null) {
meldungProgress(link[2] + "/" + link[3] /* url */);
loadSubCategory(link[0], link[1], link[2], link[3]);
loadCategory(link[0], link[1], link[2], link[3]);
}
} catch (Exception ex) {
Log.errorLog(894330854, ex, "");
}
meldungThreadUndFertig();
}

private void loadSubCategory(String sender, String langCode, String aCategory, String aUrl) {
private void loadCategory(String sender, String langCode, String aCategory, String aUrl) {
Gson gson = new GsonBuilder()
.registerTypeAdapter(ArteCategoryFilmsDTO.class, new ArteCategoryFilmListDeserializer())
.create();
Expand All @@ -232,29 +230,13 @@ private void loadSubCategory(String sender, String langCode, String aCategory, S
.registerTypeAdapter(ArteCategoryFilmsDTO.class, new ArteCollectionChildDeserializer())
.create();

// erste Seite laden
int i = 2;
ArteCategoryFilmsDTO dto = loadSubCategoryPage(gson, sender, aUrl);
if (dto != null) {
loadCollections(sender, langCode, gsonCollectionParent, gsonCollectionChild, dto);

ArteCategoryFilmsDTO nextDto = dto;
while (PARSE_SUBCATEGORY_SUB_PAGES && nextDto != null && nextDto.hasNextPage()) {

// weitere Seiten laden und zu programId-liste des ersten DTO hinzufügen
String url = String.format(URL_SUBCATEGORY, langCode.toLowerCase(), aCategory, i);
nextDto = loadSubCategoryPage(gson, sender, url);
if (nextDto != null) {
loadCollections(sender, langCode, gsonCollectionParent, gsonCollectionChild, nextDto);
nextDto.getProgramIds().forEach(programId -> dto.addProgramId(programId));
}

i++;
}

Log.sysLog(String.format("%s: %d, %d", aCategory, dto.getProgramIds().size(), dto.getCollectionIds().size()));
// alle programIds verarbeiten
ListeFilme loadedFilme = loadPrograms(sender, langCode, dto);
loadedFilme.forEach((film) -> addFilm(film));
loadedFilme.forEach(film -> addFilm(film));
Log.sysLog(String.format("%s: Subcategory %s: %d Filme", sender, aCategory, loadedFilme.size()));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,7 @@ public ArteCategoryFilmListDeserializerTest(String aJsonFile, String[] aProgramI
/**
 * Supplies the parameter rows for this parameterized test.
 * Each row holds a JSON fixture resource path, the program ids expected
 * from that fixture, and a boolean flag.
 * NOTE(review): the flag is presumably the expected "has next page"
 * value - confirm against the test constructor.
 */
@Parameterized.Parameters
public static Collection<Object[]> data() {
return Arrays.asList(new Object[][]{
{"/arte/arte_subcategory_old_page1.json", new String[]{"078666-012-A", "078664-000-A", "080928-000-A", "074484-000-A", "074485-000-A", "079479-002-A", "080921-000-A", "082406-000-A", "072391-000-A", "080920-000-A"}, true},
{"/arte/arte_subcategory_old_page_last.json", new String[]{"062866-009-A"}, false},
{"/arte/arte_subcategory_page.json", new String[]{"107023-009-A","086862-000-A","107342-038-A","081587-000-A","072442-000-A"}, true}
{"/arte/arte_category.json", new String[]{"112511-000-A", "047389-000-A", "109066-000-A", "082669-000-A", "003982-000-A", "021109-000-A"}, false},
});
}

Expand Down
Loading

0 comments on commit 0158f50

Please sign in to comment.