From 7ce029a051d3b243bc223337d8f6991a520fcfda Mon Sep 17 00:00:00 2001 From: quiniapiezoelectricity <73748843+quiniapiezoelectricity@users.noreply.github.com> Date: Fri, 15 Nov 2024 19:03:52 +0000 Subject: [PATCH] feat(route): add dw route (#17575) * feat(route): add dw route * fix * Apply suggestions from code review Co-authored-by: Tony * Apply suggestions from code review * Apply suggestions with code review * add mp4 video src * fix: preload metadata -------- --- lib/routes/dw/namespace.ts | 6 + lib/routes/dw/news.ts | 91 +++++++++++++ lib/routes/dw/rss.ts | 62 +++++++++ lib/routes/dw/templates/description.art | 23 ++++ lib/routes/dw/templates/liveblog.art | 13 ++ lib/routes/dw/templates/video.art | 14 ++ lib/routes/dw/utils.ts | 171 ++++++++++++++++++++++++ 7 files changed, 380 insertions(+) create mode 100644 lib/routes/dw/namespace.ts create mode 100644 lib/routes/dw/news.ts create mode 100644 lib/routes/dw/rss.ts create mode 100644 lib/routes/dw/templates/description.art create mode 100644 lib/routes/dw/templates/liveblog.art create mode 100644 lib/routes/dw/templates/video.art create mode 100644 lib/routes/dw/utils.ts diff --git a/lib/routes/dw/namespace.ts b/lib/routes/dw/namespace.ts new file mode 100644 index 00000000000000..33696e36c0a8c1 --- /dev/null +++ b/lib/routes/dw/namespace.ts @@ -0,0 +1,6 @@ +import type { Namespace } from '@/types'; + +export const namespace: Namespace = { + name: 'DW Deutsche Welle', + url: 'dw.com', +}; diff --git a/lib/routes/dw/news.ts b/lib/routes/dw/news.ts new file mode 100644 index 00000000000000..e9391b0d7058f5 --- /dev/null +++ b/lib/routes/dw/news.ts @@ -0,0 +1,91 @@ +import { Route } from '@/types'; +import { processItems } from './utils'; +import got from '@/utils/got'; +import cache from '@/utils/cache'; +import { config } from '@/config'; + +export const route: Route = { + path: '/news/:lang?/:id?', + categories: ['traditional-media'], + example: '/dw/news', + parameters: { + lang: 'Language, see below, default 
to en', + id: 'Category ID, see below, default to the id of the Top Stories Page of the language chosen', + }, + features: { + requirePuppeteer: false, + antiCrawler: false, + supportBT: false, + supportPodcast: false, + supportScihub: false, + requireConfig: false, + }, + name: 'News', + maintainers: ['quiniapiezoelectricity'], + handler, + description: ` +:::tip +Parameters can be obtained from the official website, for instance: +For the site https://www.dw.com/de/deutschland/s-12321 the language code would be \`de\` and the category ID would be \`s-12321\`. +::: +`, + radar: [ + { + source: ['www.dw.com/:lang/:name/:id'], + target: '/news/:lang/:id', + }, + ], +}; + +const defaultUrl = `https://www.dw.com/graph-api/en/content/navigation/9097`; +const typenames = new Set(['Article', 'Liveblog', 'Video']); + +async function handler(ctx) { + const lang = ctx.req.param('lang') ?? 'en'; + let id = ctx.req.param('id'); + + if (/^s-\d+$/.test(id)) { + id = id.match(/^s-(\d+)$/i)[1]; // convert s-1234 id to 1234 + } else if (id === undefined) { + // Look up the id of the Top Stories Page of the selected language if id is not specified in the URL. 
+ const navigation = await cache.tryGet( + 'dw:navigation', + async () => { + const res = await got(defaultUrl); + return res.data.data.content.topStoriesNavigations; + }, + config.cache.routeExpire, + false + ); + id = navigation + .map((item) => item.namedUrl.split('/')) + .find((item) => item[1] === lang)[3] + .match(/^s-(\d+)$/i)[1]; + } + + const response = await got(`https://www.dw.com/graph-api/${lang}/content/navigation/${id}`); + const feed = response.data.data.content; + cache.set('dw:navigation', feed.topStoriesNavigations, config.cache.routeExpire); + + const list = feed.contentComposition.informationSpaces + .flatMap((section) => Object.values(section).flatMap((component) => component[0]?.contents || [])) + .filter((item) => typenames.has(item.__typename) && item.id); + const items = await processItems( + list.map((item) => { + item.link = new URL(item.namedUrl, 'https://www.dw.com').href; + item.pubDate = item.contentDate; + item.description = item.teaser; + item.language = lang; + item.type = item.__typename.toLowerCase(); + return item; + }) + ); + + return { + title: `DW | ${feed.title}`, + link: feed.canonicalUrl, + description: feed.metaDescription, + language: feed.topStoriesNavigations.find((item) => item.namedUrl.startsWith(`/${lang}/`))?.localeLang ?? 
lang, + item: items, + }; +} diff --git a/lib/routes/dw/rss.ts b/lib/routes/dw/rss.ts new file mode 100644 index 00000000000000..835f267a653fe7 --- /dev/null +++ b/lib/routes/dw/rss.ts @@ -0,0 +1,62 @@ +import { Route } from '@/types'; +import { config } from '@/config'; +import Parser from 'rss-parser'; +import { processItems } from './utils'; + +export const route: Route = { + path: '/rss/:channel?', + categories: ['traditional-media'], + example: '/dw/rss/rss-en-all', + parameters: { + channel: 'RSS Feed Channel, see below, `rss-en-all` by default', + }, + features: { + requirePuppeteer: false, + antiCrawler: false, + supportBT: false, + supportPodcast: false, + supportScihub: false, + requireConfig: false, + }, + name: 'RSS', + maintainers: ['quiniapiezoelectricity'], + handler, + description: ` +For a full list of RSS Feed Channels in English, please refer to [DW RSS Feeds](https://corporate.dw.com/en/rss-feeds/a-68693346). +RSS Feed Channels in other languages are also available, for example: \`rss-chi-all\` renders the RSS feed in Chinese and \`rss-de-all\` for the RSS Feed in German. +`, +}; + +async function handler(ctx) { + const category = ctx.req.param('channel') ?? 'rss-en-all'; + + const parser = new Parser({ + customFields: { + item: ['dwsyn:contentID'], + }, + headers: { + 'User-Agent': config.ua, + }, + }); + + const feed = await parser.parseURL(`https://rss.dw.com/rdf/${category}`); + const items = await processItems( + feed.items.map((item) => { + item.id = item['dwsyn:contentID']; + item.pubDate = item.isoDate; + item.description = item.content; + const link = new URL(item.link); + link.search = ''; + item.link = link.href; + item.type = link.pathname.substring(link.pathname.lastIndexOf('/') + 1).startsWith('live-') ? 
'liveblog' : 'article'; // dw rss feed only includes liveblogs and articles + return item; + }) + ); + + return { + title: feed.title, + link: feed.link, + description: feed.description, + item: items, + }; +} diff --git a/lib/routes/dw/templates/description.art b/lib/routes/dw/templates/description.art new file mode 100644 index 00000000000000..bc96a9c68a971f --- /dev/null +++ b/lib/routes/dw/templates/description.art @@ -0,0 +1,23 @@ +{{ if teaser }} +

{{ teaser }}

+{{ /if }} +{{ if video }} + {{@ video }} +{{ else if mainImage }} +
+ {{ mainImage.additionalInformation }} +
+ {{ mainImage.description }} + {{ imageI18n }}: {{ mainImage.target.licenserSupplement }} +
+
+{{ /if }} +{{ if text }} + {{@ text }} +{{ /if }} +{{ if liveblog }} + {{@ liveblog }} +{{ /if }} \ No newline at end of file diff --git a/lib/routes/dw/templates/liveblog.art b/lib/routes/dw/templates/liveblog.art new file mode 100644 index 00000000000000..d133a1ed31b59f --- /dev/null +++ b/lib/routes/dw/templates/liveblog.art @@ -0,0 +1,13 @@ +{{ if posts }} +{{ each posts }} +
+ {{ if $value.localizedContentDate }}

{{ $value.localizedContentDate }}

{{ /if }} + {{ if $value.title }}

{{ $value.title }}

{{ /if }} + {{ if $value.persons }} + {{ each $value.persons }} +

{{ $value.fullName }}

+ {{ /each }} + {{ /if }} + {{ if $value.text }}{{@ $value.text }}{{ /if }} +{{ /each }} +{{ /if }} \ No newline at end of file diff --git a/lib/routes/dw/templates/video.art b/lib/routes/dw/templates/video.art new file mode 100644 index 00000000000000..7a6060404e910e --- /dev/null +++ b/lib/routes/dw/templates/video.art @@ -0,0 +1,14 @@ +{{ if hlsVideoSrc }} + +{{ /if }} diff --git a/lib/routes/dw/utils.ts b/lib/routes/dw/utils.ts new file mode 100644 index 00000000000000..47637ef1e75d1b --- /dev/null +++ b/lib/routes/dw/utils.ts @@ -0,0 +1,171 @@ +import cache from '@/utils/cache'; +import got from '@/utils/got'; +import { load, type CheerioAPI } from 'cheerio'; +import { art } from '@/utils/render'; +import path from 'node:path'; +import { getCurrentPath } from '@/utils/helpers'; + +const __dirname = getCurrentPath(import.meta.url); +const formatId = '605'; + +const i18n = (word: string, lang: string) => { + switch (word) { + case 'Image': + switch (lang) { + case 'sq': + return 'Fotografi'; + case 'am': + return 'ምስል'; + case 'ar': + return 'صورة من'; + case 'bn': + return 'ছবি'; + case 'bs': + return 'Foto'; + case 'bg': + return 'Снимка'; + case 'zh': + return '图像来源'; + case 'zh-hant': + return '圖片來源'; + case 'hr': + return 'Foto'; + case 'fa-af': + return 'عکس'; + case 'en': + return 'Image'; + case 'fr': + return 'Image'; + case 'de': + return 'Bild'; + case 'el': + return 'Εικόνα'; + case 'ha': + return 'Hoto'; + case 'hi': + return 'तस्वीर'; + case 'id': + return 'Foto'; + case 'sw': + return 'Picha'; + case 'mk': + return 'Фотографија'; + case 'ps': + return 'انځور'; + case 'fa-ir': + return 'عکس'; + case 'pl': + return 'Zdjęcie'; + case 'pt-002': + return 'Foto'; + case 'pt-br': + return 'Foto'; + case 'ro': + return 'Imagine'; + case 'ru': + return 'Фото'; + case 'sr': + return 'Foto'; + case 'es': + return 'Imagen'; + case 'tr': + return 'Fotoğraf'; + case 'uk': + return 'Фото'; + case 'ur': + return 'تصویر'; + default: + return 'Image'; + } + 
default: + return word; + } +}; + +const m3u8tomp4 = (src: string) => src.replace('https://hlsvod.dw.com/i/', 'https://tvdownloaddw-a.akamaihd.net/').replace(',AVC_480x270,AVC_512x288,AVC_640x360,AVC_960x540,AVC_1280x720,AVC_1920x1080,.mp4.csmil/master.m3u8', 'AVC_1920x1080.mp4'); + +const processHtml = ($: CheerioAPI, contentLinks) => { + $('img').each((_, elem) => { + try { + const id = $(elem).attr('data-id'); + const contentLink = contentLinks.find((item) => String(item.targetId) === id); + $(elem).attr({ + title: contentLink?.name, + alt: contentLink?.description, + src: `https://static.dw.com/image/${id}_${formatId}.jpg`, + }); + $(elem).removeAttr('style'); + } catch { + // no-empty + } + }); + $('video').each((_, elem) => { + try { + $(elem).attr('poster', $(elem).attr('data-posterurl')); + } catch { + // no-empty + } + }); + $('iframe').each((_, elem) => { + try { + $(elem).attr('src', $(elem).attr('data-src')); + } catch { + // no-empty + } + }); + $('svg').remove(); // svg will screw up in a lot of rss readers +}; + +const processContent = (item, content) => { + const $text = load(content.text); + processHtml($text, content.contentLinks); + const liveblog = + item.type === 'liveblog' && content.posts + ? art(path.join(__dirname, 'templates/liveblog.art'), { + posts: content.posts.map((post) => { + const $post = load(post.text); + processHtml($post, content.contentLinks); + post.text = $post.html(); + return post; + }), + }) + : undefined; + const video = + item.type === 'video' && content.hlsVideoSrc + ? art(path.join(__dirname, 'templates/video.art'), { + hlsVideoSrc: content.hlsVideoSrc, + mp4VideoSrc: m3u8tomp4(content.hlsVideoSrc), + posterImageUrl: content.posterImageUrl, + }) + : undefined; + item.description = art(path.join(__dirname, 'templates/description.art'), { + teaser: content.teaser, + video, + mainImage: $text(`[data-id="${content.mainContentImageLink?.targetId}"]`).length === 0 ? 
content.mainContentImageLink : undefined, + // occasionally the text html already includes the main image, testing to see if an image with the same id exists + text: $text.html(), + liveblog, + imageI18n: i18n('Image', item.language), + formatId, + }); + if (content.trackingCategories) { + item.category = content.trackingCategories; + } + if (content.firstPersonArray) { + item.author = content.firstPersonArray.map((person) => person.fullName).join(', '); + } + return item; +}; + +export const processItems = async (items) => { + items = await Promise.all( + items.map((item) => + cache.tryGet(item.link, async () => { + const response = await got(`https://www.dw.com/graph-api/${item.language}/content/${item.type}/${item.id}`); + const content = response.data.data.content; + return processContent(item, content); + }) + ) + ); + return items; +};