Skip to content

Commit

Permalink
feat(route): add dw route (DIYgod#17575)
Browse files Browse the repository at this point in the history
* feat(route): add dw route

* fix

* Apply suggestions from code review

Co-authored-by: Tony <[email protected]>

* Apply suggestions from code review

* Apply suggestions with code review

* add mp4 video src

* fix: preload metadata

--------
  • Loading branch information
quiniapiezoelectricity authored Nov 15, 2024
1 parent 682d954 commit 7ce029a
Show file tree
Hide file tree
Showing 7 changed files with 380 additions and 0 deletions.
6 changes: 6 additions & 0 deletions lib/routes/dw/namespace.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import type { Namespace } from '@/types';

// Namespace metadata for the routes under lib/routes/dw: display name and site domain.
export const namespace: Namespace = {
name: 'DW Deutsche Welle',
url: 'dw.com',
};
91 changes: 91 additions & 0 deletions lib/routes/dw/news.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import { Route } from '@/types';
import { processItems } from './utils';
import got from '@/utils/got';
import cache from '@/utils/cache';
import { config } from '@/config';

/**
 * Route definition for `/dw/news/:lang?/:id?`.
 *
 * Both path parameters are optional; the handler falls back to `en` for the
 * language and looks up the Top Stories page of the chosen language when no
 * category id is supplied.
 */
export const route: Route = {
    path: '/news/:lang?/:id?',
    categories: ['traditional-media'],
    example: '/dw/news',
    parameters: {
        lang: 'Language, see below, default to en',
        id: 'Category ID, see below, default to the id of the Top Stories Page of the language chosen',
    },
    features: {
        requirePuppeteer: false,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
        requireConfig: false,
    },
    name: 'News',
    maintainers: ['quiniapiezoelectricity'],
    handler,
    // The category ID in the example must match the trailing segment of the example URL.
    description: `
:::tip
Parameters can be obtained from the official website, for instance:
For the site https://www.dw.com/de/deutschland/s-12321 the language code would be \`de\` and the category ID would be \`s-12321\`.
:::
`,
    radar: [
        {
            source: ['www.dw.com/:lang/:name/:id'],
            target: '/news/:lang/:id',
        },
    ],
};

// Navigation endpoint fetched when no category id is given; its `topStoriesNavigations`
// list is used to look up the Top Stories page of the selected language.
const defaultUrl = `https://www.dw.com/graph-api/en/content/navigation/9097`;
// Content `__typename` values kept when building the feed; entries of any other type are filtered out.
const typenames = new Set(['Article', 'Liveblog', 'Video']);

/**
 * Builds the feed for a DW navigation (category) page.
 *
 * @param ctx Request context; reads the optional `lang` and `id` path parameters.
 * @returns Feed data (title, link, description, language and items).
 * @throws Error when no id is given and no Top Stories page can be found for `lang`.
 */
async function handler(ctx) {
    const lang = ctx.req.param('lang') ?? 'en';
    const rawId = ctx.req.param('id');
    // Matches ids written as `s-1234`; the numeric part is what the API expects.
    const sectionIdPattern = /^s-(\d+)$/i;
    let id;

    if (rawId === undefined) {
        // Look up the id of the Top Stories Page of the selected language if id is not specified in the URL.
        const navigation = await cache.tryGet(
            'dw:navigation',
            async () => {
                const res = await got(defaultUrl);
                return res.data.data.content.topStoriesNavigations;
            },
            config.cache.routeExpire,
            false
        );
        // namedUrl looks like `/<lang>/<name>/s-<id>`, so segment 1 is the language and segment 3 the id.
        const topStoriesId = navigation
            .map((item) => item.namedUrl.split('/'))
            .find((segments) => segments[1] === lang)?.[3]
            ?.match(sectionIdPattern)?.[1];
        if (topStoriesId === undefined) {
            // Fail with an explicit message instead of a TypeError on an undefined lookup result.
            throw new Error(`Unable to find the Top Stories page for language "${lang}"`);
        }
        id = topStoriesId;
    } else {
        // Convert s-1234 to 1234 (case-insensitively); any other value is passed through unchanged.
        id = rawId.match(sectionIdPattern)?.[1] ?? rawId;
    }

    const response = await got(`https://www.dw.com/graph-api/${lang}/content/navigation/${id}`);
    const feed = response.data.data.content;
    // Refresh the cached navigation list with the one delivered alongside this page.
    cache.set('dw:navigation', feed.topStoriesNavigations, config.cache.routeExpire);

    const list = feed.contentComposition.informationSpaces
        // `component?.[0]` tolerates null/undefined values among a section's properties.
        .flatMap((section) => Object.values(section).flatMap((component) => component?.[0]?.contents || []))
        .filter((item) => typenames.has(item.__typename) && item.id);
    const items = await processItems(
        list.map((item) => {
            item.link = new URL(item.namedUrl, 'https://www.dw.com').href;
            item.pubDate = item.contentDate;
            item.description = item.teaser;
            item.language = lang;
            item.type = item.__typename.toLowerCase();
            return item;
        })
    );

    return {
        title: `DW | ${feed.title}`,
        link: feed.canonicalUrl,
        description: feed.metaDescription,
        language: feed.topStoriesNavigations.find((item) => item.namedUrl.startsWith(`/${lang}/`))?.localeLang ?? lang,
        item: items,
    };
}
62 changes: 62 additions & 0 deletions lib/routes/dw/rss.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import { Route } from '@/types';
import { config } from '@/config';
import Parser from 'rss-parser';
import { processItems } from './utils';

/**
 * Route definition for `/dw/rss/:channel?`.
 *
 * The documented parameter key matches the `:channel` path parameter that the
 * handler reads.
 */
export const route: Route = {
    path: '/rss/:channel?',
    categories: ['traditional-media'],
    example: '/dw/rss/rss-en-all',
    parameters: {
        channel: 'RSS Feed Channel, see below, `rss-en-all` by default',
    },
    features: {
        requirePuppeteer: false,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
        requireConfig: false,
    },
    name: 'RSS',
    maintainers: ['quiniapiezoelectricity'],
    handler,
    description: `
For a full list of RSS Feed Channels in English, please refer to [DW RSS Feeds](https://corporate.dw.com/en/rss-feeds/a-68693346).
RSS Feed Channels in other languages are also available, for example: \`rss-chi-all\` renders the RSS feed in Chinese and \`rss-de-all\` for the RSS Feed in German
`,
};

/**
 * Builds a full-text feed from one of DW's RSS channels.
 *
 * @param ctx Request context; reads the optional `channel` path parameter
 *            (defaults to `rss-en-all`).
 * @returns Feed data (title, link, description and items).
 */
async function handler(ctx) {
    const category = ctx.req.param('channel') ?? 'rss-en-all';

    const parser = new Parser({
        customFields: {
            item: ['dwsyn:contentID'],
        },
        headers: {
            'User-Agent': config.ua,
        },
    });

    const feed = await parser.parseURL(`https://rss.dw.com/rdf/${category}`);
    const items = await processItems(
        feed.items.map((item) => {
            item.id = item['dwsyn:contentID'];
            item.pubDate = item.isoDate;
            item.description = item.content;
            const link = new URL(item.link);
            link.search = ''; // drop the query string from the item link
            item.link = link.href;
            const segments = link.pathname.split('/');
            // processItems interpolates item.language into the content URL and uses it for the
            // image label; without it the request path would contain the literal "undefined".
            // Assumes item links look like /<lang>/<name>/<id>, as on the website — TODO confirm.
            if (segments[1]) {
                item.language = segments[1];
            }
            item.type = link.pathname.substring(link.pathname.lastIndexOf('/') + 1).startsWith('live-') ? 'liveblog' : 'article'; // dw rss feed only includes liveblogs and articles
            return item;
        })
    );

    return {
        title: feed.title,
        link: feed.link,
        description: feed.description,
        item: items,
    };
}
23 changes: 23 additions & 0 deletions lib/routes/dw/templates/description.art
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{{ if teaser }}
<blockquote><p><em>{{ teaser }}</em></p></blockquote>
{{ /if }}
{{ if video }}
{{@ video }}
{{ else if mainImage }}
<figure>
<img
src="https://static.dw.com/image/{{ mainImage.targetId }}_{{ formatId }}.jpg"
alt="{{ mainImage.additionalInformation }}"
>
<figcaption>
{{ mainImage.description }}
<small>{{ imageI18n }}: {{ mainImage.target.licenserSupplement }}</small>
</figcaption>
</figure>
{{ /if }}
{{ if text }}
{{@ text }}
{{ /if }}
{{ if liveblog }}
{{@ liveblog }}
{{ /if }}
13 changes: 13 additions & 0 deletions lib/routes/dw/templates/liveblog.art
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{{ if posts }}
{{ each posts }}
<hr>
{{ if $value.localizedContentDate }}<p><i>{{ $value.localizedContentDate }}</i></p>{{ /if }}
{{ if $value.title }}<h2>{{ $value.title }}</h2>{{ /if }}
{{ if $value.persons }}
{{ each $value.persons }}
<p><i>{{ $value.fullName }}</i></p>
{{ /each }}
{{ /if }}
{{ if $value.text }}{{@ $value.text }}{{ /if }}
{{ /each }}
{{ /if }}
14 changes: 14 additions & 0 deletions lib/routes/dw/templates/video.art
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{{ if hlsVideoSrc }}
<video
{{ if posterImageUrl }} poster="{{ posterImageUrl }}" {{ /if }}
controls preload="metadata">
<source
src="{{ hlsVideoSrc }}"
type="application/x-mpegURL"
>
<source
src="{{ mp4VideoSrc }}"
type="video/mp4"
>
</video>
{{ /if }}
171 changes: 171 additions & 0 deletions lib/routes/dw/utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
import cache from '@/utils/cache';
import got from '@/utils/got';
import { load, type CheerioAPI } from 'cheerio';
import { art } from '@/utils/render';
import path from 'node:path';
import { getCurrentPath } from '@/utils/helpers';

// Base directory used to resolve the template paths passed to `art`.
const __dirname = getCurrentPath(import.meta.url);
// Suffix used in static.dw.com image URLs (`<id>_<formatId>.jpg`).
// NOTE(review): presumably selects an image rendition/size — confirm.
const formatId = '605';

// Translations of the word "Image", keyed by language code.
const imageLabelByLang = new Map<string, string>([
    ['sq', 'Fotografi'],
    ['am', 'ምስል'],
    ['ar', 'صورة من'],
    ['bn', 'ছবি'],
    ['bs', 'Foto'],
    ['bg', 'Снимка'],
    ['zh', '图像来源'],
    ['zh-hant', '圖片來源'],
    ['hr', 'Foto'],
    ['fa-af', 'عکس'],
    ['en', 'Image'],
    ['fr', 'Image'],
    ['de', 'Bild'],
    ['el', 'Εικόνα'],
    ['ha', 'Hoto'],
    ['hi', 'तस्वीर'],
    ['id', 'Foto'],
    ['sw', 'Picha'],
    ['mk', 'Фотографија'],
    ['ps', 'انځور'],
    ['fa-ir', 'عکس'],
    ['pl', 'Zdjęcie'],
    ['pt-002', 'Foto'],
    ['pt-br', 'Foto'],
    ['ro', 'Imagine'],
    ['ru', 'Фото'],
    ['sr', 'Foto'],
    ['es', 'Imagen'],
    ['tr', 'Fotoğraf'],
    ['uk', 'Фото'],
    ['ur', 'تصویر'],
]);

/**
 * Translates a UI word into the given language.
 *
 * Only the word `Image` has translations; any other word is returned as is,
 * and an unknown language yields the English `Image`.
 *
 * @param word Word to translate.
 * @param lang Language code to translate into.
 * @returns The translated word, or a fallback as described above.
 */
const i18n = (word: string, lang: string) => {
    if (word !== 'Image') {
        return word;
    }
    return imageLabelByLang.get(lang) ?? 'Image';
};

/**
 * Derives an mp4 URL from an HLS playlist URL by string substitution.
 *
 * The `https://hlsvod.dw.com/i/` prefix is swapped for the download host, and
 * the fixed rendition-list suffix ending in `master.m3u8` is swapped for the
 * single `AVC_1920x1080.mp4` file name. Parts that do not match are left
 * untouched.
 *
 * @param src HLS playlist URL.
 * @returns The rewritten URL.
 */
const m3u8tomp4 = (src: string) => {
    const hlsPrefix = 'https://hlsvod.dw.com/i/';
    const downloadPrefix = 'https://tvdownloaddw-a.akamaihd.net/';
    const hlsSuffix = ',AVC_480x270,AVC_512x288,AVC_640x360,AVC_960x540,AVC_1280x720,AVC_1920x1080,.mp4.csmil/master.m3u8';
    const mp4Suffix = 'AVC_1920x1080.mp4';

    const rehosted = src.replace(hlsPrefix, downloadPrefix);
    return rehosted.replace(hlsSuffix, mp4Suffix);
};

/**
 * Normalizes embedded media in a loaded DW HTML fragment, in place.
 *
 * - `img`: drops the inline style; when the element carries a `data-id`, points
 *   `src` at the static image URL for that id and copies title/alt from the
 *   matching content link when one exists.
 * - `video` / `iframe`: copies `data-posterurl` / `data-src` into `poster` / `src`.
 * - `svg`: removed.
 *
 * @param $ Cheerio document to modify.
 * @param contentLinks Content links of the article; entries are matched to
 *                     images by `targetId`. May be missing.
 */
const processHtml = ($: CheerioAPI, contentLinks) => {
    // A missing or non-array contentLinks must not prevent the src rewrite below.
    const links = Array.isArray(contentLinks) ? contentLinks : [];

    $('img').each((_, elem) => {
        const $img = $(elem);
        $img.removeAttr('style');

        const id = $img.attr('data-id');
        if (!id) {
            // Without an id there is no static image URL to build; keep the element's own src.
            return;
        }
        $img.attr('src', `https://static.dw.com/image/${id}_${formatId}.jpg`);

        // Set title/alt individually: the object form of .attr() would write the
        // literal string "undefined" for values that are not available.
        const contentLink = links.find((item) => String(item?.targetId) === id);
        if (contentLink?.name) {
            $img.attr('title', String(contentLink.name));
        }
        if (contentLink?.description) {
            $img.attr('alt', String(contentLink.description));
        }
    });
    $('video').each((_, elem) => {
        const poster = $(elem).attr('data-posterurl');
        if (poster) {
            $(elem).attr('poster', poster);
        }
    });
    $('iframe').each((_, elem) => {
        const src = $(elem).attr('data-src');
        if (src) {
            $(elem).attr('src', src);
        }
    });
    $('svg').remove(); // svg will screw up in a lot of rss readers
};

/**
 * Renders the full description of a feed item from its content payload and
 * copies categories and authors onto the item.
 *
 * @param item Feed item; `type` and `language` are read, `description`,
 *             `category` and `author` are written.
 * @param content Content object returned by the content API for this item.
 * @returns The same `item`, mutated.
 */
const processContent = (item, content) => {
    // load() rejects null/undefined input; fall back to an empty document so a
    // text-less item still renders its teaser, video or image.
    const $text = load(content.text ?? '');
    processHtml($text, content.contentLinks);
    const liveblog =
        item.type === 'liveblog' && content.posts
            ? art(path.join(__dirname, 'templates/liveblog.art'), {
                  posts: content.posts.map((post) => {
                      const $post = load(post.text ?? '');
                      processHtml($post, content.contentLinks);
                      post.text = $post.html();
                      return post;
                  }),
              })
            : undefined;
    const video =
        item.type === 'video' && content.hlsVideoSrc
            ? art(path.join(__dirname, 'templates/video.art'), {
                  hlsVideoSrc: content.hlsVideoSrc,
                  mp4VideoSrc: m3u8tomp4(content.hlsVideoSrc),
                  posterImageUrl: content.posterImageUrl,
              })
            : undefined;
    item.description = art(path.join(__dirname, 'templates/description.art'), {
        teaser: content.teaser,
        video,
        // occasionally the text html already includes the main image, testing to see if an image with the same id exists
        mainImage: $text(`[data-id="${content.mainContentImageLink?.targetId}"]`).length === 0 ? content.mainContentImageLink : undefined,
        text: $text.html(),
        liveblog,
        imageI18n: i18n('Image', item.language),
        formatId,
    });
    if (content.trackingCategories) {
        item.category = content.trackingCategories;
    }
    if (content.firstPersonArray) {
        item.author = content.firstPersonArray.map((person) => person.fullName).join(', ');
    }
    return item;
};

/**
 * Resolves the full content of every feed item, in parallel.
 *
 * Each item is looked up in the cache by its link; on a miss its content is
 * fetched from the content API and rendered with `processContent`.
 *
 * @param items Items carrying `link`, `language`, `type` and `id`.
 * @returns The processed items, in the same order.
 */
export const processItems = async (items) => {
    const pending = items.map((entry) => {
        const fetchAndRender = async () => {
            const { data } = await got(`https://www.dw.com/graph-api/${entry.language}/content/${entry.type}/${entry.id}`);
            return processContent(entry, data.data.content);
        };
        return cache.tryGet(entry.link, fetchAndRender);
    });
    return await Promise.all(pending);
};

0 comments on commit 7ce029a

Please sign in to comment.