forked from DIYgod/RSSHub
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(route): add dw route (DIYgod#17575)
* feat(route): add dw route * fix * Apply suggestions from code review Co-authored-by: Tony <[email protected]> * Apply suggestions from code review * Apply suggestions with code review * add mp4 video src * fix: preload metadata --------
- Loading branch information
1 parent
682d954
commit 7ce029a
Showing
7 changed files
with
380 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
import type { Namespace } from '@/types'; | ||
|
||
/**
 * Namespace metadata shared by every route under `/dw`.
 * `name` is the display name and `url` the site the routes belong to.
 */
export const namespace: Namespace = {
    name: 'DW Deutsche Welle',
    url: 'dw.com',
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
import { Route } from '@/types'; | ||
import { processItems } from './utils'; | ||
import got from '@/utils/got'; | ||
import cache from '@/utils/cache'; | ||
import { config } from '@/config'; | ||
|
||
/**
 * Route definition for `/news/:lang?/:id?`.
 *
 * Both path parameters are optional; the handler falls back to `en` and to the
 * Top Stories page of the chosen language when they are omitted.
 */
export const route: Route = {
    path: '/news/:lang?/:id?',
    categories: ['traditional-media'],
    example: '/dw/news',
    parameters: {
        lang: 'Language, see below, default to en',
        id: 'Category ID, see below, default to the id of the Top Stories Page of the language chosen',
    },
    features: {
        requirePuppeteer: false,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
        requireConfig: false,
    },
    name: 'News',
    maintainers: ['quiniapiezoelectricity'],
    handler,
    // The example ID below must match the one in the example URL (s-12321).
    description: `
:::tip
Parameters can be obtained from the official website, for instance:
For the site https://www.dw.com/de/deutschland/s-12321 the language code would be \`de\` and the category ID would be \`s-12321\`.
:::
`,
    radar: [
        {
            source: ['www.dw.com/:lang/:name/:id'],
            target: '/news/:lang/:id',
        },
    ],
};
|
||
// English Top Stories navigation page; its payload lists the Top Stories pages of every language.
const defaultUrl = `https://www.dw.com/graph-api/en/content/navigation/9097`;
// Only these content types are turned into feed items.
const typenames = new Set(['Article', 'Liveblog', 'Video']);

/**
 * Builds the feed for a DW navigation (category) page.
 *
 * @param ctx Request context; reads the optional `lang` and `id` path parameters.
 * @returns Feed data (title, link, description, language and items).
 * @throws Error when no `id` is given and no Top Stories page can be found for `lang`.
 */
async function handler(ctx) {
    const lang = ctx.req.param('lang') ?? 'en';
    let id = ctx.req.param('id');

    if (id === undefined) {
        // Look up the id of the Top Stories Page of the selected language if id is not specified in the URL.
        const navigation = await cache.tryGet(
            'dw:navigation',
            async () => {
                const res = await got(defaultUrl);
                return res.data.data.content.topStoriesNavigations;
            },
            config.cache.routeExpire,
            false
        );
        // namedUrl is split on '/'; index 1 is compared with the language code and index 3 is expected to be `s-<id>`.
        const segments = navigation.map((item) => item.namedUrl.split('/')).find((parts) => parts[1] === lang);
        const topStoriesId = segments?.[3]?.match(/^s-(\d+)$/i)?.[1];
        if (topStoriesId === undefined) {
            // Previously this surfaced as an opaque "cannot read properties of undefined" TypeError.
            throw new Error(`Unable to find the Top Stories page for language "${lang}"`);
        }
        id = topStoriesId;
    } else {
        // convert s-1234 id to 1234; any other value is passed through unchanged
        id = id.match(/^s-(\d+)$/)?.[1] ?? id;
    }

    const response = await got(`https://www.dw.com/graph-api/${lang}/content/navigation/${id}`);
    const feed = response.data.data.content;
    // Refresh the cached navigation list with the copy that came with this response.
    cache.set('dw:navigation', feed.topStoriesNavigations, config.cache.routeExpire);

    // Flatten every information space into a single list of supported content entries.
    const list = feed.contentComposition.informationSpaces
        .flatMap((section) => Object.values(section).flatMap((component) => component?.[0]?.contents || []))
        .filter((item) => typenames.has(item.__typename) && item.id);
    const items = await processItems(
        list.map((item) => {
            item.link = new URL(item.namedUrl, 'https://www.dw.com').href;
            item.pubDate = item.contentDate;
            item.description = item.teaser;
            item.language = lang;
            item.type = item.__typename.toLowerCase();
            return item;
        })
    );

    return {
        title: `DW | ${feed.title}`,
        link: feed.canonicalUrl,
        description: feed.metaDescription,
        language: feed.topStoriesNavigations.find((item) => item.namedUrl.startsWith(`/${lang}/`))?.localeLang ?? lang,
        item: items,
    };
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
import { Route } from '@/types'; | ||
import { config } from '@/config'; | ||
import Parser from 'rss-parser'; | ||
import { processItems } from './utils'; | ||
|
||
/**
 * Route definition for `/rss/:channel?`.
 *
 * The documented parameter key must match the path parameter name (`channel`),
 * which is what the handler reads.
 */
export const route: Route = {
    path: '/rss/:channel?',
    categories: ['traditional-media'],
    example: '/dw/rss/rss-en-all',
    parameters: {
        channel: 'RSS Feed Channel, see below, `rss-en-all` by default',
    },
    features: {
        requirePuppeteer: false,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
        requireConfig: false,
    },
    name: 'RSS',
    maintainers: ['quiniapiezoelectricity'],
    handler,
    description: `
For a full list of RSS Feed Channels in English, please refer to [DW RSS Feeds](https://corporate.dw.com/en/rss-feeds/a-68693346).
RSS Feed Channels in other languages are also available, for example: \`rss-chi-all\` renders the RSS feed in Chinese and \`rss-de-all\` for the RSS Feed in German
`,
};
|
||
/**
 * Fetches one of DW's RDF feeds and enriches each entry through `processItems`.
 *
 * @param ctx Request context; reads the optional `channel` path parameter.
 * @returns Feed data whose title, link and description come from the upstream feed.
 */
async function handler(ctx) {
    const channel = ctx.req.param('channel') ?? 'rss-en-all';

    const rssParser = new Parser({
        customFields: {
            item: ['dwsyn:contentID'],
        },
        headers: {
            'User-Agent': config.ua,
        },
    });
    const feed = await rssParser.parseURL(`https://rss.dw.com/rdf/${channel}`);

    const entries = feed.items.map((entry) => {
        entry.id = entry['dwsyn:contentID'];
        entry.pubDate = entry.isoDate;
        entry.description = entry.content;

        // Drop the query string so the link is stable.
        const url = new URL(entry.link);
        url.search = '';
        entry.link = url.href;

        // dw rss feed only includes liveblogs and articles
        const lastSegment = url.pathname.substring(url.pathname.lastIndexOf('/') + 1);
        entry.type = lastSegment.startsWith('live-') ? 'liveblog' : 'article';
        return entry;
    });
    const items = await processItems(entries);

    return {
        title: feed.title,
        link: feed.link,
        description: feed.description,
        item: items,
    };
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
{{ if teaser }} | ||
<blockquote><p><em>{{ teaser }}</em></p></blockquote> | ||
{{ /if }} | ||
{{ if video }} | ||
{{@ video }} | ||
{{ else if mainImage }} | ||
<figure> | ||
<img | ||
src="https://static.dw.com/image/{{ mainImage.targetId }}_{{ formatId }}.jpg" | ||
alt="{{ mainImage.additionalInformation }}" | ||
> | ||
<figcaption> | ||
{{ mainImage.description }} | ||
<small>{{ imageI18n }}: {{ mainImage.target.licenserSupplement }}</small> | ||
</figcaption> | ||
</figure> | ||
{{ /if }} | ||
{{ if text }} | ||
{{@ text }} | ||
{{ /if }} | ||
{{ if liveblog }} | ||
{{@ liveblog }} | ||
{{ /if }} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
{{ if posts }} | ||
{{ each posts }} | ||
<hr> | ||
{{ if $value.localizedContentDate }}<p><i>{{ $value.localizedContentDate }}</i></p>{{ /if }} | ||
{{ if $value.title }}<h2>{{ $value.title }}</h2>{{ /if }} | ||
{{ if $value.persons }} | ||
{{ each $value.persons }} | ||
<p><i>{{ $value.fullName }}</i></p> | ||
{{ /each }} | ||
{{ /if }} | ||
{{ if $value.text }}{{@ $value.text }}{{ /if }} | ||
{{ /each }} | ||
{{ /if }} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
{{ if hlsVideoSrc }} | ||
<video | ||
{{ if posterImageUrl }} poster="{{ posterImageUrl }}" {{ /if }} | ||
controls preload="metadata"> | ||
<source | ||
src="{{ hlsVideoSrc }}" | ||
type="application/x-mpegURL" | ||
> | ||
<source | ||
src="{{ mp4VideoSrc }}" | ||
type="video/mp4" | ||
> | ||
</video> | ||
{{ /if }} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,171 @@ | ||
import cache from '@/utils/cache'; | ||
import got from '@/utils/got'; | ||
import { load, type CheerioAPI } from 'cheerio'; | ||
import { art } from '@/utils/render'; | ||
import path from 'node:path'; | ||
import { getCurrentPath } from '@/utils/helpers'; | ||
|
||
const __dirname = getCurrentPath(import.meta.url); | ||
const formatId = '605'; | ||
|
||
// Localized caption prefix for the word "Image", keyed by language code.
const imageLabels = new Map<string, string>([
    ['sq', 'Fotografi'],
    ['am', 'ምስል'],
    ['ar', 'صورة من'],
    ['bn', 'ছবি'],
    ['bs', 'Foto'],
    ['bg', 'Снимка'],
    ['zh', '图像来源'],
    ['zh-hant', '圖片來源'],
    ['hr', 'Foto'],
    ['fa-af', 'عکس'],
    ['en', 'Image'],
    ['fr', 'Image'],
    ['de', 'Bild'],
    ['el', 'Εικόνα'],
    ['ha', 'Hoto'],
    ['hi', 'तस्वीर'],
    ['id', 'Foto'],
    ['sw', 'Picha'],
    ['mk', 'Фотографија'],
    ['ps', 'انځور'],
    ['fa-ir', 'عکس'],
    ['pl', 'Zdjęcie'],
    ['pt-002', 'Foto'],
    ['pt-br', 'Foto'],
    ['ro', 'Imagine'],
    ['ru', 'Фото'],
    ['sr', 'Foto'],
    ['es', 'Imagen'],
    ['tr', 'Fotoğraf'],
    ['uk', 'Фото'],
    ['ur', 'تصویر'],
]);

/**
 * Translates a UI word into the given language.
 *
 * Only the word `Image` has translations; it falls back to `Image` for an
 * unknown language. Any other word is returned unchanged.
 */
const i18n = (word: string, lang: string) => {
    if (word !== 'Image') {
        return word;
    }
    return imageLabels.get(lang) ?? 'Image';
};
|
||
/**
 * Derives an MP4 URL from an HLS playlist URL by swapping the host prefix and
 * replacing the multi-rendition playlist suffix with the 1920x1080 file name.
 * Parts of `src` that do not match the expected patterns are left as they are.
 */
const m3u8tomp4 = (src: string) => {
    const rehosted = src.replace('https://hlsvod.dw.com/i/', 'https://tvdownloaddw-a.akamaihd.net/');
    return rehosted.replace(',AVC_480x270,AVC_512x288,AVC_640x360,AVC_960x540,AVC_1280x720,AVC_1920x1080,.mp4.csmil/master.m3u8', 'AVC_1920x1080.mp4');
};
|
||
/**
 * Normalizes embedded media in a loaded HTML fragment, in place.
 *
 * - `img`: the inline style is removed; images carrying a `data-id` get a static
 *   DW image URL plus `title`/`alt` from the matching content link, when available.
 * - `video`: `data-posterurl` is copied to `poster`.
 * - `iframe`: `data-src` is copied to `src`.
 * - `svg`: removed.
 *
 * @param $ Loaded document to modify.
 * @param contentLinks Content links of the article; may be missing.
 */
const processHtml = ($: CheerioAPI, contentLinks) => {
    $('img').each((_, elem) => {
        const $img = $(elem);
        $img.removeAttr('style');

        const id = $img.attr('data-id');
        if (id === undefined) {
            // No DW image id: keep the existing src instead of building ".../undefined_605.jpg".
            return;
        }
        $img.attr('src', `https://static.dw.com/image/${id}_${formatId}.jpg`);

        const contentLink = (contentLinks ?? []).find((item) => String(item.targetId) === id);
        // Set title/alt only when present, so a missing value is never written out as the text "undefined".
        if (contentLink?.name !== undefined && contentLink.name !== null) {
            $img.attr('title', contentLink.name);
        }
        if (contentLink?.description !== undefined && contentLink.description !== null) {
            $img.attr('alt', contentLink.description);
        }
    });
    $('video').each((_, elem) => {
        const poster = $(elem).attr('data-posterurl');
        if (poster !== undefined) {
            $(elem).attr('poster', poster);
        }
    });
    $('iframe').each((_, elem) => {
        const src = $(elem).attr('data-src');
        if (src !== undefined) {
            $(elem).attr('src', src);
        }
    });
    $('svg').remove(); // svg will screw up in a lot of rss readers
};
|
||
/**
 * Fills in `description`, `category` and `author` of a feed item from the
 * content payload fetched for it.
 *
 * @param item Feed item to enrich; mutated and returned.
 * @param content Content payload of the item (text, posts, video source, ...).
 * @returns The same `item` object.
 */
const processContent = (item, content) => {
    const $body = load(content.text);
    processHtml($body, content.contentLinks);

    // Liveblog posts are rendered separately, each with its own cleaned-up HTML.
    let liveblog;
    if (item.type === 'liveblog' && content.posts) {
        const posts = content.posts.map((post) => {
            const $post = load(post.text);
            processHtml($post, content.contentLinks);
            post.text = $post.html();
            return post;
        });
        liveblog = art(path.join(__dirname, 'templates/liveblog.art'), { posts });
    }

    let video;
    if (item.type === 'video' && content.hlsVideoSrc) {
        video = art(path.join(__dirname, 'templates/video.art'), {
            hlsVideoSrc: content.hlsVideoSrc,
            mp4VideoSrc: m3u8tomp4(content.hlsVideoSrc),
            posterImageUrl: content.posterImageUrl,
        });
    }

    // occasionally the text html already includes the main image, testing to see if an image with the same id exists
    const mainImageInText = $body(`[data-id="${content.mainContentImageLink?.targetId}"]`).length !== 0;

    item.description = art(path.join(__dirname, 'templates/description.art'), {
        teaser: content.teaser,
        video,
        mainImage: mainImageInText ? undefined : content.mainContentImageLink,
        text: $body.html(),
        liveblog,
        imageI18n: i18n('Image', item.language),
        formatId,
    });

    if (content.trackingCategories) {
        item.category = content.trackingCategories;
    }
    if (content.firstPersonArray) {
        item.author = content.firstPersonArray.map((person) => person.fullName).join(', ');
    }
    return item;
};
|
||
/**
 * Resolves the full content of every item through the DW graph API.
 * Each lookup goes through `cache.tryGet`, keyed by the item's link.
 *
 * @param items Items carrying `link`, `language`, `type` and `id`.
 * @returns The enriched items, in the same order.
 */
export const processItems = async (items) => {
    const pending = items.map((item) =>
        cache.tryGet(item.link, async () => {
            const { data } = await got(`https://www.dw.com/graph-api/${item.language}/content/${item.type}/${item.id}`);
            return processContent(item, data.data.content);
        })
    );
    const resolved = await Promise.all(pending);
    return resolved;
};