From 42d720ad910e100f9767fbb54e698182df7c3812 Mon Sep 17 00:00:00 2001 From: smuggler1743 <33158166+gz4zzxc@users.noreply.github.com> Date: Tue, 3 Dec 2024 17:03:01 +0800 Subject: [PATCH 01/10] feat(route):add GDUFS news route && GDUFS xwxy news --- lib/routes/gdufs/namespace.ts | 7 +++ lib/routes/gdufs/news.ts | 89 ++++++++++++++++++++++++++++++ lib/routes/gdufs/xwxy/xwxy-news.ts | 89 ++++++++++++++++++++++++++++++ 3 files changed, 185 insertions(+) create mode 100644 lib/routes/gdufs/namespace.ts create mode 100644 lib/routes/gdufs/news.ts create mode 100644 lib/routes/gdufs/xwxy/xwxy-news.ts diff --git a/lib/routes/gdufs/namespace.ts b/lib/routes/gdufs/namespace.ts new file mode 100644 index 00000000000000..78419f7dcb1f72 --- /dev/null +++ b/lib/routes/gdufs/namespace.ts @@ -0,0 +1,7 @@ +import type { Namespace } from '@/types'; + +export const namespace: Namespace = { + name: '广东外语外贸大学', + url: 'gdufs.edu.cn', + lang: 'zh-CN', +}; diff --git a/lib/routes/gdufs/news.ts b/lib/routes/gdufs/news.ts new file mode 100644 index 00000000000000..b96fd10f8acff9 --- /dev/null +++ b/lib/routes/gdufs/news.ts @@ -0,0 +1,89 @@ +import { Route } from '@/types'; +import { load } from 'cheerio'; +import got from '@/utils/got'; +import { parseDate } from '@/utils/parse-date'; + +const site = 'https://www.gdufs.edu.cn'; + +export const route: Route = { + path: '/news', + categories: ['university'], + example: '/gdufs/news', + parameters: {}, + features: { + requireConfig: false, + requirePuppeteer: false, + antiCrawler: false, + supportBT: false, + supportPodcast: false, + supportScihub: false, + }, + radar: [ + { + source: ['https://www.gdufs.edu.cn/gwxw/gwxw1.htm', 'https://www.gdufs.edu.cn/'], + }, + ], + name: '广东外语外贸大学-新闻', + maintainers: ['gz4zzxc'], + handler, + url: 'https://www.gdufs.edu.cn/gwxw/gwxw1.htm', +}; + +async function handler() { + const link = 'https://www.gdufs.edu.cn/gwxw/gwxw1.htm'; + + const response = await got(link); + const $ = load(response.body); + const list = $('ul.list_luntan li'); + + const items = await Promise.all( + list.toArray().map(async (element) => { + const item = $(element); + const href = item.find('a').attr('href') || ''; + const title = item.find('h5').text().trim(); + const day = item.find('h3').text().trim(); + const yearMonth = item.find('h6').text().trim(); + const dateString = yearMonth + '/' + day; + const fullLink = href.startsWith('http') ? href : new URL(href, site).href; + + const pubDate = parseDate(dateString).toUTCString(); + + let description = ''; + let author = ''; + + try { + const articleRes = await got(fullLink); + const $$ = load(articleRes.body); + description = $$('.v_news_content').html()?.trim() || ''; + + // 提取作者信息 + const authorSpans = $$('.nav01 h6 .ll span'); + authorSpans.each((_, el) => { + const text = $$(el).text().trim(); + if (text.includes('责任编辑:')) { + author = text.replace('责任编辑:', '').trim(); + } else if (text.includes('文字:')) { + author = text.replace('文字:', '').trim(); + } + }); + } catch { + description = '内容获取失败。'; + } + + return { + title, + link: fullLink, + description, + pubDate, + author, + }; + }) + ); + + return { + title: '广东外语外贸大学-新闻', + link, + description: '广东外语外贸大学-新闻', + item: items, + }; +} diff --git a/lib/routes/gdufs/xwxy/xwxy-news.ts b/lib/routes/gdufs/xwxy/xwxy-news.ts new file mode 100644 index 00000000000000..5d797feb5fc3d9 --- /dev/null +++ b/lib/routes/gdufs/xwxy/xwxy-news.ts @@ -0,0 +1,89 @@ +import { Route } from '@/types'; +import { load } from 'cheerio'; +import got from '@/utils/got'; +import { parseDate } from '@/utils/parse-date'; + +export const route: Route = { + path: '/xwxy-news', + categories: ['university'], + example: '/gdufs/xwxy-news', + parameters: {}, + features: { + requireConfig: false, + requirePuppeteer: false, + antiCrawler: false, + supportBT: false, + supportPodcast: false, + supportScihub: false, + }, + radar: [ + { + source: ['xwxy.gdufs.edu.cn/xwzx/xyxw', 'xwxy.gdufs.edu.cn/'], + }, + ], + name: '广东外语外贸大学新闻学院-学院新闻', + maintainers: ['gz4zzxc'], + handler, + url: 'xwxy.gdufs.edu.cn/xwzx/xyxw', +}; + +async function handler() { + const BASE_URL = 'https://xwxy.gdufs.edu.cn'; + const link = `${BASE_URL}/xwzx/xyxw.htm`; + + // 获取列表页面 + const response = await got(link); + if (!response.body) { + throw new Error('No response body'); + } + const $ = load(response.body); + const list = $('div.flex-center a.clearfix'); + + const items = list.toArray().map((element) => { + const item = $(element); + const href = item.attr('href') || ''; + const dateText = item.find('i').text().trim(); + const pubDate = parseDate(dateText).toUTCString(); + return { + title: item.find('h5').text().trim(), + link: href.startsWith('http') ? href : new URL(href, BASE_URL).href, + pubDate, + }; + }); + + // 获取文章详情 + const fetchArticleDetail = async (item) => { + try { + const articleResponse = await got(item.link); + if (!articleResponse.body) { + throw new Error('No article body'); + } + const $$ = load(articleResponse.body); + const content = $$('#vsb_content .v_news_content').html() || ''; + const authors = $$('.show01 p i') + .map((_, el) => $$(el).text().trim()) + .get(); + + return { + ...item, + description: content, + author: authors.join(' '), + }; + } catch { + return { + ...item, + description: '无法获取内容', + author: '', + }; + } + }; + + const enhancedItems = await Promise.all(items.map(fetchArticleDetail)); + + return { + title: '广外新传学院-学院新闻', + link, + description: '广东外语外贸大学新闻与传播学院官网-学院新闻', + item: enhancedItems, + }; +} From 0ed33a35097172ffcd5cdb4a965ea66e959cc01c Mon Sep 17 00:00:00 2001 From: smuggler1743 <33158166+gz4zzxc@users.noreply.github.com> Date: Sat, 7 Dec 2024 20:08:00 +0800 Subject: [PATCH 02/10] fix(xwxy-news): update authors extraction to use toArray() method --- lib/routes/gdufs/xwxy/xwxy-news.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/routes/gdufs/xwxy/xwxy-news.ts b/lib/routes/gdufs/xwxy/xwxy-news.ts index 5d797feb5fc3d9..a1e574075c19df 100644 --- a/lib/routes/gdufs/xwxy/xwxy-news.ts +++ b/lib/routes/gdufs/xwxy/xwxy-news.ts @@ -62,7 +62,7 @@ async function handler() { const content = $$('#vsb_content .v_news_content').html() || ''; const authors = $$('.show01 p i') .map((_, el) => $$(el).text().trim()) - .get(); + .toArray(); return { ...item, From 93f252bd24e2ef6d7d324346425b5ce87845a095 Mon Sep 17 00:00:00 2001 From: smuggler1743 <33158166+gz4zzxc@users.noreply.github.com> Date: Sat, 7 Dec 2024 20:09:18 +0800 Subject: [PATCH 03/10] fix(xwxy-news): improve article detail fetching by explicitly passing item to fetchArticleDetail --- lib/routes/gdufs/xwxy/xwxy-news.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/routes/gdufs/xwxy/xwxy-news.ts b/lib/routes/gdufs/xwxy/xwxy-news.ts index a1e574075c19df..734db94f5f3404 100644 --- a/lib/routes/gdufs/xwxy/xwxy-news.ts +++ b/lib/routes/gdufs/xwxy/xwxy-news.ts @@ -78,7 +78,7 @@ async function handler() { } }; - const enhancedItems = await Promise.all(items.map(fetchArticleDetail)); + const enhancedItems = await Promise.all(items.map((item) => fetchArticleDetail(item))); return { title: '广外新传学院-学院新闻', From 1f9bfc5f7e5d1f0bfd03d7565aeb90e91cbc41a5 Mon Sep 17 00:00:00 2001 From: smuggler1743 <33158166+gz4zzxc@users.noreply.github.com> Date: Sat, 7 Dec 2024 20:15:08 +0800 Subject: [PATCH 04/10] fix(xwxy-news): reorder authors extraction to trim text after converting to array --- lib/routes/gdufs/xwxy/xwxy-news.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/routes/gdufs/xwxy/xwxy-news.ts b/lib/routes/gdufs/xwxy/xwxy-news.ts index 734db94f5f3404..a41590dac41574 100644 --- a/lib/routes/gdufs/xwxy/xwxy-news.ts +++ b/lib/routes/gdufs/xwxy/xwxy-news.ts @@ -61,8 +61,8 @@ async function handler() { const $$ = load(articleResponse.body); const content = $$('#vsb_content .v_news_content').html() || ''; const authors = $$('.show01 p i') - .map((_, el) => $$(el).text().trim()) - .toArray(); + .toArray() + .map((el) => $$(el).text().trim()); return { ...item, From bc3abcdf9b2db633219986e7f6c802378e78da01 Mon Sep 17 00:00:00 2001 From: gz4zzxc <33158166+gz4zzxc@users.noreply.github.com> Date: Wed, 11 Dec 2024 09:26:11 +0800 Subject: [PATCH 05/10] refactor(routes): remove protocol from URLs in gdufs/news.ts Co-authored-by: Tony --- lib/routes/gdufs/news.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/routes/gdufs/news.ts b/lib/routes/gdufs/news.ts index b96fd10f8acff9..53979614aefd05 100644 --- a/lib/routes/gdufs/news.ts +++ b/lib/routes/gdufs/news.ts @@ -20,7 +20,7 @@ export const route: Route = { }, radar: [ { - source: ['https://www.gdufs.edu.cn/gwxw/gwxw1.htm', 'https://www.gdufs.edu.cn/'], + source: ['www.gdufs.edu.cn/gwxw/gwxw1.htm', 'www.gdufs.edu.cn/'], }, ], name: '广东外语外贸大学-新闻', From 618b75f4925a53cf37d2b457d6649054bd12d975 Mon Sep 17 00:00:00 2001 From: gz4zzxc <33158166+gz4zzxc@users.noreply.github.com> Date: Wed, 11 Dec 2024 09:27:59 +0800 Subject: [PATCH 06/10] refactor(routes): simplify route name in gdufs/news.ts Co-authored-by: Tony --- lib/routes/gdufs/news.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/routes/gdufs/news.ts b/lib/routes/gdufs/news.ts index 53979614aefd05..81b3ad25f9489e 100644 --- a/lib/routes/gdufs/news.ts +++ b/lib/routes/gdufs/news.ts @@ -23,7 +23,7 @@ export const route: Route = { source: ['www.gdufs.edu.cn/gwxw/gwxw1.htm', 'www.gdufs.edu.cn/'], }, ], - name: '广东外语外贸大学-新闻', + name: '新闻', maintainers: ['gz4zzxc'], handler, url: 'https://www.gdufs.edu.cn/gwxw/gwxw1.htm', From 57452985f5ce0eff250dd00047ba822523b35384 Mon Sep 17 00:00:00 2001 From: gz4zzxc <33158166+gz4zzxc@users.noreply.github.com> Date: Wed, 11 Dec 2024 09:28:43 +0800 Subject: [PATCH 07/10] refactor(routes): remove protocol from URLs in gdufs/news.ts Co-authored-by: Tony --- lib/routes/gdufs/news.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/routes/gdufs/news.ts b/lib/routes/gdufs/news.ts index 81b3ad25f9489e..52c9356d7babcb 100644 --- a/lib/routes/gdufs/news.ts +++ b/lib/routes/gdufs/news.ts @@ -26,7 +26,7 @@ export const route: Route = { name: '新闻', maintainers: ['gz4zzxc'], handler, - url: 'https://www.gdufs.edu.cn/gwxw/gwxw1.htm', + url: 'www.gdufs.edu.cn/gwxw/gwxw1.htm', }; async function handler() { From c703d9dd010725bec6c21d0351b7e6b63683dfa4 Mon Sep 17 00:00:00 2001 From: gz4zzxc <33158166+gz4zzxc@users.noreply.github.com> Date: Wed, 11 Dec 2024 09:29:08 +0800 Subject: [PATCH 08/10] refactor(routes): simplify route name in gdufs/xwxy-news.ts Co-authored-by: Tony --- lib/routes/gdufs/xwxy/xwxy-news.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/routes/gdufs/xwxy/xwxy-news.ts b/lib/routes/gdufs/xwxy/xwxy-news.ts index a41590dac41574..1f231be0efee48 100644 --- a/lib/routes/gdufs/xwxy/xwxy-news.ts +++ b/lib/routes/gdufs/xwxy/xwxy-news.ts @@ -21,7 +21,7 @@ export const route: Route = { source: ['xwxy.gdufs.edu.cn/xwzx/xyxw', 'xwxy.gdufs.edu.cn/'], }, ], - name: '广东外语外贸大学新闻学院-学院新闻', + name: '新闻学院-学院新闻', maintainers: ['gz4zzxc'], handler, url: 'xwxy.gdufs.edu.cn/xwzx/xyxw', From fd0d8e25b2066b8a12636775cd516283ce4532df Mon Sep 17 00:00:00 2001 From: smuggler1743 <33158166+gz4zzxc@users.noreply.github.com> Date: Wed, 11 Dec 2024 11:46:33 +0800 Subject: [PATCH 09/10] refactor(routes): use cache to optimize article content fetching and author extraction in gdufs/news.ts and gdufs/xwxy-news.ts --- lib/routes/gdufs/news.ts | 51 ++++++++++++++++++------------ lib/routes/gdufs/xwxy/xwxy-news.ts | 49 ++++++++++++++++------------ 2 files changed, 58 insertions(+), 42 deletions(-) diff --git a/lib/routes/gdufs/news.ts b/lib/routes/gdufs/news.ts index 52c9356d7babcb..05429661306a07 100644 --- a/lib/routes/gdufs/news.ts +++ b/lib/routes/gdufs/news.ts @@ -1,5 +1,6 @@ import { Route } from '@/types'; import { load } from 'cheerio'; +import cache from '@/utils/cache'; import got from '@/utils/got'; import { parseDate } from '@/utils/parse-date'; @@ -48,34 +49,42 @@ async function handler() { const pubDate = parseDate(dateString).toUTCString(); - let description = ''; - let author = ''; + const content = await cache.tryGet(fullLink, async () => { + try { + const articleRes = await got(fullLink); + const $$ = load(articleRes.body); + const description = $$('.v_news_content').html()?.trim() || ''; - try { - const articleRes = await got(fullLink); - const $$ = load(articleRes.body); - description = $$('.v_news_content').html()?.trim() || ''; + // 提取作者信息 + let author = ''; + const authorSpans = $$('.nav01 h6 .ll span'); + authorSpans.each((_, el) => { + const text = $$(el).text().trim(); + if (text.includes('责任编辑:')) { + author = text.replace('责任编辑:', '').trim(); + } else if (text.includes('文字:')) { + author = text.replace('文字:', '').trim(); + } + }); - // 提取作者信息 - const authorSpans = $$('.nav01 h6 .ll span'); - authorSpans.each((_, el) => { - const text = $$(el).text().trim(); - if (text.includes('责任编辑:')) { - author = text.replace('责任编辑:', '').trim(); - } else if (text.includes('文字:')) { - author = text.replace('文字:', '').trim(); - } - }); - } catch { - description = '内容获取失败。'; - } + return { + description, + author, + }; + } catch { + return { + description: '内容获取失败。', + author: '', + }; + } + }); return { title, link: fullLink, - description, + description: content.description, pubDate, - author, + author: content.author, }; }) ); diff --git a/lib/routes/gdufs/xwxy/xwxy-news.ts b/lib/routes/gdufs/xwxy/xwxy-news.ts index 1f231be0efee48..0b0b065af3e62f 100644 --- a/lib/routes/gdufs/xwxy/xwxy-news.ts +++ b/lib/routes/gdufs/xwxy/xwxy-news.ts @@ -1,5 +1,6 @@ import { Route } from '@/types'; import { load } from 'cheerio'; +import cache from '@/utils/cache'; import got from '@/utils/got'; import { parseDate } from '@/utils/parse-date'; @@ -53,29 +54,35 @@ async function handler() { // 获取文章详情 const fetchArticleDetail = async (item) => { - try { - const articleResponse = await got(item.link); - if (!articleResponse.body) { - throw new Error('No article body'); + const contentData = await cache.tryGet(item.link, async () => { + try { + const articleResponse = await got(item.link); + if (!articleResponse.body) { + throw new Error('No article body'); + } + const $$ = load(articleResponse.body); + const content = $$('#vsb_content .v_news_content').html() || ''; + const authors = $$('.show01 p i') + .toArray() + .map((el) => $$(el).text().trim()); + + return { + description: content, + author: authors.join(' '), + }; + } catch { + return { + description: '无法获取内容', + author: '', + }; } - const $$ = load(articleResponse.body); - const content = $$('#vsb_content .v_news_content').html() || ''; - const authors = $$('.show01 p i') - .toArray() - .map((el) => $$(el).text().trim()); + }); - return { - ...item, - description: content, - author: authors.join(' '), - }; - } catch { - return { - ...item, - description: '无法获取内容', - author: '', - }; - } + return { + ...item, + description: contentData.description, + author: contentData.author, + }; }; const enhancedItems = await Promise.all(items.map((item) => fetchArticleDetail(item))); From 5ceaa39dac18aab37c14c6aa9698390641de2654 Mon Sep 17 00:00:00 2001 From: smuggler1743 <33158166+gz4zzxc@users.noreply.github.com> Date: Sun, 15 Dec 2024 16:02:22 +0800 Subject: [PATCH 10/10] refactor(routes): Cache the entire item object in /gdufs/news & /gdufs/xwxy-news route --- lib/routes/gdufs/news.ts | 24 +++++------- lib/routes/gdufs/xwxy/xwxy-news.ts | 62 ++++++++++++++---------------- 2 files changed, 38 insertions(+), 48 deletions(-) diff --git a/lib/routes/gdufs/news.ts b/lib/routes/gdufs/news.ts index 05429661306a07..49a284d4761d8e 100644 --- a/lib/routes/gdufs/news.ts +++ b/lib/routes/gdufs/news.ts @@ -38,7 +38,7 @@ async function handler() { const list = $('ul.list_luntan li'); const items = await Promise.all( - list.toArray().map(async (element) => { + list.toArray().map((element) => { const item = $(element); const href = item.find('a').attr('href') || ''; const title = item.find('h5').text().trim(); @@ -46,16 +46,14 @@ async function handler() { const yearMonth = item.find('h6').text().trim(); const dateString = yearMonth + '/' + day; const fullLink = href.startsWith('http') ? href : new URL(href, site).href; - const pubDate = parseDate(dateString).toUTCString(); - const content = await cache.tryGet(fullLink, async () => { + return cache.tryGet(fullLink, async () => { try { const articleRes = await got(fullLink); const $$ = load(articleRes.body); const description = $$('.v_news_content').html()?.trim() || ''; - // 提取作者信息 let author = ''; const authorSpans = $$('.nav01 h6 .ll span'); authorSpans.each((_, el) => { @@ -68,31 +66,29 @@ async function handler() { }); return { + title, + link: fullLink, description, + pubDate, author, }; } catch { return { + title, + link: fullLink, description: '内容获取失败。', + pubDate, author: '', }; } }); - - return { - title, - link: fullLink, - description: content.description, - pubDate, - author: content.author, - }; }) ); return { - title: '广东外语外贸大学-新闻', + title: '广外-大学要闻', link, - description: '广东外语外贸大学-新闻', + description: '广东外语外贸大学-大学要闻', item: items, }; } diff --git a/lib/routes/gdufs/xwxy/xwxy-news.ts b/lib/routes/gdufs/xwxy/xwxy-news.ts index 0b0b065af3e62f..2582f2457ab291 100644 --- a/lib/routes/gdufs/xwxy/xwxy-news.ts +++ b/lib/routes/gdufs/xwxy/xwxy-news.ts @@ -32,7 +32,6 @@ async function handler() { const BASE_URL = 'https://xwxy.gdufs.edu.cn'; const link = `${BASE_URL}/xwzx/xyxw.htm`; - // 获取列表页面 const response = await got(link); if (!response.body) { throw new Error('No response body'); @@ -52,40 +51,35 @@ async function handler() { }; }); - // 获取文章详情 - const fetchArticleDetail = async (item) => { - const contentData = await cache.tryGet(item.link, async () => { - try { - const articleResponse = await got(item.link); - if (!articleResponse.body) { - throw new Error('No article body'); - } - const $$ = load(articleResponse.body); - const content = $$('#vsb_content .v_news_content').html() || ''; - const authors = $$('.show01 p i') - .toArray() - .map((el) => $$(el).text().trim()); - - return { - description: content, - author: authors.join(' '), - }; - } catch { - return { - description: '无法获取内容', - author: '', - }; - } - }); + const enhancedItems = await Promise.all( + items.map((item) => + cache.tryGet(item.link, async () => { + try { + const articleResponse = await got(item.link); + if (!articleResponse.body) { + throw new Error('No article body'); + } + const $$ = load(articleResponse.body); + const content = $$('#vsb_content .v_news_content').html() || ''; + const authors = $$('.show01 p i') + .toArray() + .map((el) => $$(el).text().trim()); - return { - ...item, - description: contentData.description, - author: contentData.author, - }; - }; - - const enhancedItems = await Promise.all(items.map((item) => fetchArticleDetail(item))); + return { + ...item, + description: content, + author: authors.join(' '), + }; + } catch { + return { + ...item, + description: '无法获取内容', + author: '', + }; + } + }) + ) + ); return { title: '广外新传学院-学院新闻',