From f59d7dae2bc120dec4e4c949401c1634387f1b87 Mon Sep 17 00:00:00 2001
From: GhhG123 <100225935+GhhG123@users.noreply.github.com>
Date: Thu, 28 Nov 2024 01:30:00 +0800
Subject: [PATCH] feat(routes/shu): add routes for SHU's Int'l Dept, Grad
 School, and Campus Highlights. (#17730)

* feat(routes/shu): add routes for SHU's Int'l Dept, Grad School, and Campus Highlights

- Corrected the root URL in `index.ts`.
- Added routes for:
  - SHU's International Department (Int'l Dept).
  - Graduate School (Grad School).
  - Campus Highlights.
- Noted the unavailability of the policy in `jwb.ts` with a comment in `index.ts`.

* Update lib/routes/shu/index.ts

Co-authored-by: Tony

* Update lib/routes/shu/jwb.ts

Co-authored-by: Tony

* Apply camelCase to variable names across the project.

* Refactor: change to use detailed request format for GET request.

* feat: refine content extraction and fix gs.shu.edu.cn issues

- Refactored content extraction to focus on specific descriptions.
- Added exception handling for inaccessible gs1.shu.edu.cn links.
- Fixed bug where gs.shu.edu.cn content could not be retrieved.
- Fixed Code scanning/ESLint warning: replaced disallowed syntax with .toArray().

* fix: Resolve ESLint warnings and errors

* Update lib/routes/shu/xykd.ts

Co-authored-by: Tony

* fix: Resolve ESLint warnings and errors again

* fix: Resolve ESLint warnings and errors

---------

---
Reviewer note: the added code below was revised during review (validation of
the `:type` parameter, corrected parameter labels, radar fix for jwb, English
comments). The blob hashes on the `index` lines are those of the original
commit and no longer match the post-images, so apply without `--3way`.
Whitespace of context/removed lines was reconstructed; use
`git apply --ignore-whitespace` if a hunk does not match.

 lib/routes/shu/global.ts    | 107 ++++++++++++++++++++++++++++++++++
 lib/routes/shu/gs.ts        | 118 +++++++++++++++++++++++++++++++++++++
 lib/routes/shu/index.ts     | 110 +++++++++++++++++++++-------------
 lib/routes/shu/jwb.ts       |  14 ++---
 lib/routes/shu/namespace.ts |   3 +-
 lib/routes/shu/xykd.ts      | 112 +++++++++++++++++++++++++++++++++++
 6 files changed, 415 insertions(+), 49 deletions(-)
 create mode 100644 lib/routes/shu/global.ts
 create mode 100644 lib/routes/shu/gs.ts
 create mode 100644 lib/routes/shu/xykd.ts

diff --git a/lib/routes/shu/global.ts b/lib/routes/shu/global.ts
new file mode 100644
index 00000000000000..b4e91d37b1857e
--- /dev/null
+++ b/lib/routes/shu/global.ts
@@ -0,0 +1,107 @@
+import { Route } from '@/types';
+import cache from '@/utils/cache';
+import got from '@/utils/got';
+import { load } from 'cheerio'; // cheerio@1.0.0
+import { parseDate } from '@/utils/parse-date';
+import timezone from '@/utils/timezone';
+
+// Feed title and listing-page URL for every supported `:type` value.
+const noticeType = {
+    tzgg: { title: '上海大学国际部港澳台-通知公告', url: 'https://global.shu.edu.cn/cd/tzgg/3.htm' },
+};
+
+export const route: Route = {
+    path: '/global/:type?',
+    categories: ['university'],
+    example: '/shu/global/tzgg',
+    parameters: { type: '分类,默认为通知公告' },
+    features: {
+        requireConfig: false,
+        requirePuppeteer: false,
+        antiCrawler: false,
+        supportBT: false,
+        supportPodcast: false,
+        supportScihub: false,
+    },
+    radar: [
+        {
+            source: ['global.shu.edu.cn/'],
+            target: '/global',
+        },
+    ],
+    name: '国际部港澳台办公室',
+    maintainers: ['GhhG123'],
+    handler,
+    url: 'global.shu.edu.cn/',
+    description: `| 通知公告 |
+  | -------- |
+  | tzgg     |`,
+};
+
+/**
+ * Build the feed for one announcement category of global.shu.edu.cn.
+ *
+ * Reads the category listing page, then loads every linked article (cached
+ * by link) to replace the empty description with the article body.
+ *
+ * @param ctx Request context; `ctx.req.param('type')` picks the category (default `tzgg`).
+ * @returns Feed data: `title`, `description`, `link` and the `item` list.
+ * @throws Error when `type` is not a key of `noticeType`.
+ */
+async function handler(ctx) {
+    const type = ctx.req.param('type') ?? 'tzgg';
+    // Reject unknown categories with a clear message instead of a TypeError on `undefined.url`.
+    if (!Object.keys(noticeType).includes(type)) {
+        throw new Error(`Unsupported type "${type}", expected one of: ${Object.keys(noticeType).join(', ')}`);
+    }
+    const feed = noticeType[type];
+    const rootUrl = 'https://global.shu.edu.cn';
+
+    const response = await got({
+        method: 'get',
+        url: feed.url,
+    });
+
+    const $ = load(response.data);
+
+    // Each <li> of the listing is one announcement.
+    const list = $('div.only-list1 ul li')
+        .toArray()
+        .map((el) => {
+            const item = $(el);
+            const rawLink = item.find('a').attr('href');
+            const pubDate = item.find('span').text().trim();
+
+            return {
+                title: item.find('a').text().trim(),
+                // Resolve the href against the site root; entries without one point at the root itself.
+                link: rawLink ? new URL(rawLink, rootUrl).href : rootUrl,
+                pubDate: timezone(parseDate(pubDate, 'YYYY年MM月DD日'), +8),
+                description: '', // the listing carries no summary
+            };
+        });
+
+    const items = await Promise.all(
+        list.map((item) =>
+            cache.tryGet(item.link, async () => {
+                const detailResponse = await got({
+                    method: 'get',
+                    url: item.link,
+                });
+                const content = load(detailResponse.data);
+
+                // Placeholder text is emitted when the article body cannot be found.
+                item.description = content('#vsb_content_2 .v_news_content').html() || '内容无法提取';
+
+                return item;
+            })
+        )
+    );
+
+    return {
+        title: feed.title,
+        description: feed.title,
+        link: feed.url,
+        item: items,
+    };
+}
diff --git a/lib/routes/shu/gs.ts b/lib/routes/shu/gs.ts
new file mode 100644
index 00000000000000..adf7a97eae76d8
--- /dev/null
+++ b/lib/routes/shu/gs.ts
@@ -0,0 +1,118 @@
+import { Route } from '@/types';
+import cache from '@/utils/cache';
+import got from '@/utils/got';
+import { load } from 'cheerio'; // cheerio@1.0.0
+import { parseDate } from '@/utils/parse-date';
+import timezone from '@/utils/timezone';
+
+// Feed title and listing-page URL for every supported `:type` value.
+const noticeType = {
+    zhxw: { title: '上海大学研究生院-综合新闻', url: 'https://gs.shu.edu.cn/xwlb/zh.htm' }, // general news
+    // Some entries of this listing link to gs1.shu.edu.cn, which the handler does not fetch
+    // (a request to https://gs1.shu.edu.cn:8080/py/KCBInfo.asp failed with "TypeError: fetch failed").
+    pygl: { title: '上海大学研究生院-培养管理', url: 'https://gs.shu.edu.cn/xwlb/py.htm' },
+    gjjl: { title: '上海大学研究生院-国际交流', url: 'https://gs.shu.edu.cn/xwlb/gjjl.htm' },
+};
+
+export const route: Route = {
+    path: '/gs/:type?',
+    categories: ['university'],
+    example: '/shu/gs/zhxw',
+    parameters: { type: '分类,默认为综合新闻' },
+    features: {
+        requireConfig: false,
+        requirePuppeteer: false,
+        antiCrawler: false,
+        supportBT: false,
+        supportPodcast: false,
+        supportScihub: false,
+    },
+    radar: [
+        {
+            source: ['gs.shu.edu.cn/'],
+            target: '/gs',
+        },
+    ],
+    name: '研究生院',
+    maintainers: ['GhhG123'],
+    handler,
+    url: 'gs.shu.edu.cn/',
+    description: `| 综合新闻 | 培养管理 | 国际交流 |
+  | -------- | --------- | --------- |
+  | zhxw     | pygl      | gjjl      |`,
+};
+
+/**
+ * Build the feed for one news category of gs.shu.edu.cn.
+ *
+ * Reads the category listing table, then loads every linked article (cached
+ * by link) to replace the listing text with the article body. Links on
+ * gs1.shu.edu.cn are not fetched and keep a fixed notice as description.
+ *
+ * @param ctx Request context; `ctx.req.param('type')` picks the category (default `zhxw`).
+ * @returns Feed data: `title`, `description`, `link` and the `item` list.
+ * @throws Error when `type` is not a key of `noticeType`.
+ */
+async function handler(ctx) {
+    const type = ctx.req.param('type') ?? 'zhxw';
+    // Reject unknown categories with a clear message instead of a TypeError on `undefined.url`.
+    if (!Object.keys(noticeType).includes(type)) {
+        throw new Error(`Unsupported type "${type}", expected one of: ${Object.keys(noticeType).join(', ')}`);
+    }
+    const feed = noticeType[type];
+    const rootUrl = 'https://gs.shu.edu.cn';
+
+    const response = await got({
+        method: 'get',
+        url: feed.url,
+    });
+
+    const $ = load(response.data);
+
+    // Each matching table row is one news entry.
+    const list = $('tr[id^="line_u17_"]')
+        .toArray()
+        .map((el) => {
+            const item = $(el);
+            const rawLink = item.find('a').attr('href');
+            const title = item.find('a').text().trim();
+            const dateParts = item.find('td').eq(1).text().trim(); // second cell holds the date
+
+            return {
+                title,
+                link: rawLink ? new URL(rawLink, rootUrl).href : rootUrl,
+                pubDate: timezone(parseDate(dateParts, 'YYYY/MM/DD HH:mm:ss'), +8),
+                description: item.find('td').eq(2).text().trim(), // third cell: view count or other info
+            };
+        });
+
+    const items = await Promise.all(
+        list.map((item) =>
+            cache.tryGet(item.link, async () => {
+                // gs1.shu.edu.cn is only reachable from the campus network, so do not request it.
+                if (new URL(item.link).hostname === 'gs1.shu.edu.cn') {
+                    item.description = 'gs1.shu.edu.cn, 无法直接获取';
+                    return item;
+                }
+
+                const detailResponse = await got({
+                    method: 'get',
+                    url: item.link,
+                });
+                const content = load(detailResponse.data);
+
+                // Keep the listing text when the article body cannot be found.
+                item.description = content('#vsb_content .v_news_content').html() || item.description;
+
+                return item;
+            })
+        )
+    );
+
+    return {
+        title: feed.title,
+        description: feed.title,
+        link: feed.url,
+        item: items,
+    };
+}
diff --git a/lib/routes/shu/index.ts b/lib/routes/shu/index.ts
index 33dabbd888d754..2a07078226a06d 100644
--- a/lib/routes/shu/index.ts
+++ b/lib/routes/shu/index.ts
@@ -1,23 +1,21 @@
 import { Route } from '@/types';
 import cache from '@/utils/cache';
 import got from '@/utils/got';
-import { load } from 'cheerio';
+import { load } from 'cheerio'; // cheerio@1.0.0
 import { parseDate } from '@/utils/parse-date';
+import timezone from '@/utils/timezone';
 
-const host = 'https://www.shu.edu.cn/';
-const alias = new Map([
-    ['news', 'zhxw'], // 综合新闻
-    ['research', 'kydt1'], // 科研动态
-    ['kydt', 'kydt1'], // 科研动态
-    ['notice', 'tzgg'], // 通知公告
-    ['important', 'zyxw'], // 重要新闻
-]);
+// Feed title and listing-page URL for every supported `:type` value.
+const noticeType = {
+    tzgg: { title: '上海大学 - 通知公告', url: 'https://www.shu.edu.cn/tzgg.htm' },
+    zyxw: { title: '上海大学 - 重要新闻', url: 'https://www.shu.edu.cn/zyxw.htm' },
+};
 
 export const route: Route = {
-    path: '/:type?',
+    path: '/news/:type?',
     categories: ['university'],
-    example: '/shu/news',
-    parameters: { type: '消息类型,默认为`news`' },
+    example: '/shu/news/tzgg',
+    parameters: { type: '分类,默认为通知公告' },
     features: {
         requireConfig: false,
         requirePuppeteer: false,
@@ -28,50 +26,80 @@ export const route: Route = {
     },
     radar: [
         {
-            source: ['www.shu.edu.cn/:type'],
-            target: '/:type',
+            source: ['www.shu.edu.cn/'],
+            target: '/news',
         },
     ],
-    name: '官网信息',
-    maintainers: ['lonelyion'],
+    name: '官网通知公告',
+    maintainers: ['lonelyion', 'GhhG123'],
     handler,
-    description: `| 综合新闻 | 科研动态 | 通知公告 | 重要新闻 |
-  | -------- | -------- | -------- | --------- |
-  | news     | research | notice   | important |`,
+    url: 'www.shu.edu.cn/',
+    description: `| 通知公告 | 重要新闻 |
+  | -------- | --------- |
+  | tzgg     | zyxw      |`,
 };
 
+/**
+ * Build the feed for one news category of www.shu.edu.cn.
+ *
+ * Reads the category listing page, then loads every linked article (cached
+ * by link) to replace the listing summary with the article body.
+ *
+ * @param ctx Request context; `ctx.req.param('type')` picks the category (default `tzgg`).
+ * @returns Feed data: `title`, `description`, `link` and the `item` list.
+ * @throws Error when `type` is not a key of `noticeType`.
+ */
 async function handler(ctx) {
-    const type = ctx.req.param('type') || 'news';
-    const link = `https://www.shu.edu.cn/${alias.get(type) || type}.htm`;
-    const respond = await got.get(link);
-    const $ = load(respond.data);
-    const title = $('title').text();
-    const list = $('.ej_main .list')
-        .find('li')
-        .slice(0, 5)
+    const type = ctx.req.param('type') ?? 'tzgg';
+    // Reject unknown categories with a clear message instead of a TypeError on `undefined.url`.
+    if (!Object.keys(noticeType).includes(type)) {
+        throw new Error(`Unsupported type "${type}", expected one of: ${Object.keys(noticeType).join(', ')}`);
+    }
+    const feed = noticeType[type];
+    const rootUrl = 'https://www.shu.edu.cn';
+
+    const response = await got({
+        method: 'get',
+        url: feed.url,
+    });
+
+    const $ = load(response.data);
+
+    // Each <li> of the listing is one entry; the selectors below follow the page structure.
+    const list = $('div.list ul li')
         .toArray()
-        .map((ele) => ({
-            title: $(ele).find('.bt').text(),
-            link: new URL($(ele).find('a').attr('href'), host).href,
-            date: $(ele).find('.sj').text(),
-        }));
+        .map((el) => {
+            const item = $(el);
+            const rawLink = item.find('a').attr('href');
+            return {
+                title: item.find('p.bt').text().trim(),
+                link: rawLink ? new URL(rawLink, rootUrl).href : rootUrl,
+                pubDate: timezone(parseDate(item.find('p.sj').text().trim(), 'YYYY.MM.DD'), +8),
+                description: item.find('p.zy').text().trim(),
+            };
+        });
 
-    const all = await Promise.all(
+    const items = await Promise.all(
         list.map((item) =>
             cache.tryGet(item.link, async () => {
-                const response = await got.get(item.link);
-                const $ = load(response.data);
-                item.author = $('.xx>:nth-child(2)').text().trim().slice(3); // 投稿:xxx
-                item.pubDate = parseDate(item.date, 'YYYY.MM.DD');
-                item.description = $('.v_news_content').html() || item.title;
+                const detailResponse = await got({
+                    method: 'get',
+                    url: item.link,
+                });
+                const content = load(detailResponse.data);
+
+                // Keep the listing summary when the article body cannot be found.
+                item.description = content('#vsb_content .v_news_content').html() || item.description;
+
                 return item;
             })
         )
     );
 
     return {
-        title,
-        link,
-        item: all,
+        title: feed.title,
+        description: feed.title,
+        link: feed.url,
+        item: items,
     };
 }
diff --git a/lib/routes/shu/jwb.ts b/lib/routes/shu/jwb.ts
index 6870cf345e1907..cc5d00a145b52b 100644
--- a/lib/routes/shu/jwb.ts
+++ b/lib/routes/shu/jwb.ts
@@ -8,21 +8,21 @@ const host = 'https://jwb.shu.edu.cn/';
 const alias = new Map([
     ['notice', 'tzgg'], // 通知公告
     ['news', 'xw'], // 新闻动态
-    ['policy', 'zcwj'], // 政策文件
+    // ['policy', 'zcwj'], // policy documents (disabled: marked as broken by the author)
 ]);
 
 export const route: Route = {
-    path: ['/jwc/:type?', '/jwb/:type?'],
+    path: ['/jwb/:type?'],
     radar: [
         {
-            source: ['www.shu.edu.cn/:type'],
-            target: '/:type',
+            source: ['jwb.shu.edu.cn/'],
+            target: '/jwb',
         },
     ],
-    name: 'Unknown',
-    maintainers: [],
+    name: '教务部',
+    maintainers: ['tuxinghuan', 'GhhG123'],
     handler,
-    description: `| 通知通告 | 新闻 | 政策文件 |
+    description: `| 通知通告 | 新闻 | 政策文件(bug) |
   | -------- | ---- | -------- |
   | notice   | news | policy   |`,
 };
diff --git a/lib/routes/shu/namespace.ts b/lib/routes/shu/namespace.ts
index 2422221655cd7e..35930c2a216759 100644
--- a/lib/routes/shu/namespace.ts
+++ b/lib/routes/shu/namespace.ts
@@ -2,6 +2,7 @@ import type { Namespace } from '@/types';
 
 export const namespace: Namespace = {
     name: '上海大学',
-    url: 'jwb.shu.edu.cn',
+    url: 'www.shu.edu.cn',
+    description: '上海大学相关网站',
     lang: 'zh-CN',
 };
diff --git a/lib/routes/shu/xykd.ts b/lib/routes/shu/xykd.ts
new file mode 100644
index 00000000000000..8dd2aca955108d
--- /dev/null
+++ b/lib/routes/shu/xykd.ts
@@ -0,0 +1,112 @@
+import { Route } from '@/types';
+import cache from '@/utils/cache';
+import got from '@/utils/got';
+import { load } from 'cheerio'; // cheerio@1.0.0
+import { parseDate } from '@/utils/parse-date';
+import timezone from '@/utils/timezone';
+
+// Feed title and listing-page URL for every supported `:type` value.
+const noticeType = {
+    whxx: { title: '上海大学 - 文化信息', url: 'https://www.shu.edu.cn/xnrc/whxx.htm' },
+    xsbg: { title: '上海大学 - 学术报告', url: 'https://www.shu.edu.cn/xnrc/xsbg.htm' },
+};
+
+export const route: Route = {
+    path: '/xykd/:type?',
+    categories: ['university'],
+    example: '/shu/xykd/xsbg',
+    parameters: { type: '分类,默认为学术报告' },
+    features: {
+        requireConfig: false,
+        requirePuppeteer: false,
+        antiCrawler: false,
+        supportBT: false,
+        supportPodcast: false,
+        supportScihub: false,
+    },
+    radar: [
+        {
+            source: ['www.shu.edu.cn/'],
+            target: '/xykd',
+        },
+    ],
+    name: '校园看点',
+    maintainers: ['GhhG123'],
+    handler,
+    url: 'www.shu.edu.cn/',
+    description: `| 文化信息 | 学术报告 |
+  | -------- | --------- |
+  | whxx     | xsbg      |`,
+};
+
+/**
+ * Build the feed for one "campus highlights" category of www.shu.edu.cn.
+ *
+ * Reads the category listing page, then loads every linked article (cached
+ * by link) to replace the listing summary with the article body.
+ *
+ * @param ctx Request context; `ctx.req.param('type')` picks the category (default `xsbg`).
+ * @returns Feed data: `title`, `description`, `link` and the `item` list.
+ * @throws Error when `type` is not a key of `noticeType`.
+ */
+async function handler(ctx) {
+    const type = ctx.req.param('type') ?? 'xsbg';
+    // Reject unknown categories with a clear message instead of a TypeError on `undefined.url`.
+    if (!Object.keys(noticeType).includes(type)) {
+        throw new Error(`Unsupported type "${type}", expected one of: ${Object.keys(noticeType).join(', ')}`);
+    }
+    const feed = noticeType[type];
+    const rootUrl = 'https://www.shu.edu.cn';
+
+    const response = await got({
+        method: 'get',
+        url: feed.url,
+    });
+
+    const $ = load(response.data);
+
+    // Each <li> of the listing is one entry.
+    const list = $('div.xsbg_list ul li')
+        .toArray()
+        .map((el) => {
+            const item = $(el);
+            const rawLink = item.find('a').attr('href');
+            // Texts of the <p> elements inside the date box, in document order.
+            const dateParts = item
+                .find('div.sj p')
+                .toArray()
+                .map((p) => $(p).text().trim());
+
+            return {
+                title: item.find('p.bt').text().trim(),
+                link: rawLink ? new URL(rawLink, rootUrl).href : rootUrl,
+                // NOTE(review): only month and day are parsed, so the year presumably defaults to the current one - confirm.
+                pubDate: timezone(parseDate(`${dateParts[1]}-${dateParts[0]}`, 'MM-DD'), +8),
+                description: item.find('div.zy').text().trim(),
+            };
+        });
+
+    const items = await Promise.all(
+        list.map((item) =>
+            cache.tryGet(item.link, async () => {
+                const detailResponse = await got({
+                    method: 'get',
+                    url: item.link,
+                });
+                const content = load(detailResponse.data);
+
+                // Keep the listing summary when the article body cannot be found.
+                item.description = content('#vsb_content_500 .v_news_content').html() || item.description;
+
+                return item;
+            })
+        )
+    );
+
+    return {
+        title: feed.title,
+        description: feed.title,
+        link: feed.url,
+        item: items,
+    };
+}