Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(routes/shu): add routes for SHU's Int'l Dept, Grad School, and Campus Highlights. #17730

Merged
merged 11 commits into from
Nov 27, 2024
94 changes: 94 additions & 0 deletions lib/routes/shu/global.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import { Route } from '@/types';
import cache from '@/utils/cache';
import got from '@/utils/got';
import { load } from 'cheerio'; // cheerio@1.x
import { parseDate } from '@/utils/parse-date';
import timezone from '@/utils/timezone';

/**
 * Categories served by the global.shu.edu.cn route, keyed by the `type` path parameter.
 * Every entry holds the feed title and the URL of the listing page that gets scraped.
 */
const noticeType = {
    // tzgg: notices and announcements (通知公告)
    tzgg: {
        title: '上海大学国际部港澳台-通知公告',
        url: 'https://global.shu.edu.cn/cd/tzgg/3.htm',
    },
};

/**
 * Route metadata for the feed of SHU's International Department /
 * Hong Kong, Macao and Taiwan Office (国际部港澳台办公室, global.shu.edu.cn).
 *
 * The `:type?` path segment is optional; the handler in this file falls back
 * to `tzgg` when it is omitted.
 */
export const route: Route = {
    path: '/global/:type?',
    categories: ['university'],
    example: '/shu/global/tzgg',
    parameters: { type: '分类,默认为通知公告' },
    // Plain HTTP scraping only: every optional capability flag is off.
    features: {
        requireConfig: false,
        requirePuppeteer: false,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },
    // Maps the site's root page onto the default feed of this route.
    radar: [
        {
            source: ['global.shu.edu.cn/'],
            target: '/global',
        },
    ],
    name: '国际部港澳台办公室',
    maintainers: ['GhhG123'],
    handler,
    url: 'global.shu.edu.cn/',
    // Markdown table of the accepted `type` values. The continuation lines stay
    // at column 0 so that no indentation ends up inside the string.
    description: `| 通知公告 |
| -------- |
| tzgg |`,
};

/**
 * Builds the feed for one category of global.shu.edu.cn.
 *
 * Scrapes the category's listing page, then loads every linked article
 * (through the cache, keyed by the article link) and uses the article body
 * as the item description.
 *
 * @param ctx Request context; the optional `type` route parameter selects the
 *            category and defaults to `tzgg`.
 * @returns Feed data: `title`, `description`, `link` and the resolved `item` array.
 * @throws Error when `type` is not one of the keys of `noticeType`.
 */
async function handler(ctx) {
    const type = ctx.req.param('type') ?? 'tzgg';

    // Reject unknown categories up front instead of failing later with an
    // opaque "cannot read properties of undefined". The own-property check
    // also rules out inherited keys such as `constructor`.
    if (!Object.prototype.hasOwnProperty.call(noticeType, type)) {
        throw new Error(`Invalid type: ${type}. Supported types: ${Object.keys(noticeType).join(', ')}`);
    }

    const { title: feedTitle, url: listUrl } = noticeType[type];
    const rootUrl = 'https://global.shu.edu.cn';

    // Fetch the listing page.
    const response = await got({
        method: 'get',
        url: listUrl,
    });

    const $ = load(response.data);

    // One <li> per announcement.
    const list = $('div.only-list1 ul li')
        .toArray()
        .map((el) => {
            const item = $(el);
            const anchor = item.find('a');
            const rawLink = anchor.attr('href');
            const pubDate = item.find('span').text().trim();

            return {
                title: anchor.text().trim(),
                // Resolve relative hrefs against the site root; fall back to the root when there is no href.
                link: rawLink ? new URL(rawLink, rootUrl).href : rootUrl,
                // The listing prints dates like "2024年11月27日"; they are interpreted as UTC+8.
                pubDate: timezone(parseDate(pubDate, 'YYYY年MM月DD日'), +8),
                // The listing carries no summary; the article body is filled in below.
                description: '',
            };
        });

    const items = await Promise.all(
        list.map((item) =>
            cache.tryGet(item.link, async () => {
                // Fetch and parse the article page.
                const detailResponse = await got({
                    method: 'get',
                    url: item.link,
                });
                const content = load(detailResponse.data);

                // Use the article body, or a placeholder when the expected container is missing.
                item.description = content('#vsb_content_2 .v_news_content').html() || '内容无法提取';

                return item;
            })
        )
    );

    return {
        title: feedTitle,
        description: feedTitle,
        link: listUrl,
        item: items,
    };
}
106 changes: 106 additions & 0 deletions lib/routes/shu/gs.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import { Route } from '@/types';
import cache from '@/utils/cache';
import got from '@/utils/got';
import { load } from 'cheerio'; // cheerio@1.x
import { parseDate } from '@/utils/parse-date';
import timezone from '@/utils/timezone';

/**
 * Categories served by the gs.shu.edu.cn (Graduate School) route, keyed by the
 * `type` path parameter. Every entry holds the feed title and the URL of the
 * listing page that gets scraped.
 */
const noticeType = {
    // zhxw: general news (综合新闻)
    zhxw: {
        title: '上海大学研究生院-综合新闻',
        url: 'https://gs.shu.edu.cn/xwlb/zh.htm',
    },
    // pygl: training management (培养管理).
    // Known issue recorded by the author: a request to
    // https://gs1.shu.edu.cn:8080/py/KCBInfo.asp failed with "TypeError: fetch failed".
    pygl: {
        title: '上海大学研究生院-培养管理',
        url: 'https://gs.shu.edu.cn/xwlb/py.htm',
    },
    // gjjl: international exchange (国际交流)
    gjjl: {
        title: '上海大学研究生院-国际交流',
        url: 'https://gs.shu.edu.cn/xwlb/gjjl.htm',
    },
};

/**
 * Route metadata for the feed of SHU's Graduate School (研究生院, gs.shu.edu.cn).
 *
 * The `:type?` path segment is optional; the handler in this file falls back
 * to `zhxw` (综合新闻) when it is omitted, and the parameter description below
 * states that same default.
 */
export const route: Route = {
    path: '/gs/:type?',
    categories: ['university'],
    example: '/shu/gs/zhxw',
    // The default named here must match the handler's fallback category (`zhxw`, 综合新闻).
    parameters: { type: '分类,默认为综合新闻' },
    // Plain HTTP scraping only: every optional capability flag is off.
    features: {
        requireConfig: false,
        requirePuppeteer: false,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },
    // Maps the site's root page onto the default feed of this route.
    radar: [
        {
            source: ['gs.shu.edu.cn/'],
            target: '/gs',
        },
    ],
    name: '研究生院',
    maintainers: ['GhhG123'],
    handler,
    url: 'gs.shu.edu.cn/',
    // Markdown table of the accepted `type` values. The continuation lines stay
    // at column 0 so that no indentation ends up inside the string.
    description: `| 综合新闻 | 培养管理 | 国际交流 |
| -------- | --------- | --------- |
| zhxw | pygl | gjjl |`,
};

/**
 * Builds the feed for one category of gs.shu.edu.cn (Graduate School).
 *
 * Scrapes the category's listing table, then loads every linked article
 * (through the cache, keyed by the article link) and uses the article body
 * as the item description. Links that point at gs1.shu.edu.cn are not fetched.
 *
 * @param ctx Request context; the optional `type` route parameter selects the
 *            category and defaults to `zhxw`.
 * @returns Feed data: `title`, `description`, `link` and the resolved `item` array.
 * @throws Error when `type` is not one of the keys of `noticeType`.
 */
async function handler(ctx) {
    const type = ctx.req.param('type') ?? 'zhxw';

    // Reject unknown categories up front instead of failing later with an
    // opaque "cannot read properties of undefined". The own-property check
    // also rules out inherited keys such as `constructor`.
    if (!Object.prototype.hasOwnProperty.call(noticeType, type)) {
        throw new Error(`Invalid type: ${type}. Supported types: ${Object.keys(noticeType).join(', ')}`);
    }

    const { title: feedTitle, url: listUrl } = noticeType[type];
    const rootUrl = 'https://gs.shu.edu.cn';

    // Fetch the listing page.
    const response = await got({
        method: 'get',
        url: listUrl,
    });

    const $ = load(response.data);

    // One <tr id="line_u17_N"> per news entry.
    const list = $('tr[id^="line_u17_"]')
        .toArray()
        .map((el) => {
            const row = $(el);
            const anchor = row.find('a');
            const cells = row.find('td');
            const rawLink = anchor.attr('href');
            const dateText = cells.eq(1).text().trim(); // second cell: publication timestamp

            return {
                title: anchor.text().trim(),
                // Resolve relative hrefs against the site root; fall back to the root when there is no href.
                link: rawLink ? new URL(rawLink, rootUrl).href : rootUrl,
                // Timestamps are parsed as "YYYY/MM/DD HH:mm:ss" and interpreted as UTC+8.
                pubDate: timezone(parseDate(dateText, 'YYYY/MM/DD HH:mm:ss'), +8),
                // Third cell (view count or other info, per the original author's note);
                // kept as the description if the article body cannot be extracted.
                description: cells.eq(2).text().trim(),
            };
        });

    const items = await Promise.all(
        list.map((item) =>
            cache.tryGet(item.link, async () => {
                // The original author notes that gs1.shu.edu.cn needs on-campus access,
                // so such links are returned with a placeholder instead of being fetched.
                if (new URL(item.link).hostname === 'gs1.shu.edu.cn') {
                    item.description = 'gs1.shu.edu.cn, 无法直接获取';
                    return item;
                }

                // Fetch and parse the article page.
                const detailResponse = await got({
                    method: 'get',
                    url: item.link,
                });
                const content = load(detailResponse.data);

                // Prefer the article body; otherwise keep the text taken from the listing row.
                item.description = content('#vsb_content .v_news_content').html() || item.description;

                return item;
            })
        )
    );

    return {
        title: feedTitle,
        description: feedTitle,
        link: listUrl,
        item: items,
    };
}
99 changes: 58 additions & 41 deletions lib/routes/shu/index.ts
Original file line number Diff line number Diff line change
@@ -1,23 +1,20 @@
import { Route } from '@/types';
import cache from '@/utils/cache';
import got from '@/utils/got';
import { load } from 'cheerio';
import { load } from 'cheerio'; // cheerio@1.x
import { parseDate } from '@/utils/parse-date';
import timezone from '@/utils/timezone';

const host = 'https://www.shu.edu.cn/';
const alias = new Map([
['news', 'zhxw'], // 综合新闻
['research', 'kydt1'], // 科研动态
['kydt', 'kydt1'], // 科研动态
['notice', 'tzgg'], // 通知公告
['important', 'zyxw'], // 重要新闻
]);
const noticeType = {
tzgg: { title: '上海大学 - 通知公告', url: 'https://www.shu.edu.cn/tzgg.htm' },
zyxw: { title: '上海大学 - 重要新闻', url: 'https://www.shu.edu.cn/zyxw.htm' },
};

export const route: Route = {
path: '/:type?',
path: '/news/:type?',
categories: ['university'],
example: '/shu/news',
parameters: { type: '消息类型,默认为`news`' },
example: '/shu/news/tzgg',
parameters: { type: '分类,默认为通知公告' },
features: {
requireConfig: false,
requirePuppeteer: false,
Expand All @@ -28,50 +25,70 @@ export const route: Route = {
},
radar: [
{
source: ['www.shu.edu.cn/:type'],
target: '/:type',
source: ['www.shu.edu.cn/'],
target: '/news',
},
],
name: '官网信息',
maintainers: ['lonelyion'],
name: '官网通知公告',
maintainers: ['lonelyion', 'GhhG123'],
handler,
description: `| 综合新闻 | 科研动态 | 通知公告 | 重要新闻 |
| -------- | -------- | -------- | --------- |
| news | research | notice | important |`,
url: 'www.shu.edu.cn/',
description: `| 通知公告 | 重要新闻 |
| -------- | --------- |
| tzgg | zyxw |`,
};

async function handler(ctx) {
const type = ctx.req.param('type') || 'news';
const link = `https://www.shu.edu.cn/${alias.get(type) || type}.htm`;
const respond = await got.get(link);
const $ = load(respond.data);
const title = $('title').text();
const list = $('.ej_main .list')
.find('li')
.slice(0, 5)
const type = ctx.req.param('type') ?? 'tzgg';
const rootUrl = 'https://www.shu.edu.cn';

// 发起 HTTP GET 请求
const response = await got({
GhhG123 marked this conversation as resolved.
Show resolved Hide resolved
method: 'get',

/* headers: {
'user-agent': UA,
cookie: await getCookie(ctx),
}, */
url: noticeType[type].url,
});

const $ = load(response.data);

const list = $('div.list ul li') // 以下获取信息需要根据网页结构定制
// For cheerio 1.x.x . The item parameter in the .map callback is now explicitly typed as a Cheerio<Element>, not just Element. --fixed
.toArray()
.map((ele) => ({
title: $(ele).find('.bt').text(),
link: new URL($(ele).find('a').attr('href'), host).href,
date: $(ele).find('.sj').text(),
}));
.map((el) => {
const item = $(el); // Wrap `el` in a Cheerio object
const rawLink = item.find('a').attr('href');
return {
title: item.find('p.bt').text().trim(),
link: rawLink ? new URL(rawLink, rootUrl).href : rootUrl,
pubDate: timezone(parseDate(item.find('p.sj').text().trim(), 'YYYY.MM.DD'), +8),
description: item.find('p.zy').text().trim(),
};
});

const all = await Promise.all(
const items = await Promise.all(
list.map((item) =>
cache.tryGet(item.link, async () => {
const response = await got.get(item.link);
const $ = load(response.data);
item.author = $('.xx>:nth-child(2)').text().trim().slice(3); // 投稿:xxx
item.pubDate = parseDate(item.date, 'YYYY.MM.DD');
item.description = $('.v_news_content').html() || item.title;
const detailResponse = await got({
method: 'get',
url: item.link
});
const content = load(detailResponse.data);

item.description = content('#vsb_content .v_news_content').html() || item.description;

return item;
})
)
);

return {
title,
link,
item: all,
title: noticeType[type].title,
description: noticeType[type].title,
link: noticeType[type].url,
item: items,
};
}
14 changes: 7 additions & 7 deletions lib/routes/shu/jwb.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,21 @@ const host = 'https://jwb.shu.edu.cn/';
const alias = new Map([
['notice', 'tzgg'], // 通知公告
['news', 'xw'], // 新闻动态
['policy', 'zcwj'], // 政策文件
/* ['policy', 'zcwj'], 政策文件 //BUG */
]);

export const route: Route = {
path: ['/jwc/:type?', '/jwb/:type?'],
path: ['/jwb/:type?'],
radar: [
{
source: ['www.shu.edu.cn/:type'],
target: '/:type',
source: ['www.shu.edu.cn/index'],
target: '/:type?',
},
],
name: 'Unknown',
maintainers: [],
name: '教务部',
maintainers: ['tuxinghuan', 'GhhG123'],
handler,
description: `| 通知通告 | 新闻 | 政策文件 |
description: `| 通知通告 | 新闻 | 政策文件(bug) |
| -------- | ---- | -------- |
| notice | news | policy |`,
};
Expand Down
3 changes: 2 additions & 1 deletion lib/routes/shu/namespace.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import type { Namespace } from '@/types';

export const namespace: Namespace = {
name: '上海大学',
url: 'jwb.shu.edu.cn',
url: 'www.shu.edu.cn',
description: '上海大学相关网网站',
lang: 'zh-CN',
};
Loading