Skip to content

Commit

Permalink
feat(routes/shu): add routes for SHU's Int'l Dept, Grad School, and C…
Browse files Browse the repository at this point in the history
…ampus Highlights. (#17730)

* feat(routes/shu): add routes for SHU's Int'l Dept, Grad School, and Campus Highlights

- Corrected the root URL in `index.ts`.
- Added routes for:
  - SHU's International Department (Int'l Dept).
  - Graduate School (Grad School).
  - Campus Highlights.
- Noted the unavailability of the policy in `jwb.ts` with a comment in `index.ts`.

* Update lib/routes/shu/index.ts

Co-authored-by: Tony <[email protected]>

* Update lib/routes/shu/jwb.ts

Co-authored-by: Tony <[email protected]>

* Apply camelCase to variable names across the project.

* Refactor: change to use detailed request format for GET request.

* feat: refine content extraction and fix gs.shu.edu.cn issues

- Refactored content extraction to focus on specific descriptions.
- Added exception handling for inaccessible gs1.shu.edu.cn links.
- Fixed bug where gs.shu.edu.cn content could not be retrieved.
- Fixed Code scanning/ESLint warning: replaced disallowed syntax with .toArray().

* fix: Resolve ESLint warnings and errors

* Update lib/routes/shu/xykd.ts

Co-authored-by: Tony <[email protected]>

* fix: Resolve ESLint warnings and errors again

* fix: Resolve ESLint warnings and errors

---------
  • Loading branch information
GhhG123 authored Nov 27, 2024
1 parent b6956b2 commit f59d7da
Show file tree
Hide file tree
Showing 6 changed files with 365 additions and 49 deletions.
94 changes: 94 additions & 0 deletions lib/routes/shu/global.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import { Route } from '@/types';
import cache from '@/utils/cache';
import got from '@/utils/got';
import { load } from 'cheerio'; // [email protected]
import { parseDate } from '@/utils/parse-date';
import timezone from '@/utils/timezone';

/**
 * Supported categories for the International Department feed, keyed by the
 * `:type` route parameter. Each entry carries the feed title and the URL of
 * the listing page to scrape.
 */
const noticeType = {
    // Notices and announcements
    tzgg: {
        title: '上海大学国际部港澳台-通知公告',
        url: 'https://global.shu.edu.cn/cd/tzgg/3.htm',
    },
};

/**
 * Route metadata for the SHU International Department feed.
 * The optional `:type` segment selects the category; the handler falls back
 * to `tzgg` when it is omitted.
 */
export const route: Route = {
    path: '/global/:type?',
    categories: ['university'],
    example: '/shu/global/tzgg',
    parameters: {
        type: '分类,默认为通知公告',
    },
    features: {
        requireConfig: false,
        requirePuppeteer: false,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },
    // Radar rule: map the department's site to this route.
    radar: [{ source: ['global.shu.edu.cn/'], target: '/global' }],
    name: '国际部港澳台办公室',
    maintainers: ['GhhG123'],
    handler,
    url: 'global.shu.edu.cn/',
    // Markdown table of the accepted `type` values.
    description: `| 通知公告 |
| -------- |
| tzgg |`,
};

/**
 * Builds the feed for the SHU International Department notice list.
 *
 * Fetches the listing page configured in `noticeType` for the requested
 * category, extracts title, link and date from every list entry, then loads
 * each detail page (through the cache) to fill in the article HTML.
 *
 * @param ctx - Request context; the optional `type` route parameter selects
 *   the category and defaults to `tzgg`.
 * @returns Feed data: title, description, link and the resolved items.
 * @throws Error when `type` is not one of the keys of `noticeType`.
 */
async function handler(ctx) {
    const type = ctx.req.param('type') ?? 'tzgg';

    // Reject unknown categories up front instead of failing later with an
    // opaque "cannot read properties of undefined" (own-key check also keeps
    // inherited names such as `constructor` from being treated as categories).
    if (!Object.prototype.hasOwnProperty.call(noticeType, type)) {
        throw new Error(`Unsupported type: ${type}. Supported types: ${Object.keys(noticeType).join(', ')}`);
    }
    const { title: feedTitle, url: listUrl } = noticeType[type];
    const rootUrl = 'https://global.shu.edu.cn';

    // Fetch the listing page.
    const response = await got({
        method: 'get',
        url: listUrl,
    });

    const $ = load(response.data);

    // Each <li> under div.only-list1 is one notice.
    const list = $('div.only-list1 ul li')
        .toArray()
        .map((el) => {
            const item = $(el);
            const rawLink = item.find('a').attr('href');
            const pubDate = item.find('span').text().trim();

            return {
                title: item.find('a').text().trim(),
                // Resolve relative hrefs against the site root; fall back to the root when no href exists.
                link: rawLink ? new URL(rawLink, rootUrl).href : rootUrl,
                // The parsed date is passed to `timezone` with a +8 offset.
                pubDate: timezone(parseDate(pubDate, 'YYYY年MM月DD日'), +8),
                // The listing offers no summary; filled in from the detail page below.
                description: '',
            };
        });

    // Load every detail page, keyed in the cache by its link.
    const items = await Promise.all(
        list.map((item) =>
            cache.tryGet(item.link, async () => {
                const detailResponse = await got({
                    method: 'get',
                    url: item.link,
                });
                const content = load(detailResponse.data);

                // Use the article body, or a placeholder when the container is missing/empty.
                item.description = content('#vsb_content_2 .v_news_content').html() || '内容无法提取';

                return item;
            })
        )
    );

    return {
        title: feedTitle,
        description: feedTitle,
        link: listUrl,
        item: items,
    };
}
106 changes: 106 additions & 0 deletions lib/routes/shu/gs.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import { Route } from '@/types';
import cache from '@/utils/cache';
import got from '@/utils/got';
import { load } from 'cheerio'; // [email protected]
import { parseDate } from '@/utils/parse-date';
import timezone from '@/utils/timezone';

/**
 * Supported categories for the Graduate School feed, keyed by the `:type`
 * route parameter. Each entry carries the feed title and the URL of the
 * listing page to scrape.
 */
const noticeType = {
    // General news
    zhxw: {
        title: '上海大学研究生院-综合新闻',
        url: 'https://gs.shu.edu.cn/xwlb/zh.htm',
    },
    // Training management.
    // Known issue noted by the author: a request to
    // https://gs1.shu.edu.cn:8080/py/KCBInfo.asp failed with "TypeError: fetch failed".
    pygl: {
        title: '上海大学研究生院-培养管理',
        url: 'https://gs.shu.edu.cn/xwlb/py.htm',
    },
    // International exchange
    gjjl: {
        title: '上海大学研究生院-国际交流',
        url: 'https://gs.shu.edu.cn/xwlb/gjjl.htm',
    },
};

/**
 * Route metadata for the SHU Graduate School feed.
 * The optional `:type` segment selects the category; the handler falls back
 * to `zhxw` (综合新闻) when it is omitted.
 */
export const route: Route = {
    path: '/gs/:type?',
    categories: ['university'],
    example: '/shu/gs/zhxw',
    parameters: {
        // Fixed: the text previously named a non-existent "学术公告" default;
        // the handler's actual default is `zhxw`, i.e. 综合新闻.
        type: '分类,默认为综合新闻',
    },
    features: {
        requireConfig: false,
        requirePuppeteer: false,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },
    // Radar rule: map the Graduate School site to this route.
    radar: [
        {
            source: ['gs.shu.edu.cn/'],
            target: '/gs',
        },
    ],
    name: '研究生院',
    maintainers: ['GhhG123'],
    handler,
    url: 'gs.shu.edu.cn/',
    // Markdown table of the accepted `type` values.
    description: `| 综合新闻 | 培养管理 | 国际交流 |
| -------- | --------- | --------- |
| zhxw | pygl | gjjl |`,
};

/**
 * Builds the feed for the SHU Graduate School news lists.
 *
 * Fetches the listing page configured in `noticeType` for the requested
 * category, extracts title, link, date and the third table cell from every
 * row, then loads each detail page (through the cache) to fill in the
 * article HTML. Links on `gs1.shu.edu.cn` are not fetched.
 *
 * @param ctx - Request context; the optional `type` route parameter selects
 *   the category and defaults to `zhxw`.
 * @returns Feed data: title, description, link and the resolved items.
 * @throws Error when `type` is not one of the keys of `noticeType`.
 */
async function handler(ctx) {
    const type = ctx.req.param('type') ?? 'zhxw';

    // Reject unknown categories up front instead of failing later with an
    // opaque "cannot read properties of undefined" (own-key check also keeps
    // inherited names such as `constructor` from being treated as categories).
    if (!Object.prototype.hasOwnProperty.call(noticeType, type)) {
        throw new Error(`Unsupported type: ${type}. Supported types: ${Object.keys(noticeType).join(', ')}`);
    }
    const { title: feedTitle, url: listUrl } = noticeType[type];
    const rootUrl = 'https://gs.shu.edu.cn';

    // Fetch the listing page.
    const response = await got({
        method: 'get',
        url: listUrl,
    });

    const $ = load(response.data);

    // Each news entry is a <tr> whose id starts with "line_u17_".
    const list = $('tr[id^="line_u17_"]')
        .toArray()
        .map((el) => {
            const row = $(el);
            const anchor = row.find('a');
            const rawLink = anchor.attr('href');
            const cells = row.find('td');
            const dateText = cells.eq(1).text().trim();

            return {
                title: anchor.text().trim(),
                // Resolve relative hrefs against the site root; fall back to the root when no href exists.
                link: rawLink ? new URL(rawLink, rootUrl).href : rootUrl,
                // The parsed date is passed to `timezone` with a +8 offset.
                pubDate: timezone(parseDate(dateText, 'YYYY/MM/DD HH:mm:ss'), +8),
                // Third cell of the row (view count or other info, per the original note);
                // kept as a fallback when the detail page yields no content.
                description: cells.eq(2).text().trim(),
            };
        });

    // Load every detail page, keyed in the cache by its link.
    const items = await Promise.all(
        list.map((item) =>
            cache.tryGet(item.link, async () => {
                // Per the original author's note, gs1.shu.edu.cn requires on-campus
                // access, so such links get a placeholder instead of a request.
                if (new URL(item.link).hostname === 'gs1.shu.edu.cn') {
                    item.description = 'gs1.shu.edu.cn, 无法直接获取';
                    return item;
                }

                const detailResponse = await got({
                    method: 'get',
                    url: item.link,
                });
                const content = load(detailResponse.data);

                // Prefer the article body; otherwise keep the listing-derived description.
                item.description = content('#vsb_content .v_news_content').html() || item.description;

                return item;
            })
        )
    );

    return {
        title: feedTitle,
        description: feedTitle,
        link: listUrl,
        item: items,
    };
}
99 changes: 58 additions & 41 deletions lib/routes/shu/index.ts
Original file line number Diff line number Diff line change
@@ -1,23 +1,20 @@
import { Route } from '@/types';
import cache from '@/utils/cache';
import got from '@/utils/got';
import { load } from 'cheerio';
import { load } from 'cheerio'; // [email protected]
import { parseDate } from '@/utils/parse-date';
import timezone from '@/utils/timezone';

const host = 'https://www.shu.edu.cn/';
const alias = new Map([
['news', 'zhxw'], // 综合新闻
['research', 'kydt1'], // 科研动态
['kydt', 'kydt1'], // 科研动态
['notice', 'tzgg'], // 通知公告
['important', 'zyxw'], // 重要新闻
]);
const noticeType = {
tzgg: { title: '上海大学 - 通知公告', url: 'https://www.shu.edu.cn/tzgg.htm' },
zyxw: { title: '上海大学 - 重要新闻', url: 'https://www.shu.edu.cn/zyxw.htm' },
};

export const route: Route = {
path: '/:type?',
path: '/news/:type?',
categories: ['university'],
example: '/shu/news',
parameters: { type: '消息类型,默认为`news`' },
example: '/shu/news/tzgg',
parameters: { type: '分类,默认为通知公告' },
features: {
requireConfig: false,
requirePuppeteer: false,
Expand All @@ -28,50 +25,70 @@ export const route: Route = {
},
radar: [
{
source: ['www.shu.edu.cn/:type'],
target: '/:type',
source: ['www.shu.edu.cn/'],
target: '/news',
},
],
name: '官网信息',
maintainers: ['lonelyion'],
name: '官网通知公告',
maintainers: ['lonelyion', 'GhhG123'],
handler,
description: `| 综合新闻 | 科研动态 | 通知公告 | 重要新闻 |
| -------- | -------- | -------- | --------- |
| news | research | notice | important |`,
url: 'www.shu.edu.cn/',
description: `| 通知公告 | 重要新闻 |
| -------- | --------- |
| tzgg | zyxw |`,
};

async function handler(ctx) {
const type = ctx.req.param('type') || 'news';
const link = `https://www.shu.edu.cn/${alias.get(type) || type}.htm`;
const respond = await got.get(link);
const $ = load(respond.data);
const title = $('title').text();
const list = $('.ej_main .list')
.find('li')
.slice(0, 5)
const type = ctx.req.param('type') ?? 'tzgg';
const rootUrl = 'https://www.shu.edu.cn';

// 发起 HTTP GET 请求
const response = await got({
method: 'get',

/* headers: {
'user-agent': UA,
cookie: await getCookie(ctx),
}, */
url: noticeType[type].url,
});

const $ = load(response.data);

const list = $('div.list ul li') // 以下获取信息需要根据网页结构定制
// For cheerio 1.x.x . The item parameter in the .map callback is now explicitly typed as a Cheerio<Element>, not just Element. --fixed
.toArray()
.map((ele) => ({
title: $(ele).find('.bt').text(),
link: new URL($(ele).find('a').attr('href'), host).href,
date: $(ele).find('.sj').text(),
}));
.map((el) => {
const item = $(el); // Wrap `el` in a Cheerio object
const rawLink = item.find('a').attr('href');
return {
title: item.find('p.bt').text().trim(),
link: rawLink ? new URL(rawLink, rootUrl).href : rootUrl,
pubDate: timezone(parseDate(item.find('p.sj').text().trim(), 'YYYY.MM.DD'), +8),
description: item.find('p.zy').text().trim(),
};
});

const all = await Promise.all(
const items = await Promise.all(
list.map((item) =>
cache.tryGet(item.link, async () => {
const response = await got.get(item.link);
const $ = load(response.data);
item.author = $('.xx>:nth-child(2)').text().trim().slice(3); // 投稿:xxx
item.pubDate = parseDate(item.date, 'YYYY.MM.DD');
item.description = $('.v_news_content').html() || item.title;
const detailResponse = await got({
method: 'get',
url: item.link
});
const content = load(detailResponse.data);

item.description = content('#vsb_content .v_news_content').html() || item.description;

return item;
})
)
);

return {
title,
link,
item: all,
title: noticeType[type].title,
description: noticeType[type].title,
link: noticeType[type].url,
item: items,
};
}
14 changes: 7 additions & 7 deletions lib/routes/shu/jwb.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,21 @@ const host = 'https://jwb.shu.edu.cn/';
const alias = new Map([
['notice', 'tzgg'], // 通知公告
['news', 'xw'], // 新闻动态
['policy', 'zcwj'], // 政策文件
/* ['policy', 'zcwj'], 政策文件 //BUG */
]);

export const route: Route = {
path: ['/jwc/:type?', '/jwb/:type?'],
path: ['/jwb/:type?'],
radar: [
{
source: ['www.shu.edu.cn/:type'],
target: '/:type',
source: ['www.shu.edu.cn/index'],
target: '/:type?',
},
],
name: 'Unknown',
maintainers: [],
name: '教务部',
maintainers: ['tuxinghuan', 'GhhG123'],
handler,
description: `| 通知通告 | 新闻 | 政策文件 |
description: `| 通知通告 | 新闻 | 政策文件(bug) |
| -------- | ---- | -------- |
| notice | news | policy |`,
};
Expand Down
3 changes: 2 additions & 1 deletion lib/routes/shu/namespace.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import type { Namespace } from '@/types';

export const namespace: Namespace = {
name: '上海大学',
url: 'jwb.shu.edu.cn',
url: 'www.shu.edu.cn',
description: '上海大学相关网网站',
lang: 'zh-CN',
};
Loading

0 comments on commit f59d7da

Please sign in to comment.