-
Notifications
You must be signed in to change notification settings - Fork 7.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(route): add GDUFS news route and GDUFS xwxy news (#17822)
* feat(route): add GDUFS news route && GDUFS xwxy news
* fix(xwxy-news): update authors extraction to use toArray() method
* fix(xwxy-news): improve article detail fetching by explicitly passing item to fetchArticleDetail
* fix(xwxy-news): reorder authors extraction to trim text after converting to array
* refactor(routes): remove protocol from URLs in gdufs/news.ts
  Co-authored-by: Tony <TonyRL@users.noreply.github.com>
* refactor(routes): simplify route name in gdufs/news.ts
  Co-authored-by: Tony <TonyRL@users.noreply.github.com>
* refactor(routes): remove protocol from URLs in gdufs/news.ts
  Co-authored-by: Tony <TonyRL@users.noreply.github.com>
* refactor(routes): simplify route name in gdufs/xwxy-news.ts
  Co-authored-by: Tony <TonyRL@users.noreply.github.com>
* refactor(routes): use cache to optimize article content fetching and author extraction in gdufs/news.ts and gdufs/xwxy-news.ts
* refactor(routes): Cache the entire item object in /gdufs/news & /gdufs/xwxy-news route
---------
- Loading branch information
Showing
3 changed files
with
191 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
import type { Namespace } from '@/types'; | ||
|
||
/**
 * Namespace metadata shared by the GDUFS routes: display name, site domain
 * (written without a protocol) and content language.
 */
export const namespace: Namespace = {
    name: '广东外语外贸大学',
    url: 'gdufs.edu.cn',
    lang: 'zh-CN',
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
import { Route } from '@/types'; | ||
import { load } from 'cheerio'; | ||
import cache from '@/utils/cache'; | ||
import got from '@/utils/got'; | ||
import { parseDate } from '@/utils/parse-date'; | ||
|
||
// Origin used as the base when resolving relative article links from the list page.
const site = 'https://www.gdufs.edu.cn';
|
||
/**
 * Route definition for `/gdufs/news`.
 *
 * The route takes no parameters and every feature flag is disabled; the
 * feed itself is produced by `handler`.
 */
export const route: Route = {
    path: '/news',
    categories: ['university'],
    example: '/gdufs/news',
    parameters: {},
    features: {
        requireConfig: false,
        requirePuppeteer: false,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },
    // Source pages associated with this route (list page and site root).
    radar: [
        {
            source: ['www.gdufs.edu.cn/gwxw/gwxw1.htm', 'www.gdufs.edu.cn/'],
        },
    ],
    name: '新闻',
    maintainers: ['gz4zzxc'],
    handler,
    url: 'www.gdufs.edu.cn/gwxw/gwxw1.htm',
};
|
||
/**
 * Builds the feed for the GDUFS news list page.
 *
 * Steps:
 * 1. Fetch the list page and read title, link and date for every `<li>` entry.
 * 2. For each entry, load the article page (through the cache, keyed by the
 *    article URL) to add the HTML body and the author.
 *
 * Article fetching is best-effort: if loading an article fails, a placeholder
 * item is returned so the rest of the feed is still served. The placeholder is
 * produced OUTSIDE of `cache.tryGet`, so only successfully fetched articles are
 * cached and a transient failure is retried on the next request.
 * NOTE(review): this relies on `cache.tryGet` not storing anything when its
 * callback rejects — confirm against `@/utils/cache`.
 *
 * @returns Feed data: channel title, link, description and the item list.
 * @throws If the list page itself cannot be fetched.
 */
async function handler() {
    const link = 'https://www.gdufs.edu.cn/gwxw/gwxw1.htm';

    const response = await got(link);
    const $ = load(response.body);
    const list = $('ul.list_luntan li');

    const items = await Promise.all(
        list.toArray().map(async (element) => {
            const item = $(element);
            const href = item.find('a').attr('href') ?? '';
            const title = item.find('h5').text().trim();
            // The list shows the day (<h3>) and the year/month (<h6>) in separate elements.
            const day = item.find('h3').text().trim();
            const yearMonth = item.find('h6').text().trim();
            const fullLink = href.startsWith('http') ? href : new URL(href, site).href;
            const pubDate = parseDate(`${yearMonth}/${day}`).toUTCString();

            try {
                return await cache.tryGet(fullLink, async () => {
                    const articleRes = await got(fullLink);
                    const $$ = load(articleRes.body);
                    const description = $$('.v_news_content').html()?.trim() ?? '';

                    // Scan every span; the last span carrying one of the labels wins.
                    let author = '';
                    for (const el of $$('.nav01 h6 .ll span').toArray()) {
                        const text = $$(el).text().trim();
                        if (text.includes('责任编辑:')) {
                            author = text.replace('责任编辑:', '').trim();
                        } else if (text.includes('文字:')) {
                            author = text.replace('文字:', '').trim();
                        }
                    }

                    return {
                        title,
                        link: fullLink,
                        description,
                        pubDate,
                        author,
                    };
                });
            } catch {
                // Best-effort fallback; intentionally not cached (see function docs).
                return {
                    title,
                    link: fullLink,
                    description: '内容获取失败。',
                    pubDate,
                    author: '',
                };
            }
        })
    );

    return {
        title: '广外-大学要闻',
        link,
        description: '广东外语外贸大学-大学要闻',
        item: items,
    };
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
import { Route } from '@/types'; | ||
import { load } from 'cheerio'; | ||
import cache from '@/utils/cache'; | ||
import got from '@/utils/got'; | ||
import { parseDate } from '@/utils/parse-date'; | ||
|
||
/**
 * Route definition for `/gdufs/xwxy-news`.
 *
 * The route takes no parameters and every feature flag is disabled; the
 * feed itself is produced by `handler`.
 */
export const route: Route = {
    path: '/xwxy-news',
    categories: ['university'],
    example: '/gdufs/xwxy-news',
    parameters: {},
    features: {
        requireConfig: false,
        requirePuppeteer: false,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },
    // Source pages associated with this route (list page and site root).
    radar: [
        {
            source: ['xwxy.gdufs.edu.cn/xwzx/xyxw', 'xwxy.gdufs.edu.cn/'],
        },
    ],
    name: '新闻学院-学院新闻',
    maintainers: ['gz4zzxc'],
    handler,
    url: 'xwxy.gdufs.edu.cn/xwzx/xyxw',
};
|
||
/**
 * Builds the feed for the list page at `https://xwxy.gdufs.edu.cn/xwzx/xyxw.htm`.
 *
 * Steps:
 * 1. Fetch the list page and read title, link and date for every entry.
 * 2. For each entry, load the article page (through the cache, keyed by the
 *    article URL) to add the HTML body and the author names.
 *
 * Article fetching is best-effort: if loading an article fails (request error
 * or empty body), a placeholder item is returned so the rest of the feed is
 * still served. The placeholder is produced OUTSIDE of `cache.tryGet`, so only
 * successfully fetched articles are cached and a transient failure is retried
 * on the next request.
 * NOTE(review): this relies on `cache.tryGet` not storing anything when its
 * callback rejects — confirm against `@/utils/cache`.
 *
 * @returns Feed data: channel title, link, description and the item list.
 * @throws If the list page cannot be fetched or comes back with an empty body.
 */
async function handler() {
    const BASE_URL = 'https://xwxy.gdufs.edu.cn';
    const link = `${BASE_URL}/xwzx/xyxw.htm`;

    const response = await got(link);
    if (!response.body) {
        throw new Error('No response body');
    }
    const $ = load(response.body);
    const list = $('div.flex-center a.clearfix');

    const items = list.toArray().map((element) => {
        const item = $(element);
        const href = item.attr('href') ?? '';
        const dateText = item.find('i').text().trim();
        const pubDate = parseDate(dateText).toUTCString();
        return {
            title: item.find('h5').text().trim(),
            link: href.startsWith('http') ? href : new URL(href, BASE_URL).href,
            pubDate,
        };
    });

    const enhancedItems = await Promise.all(
        items.map(async (item) => {
            try {
                return await cache.tryGet(item.link, async () => {
                    const articleResponse = await got(item.link);
                    if (!articleResponse.body) {
                        throw new Error('No article body');
                    }
                    const $$ = load(articleResponse.body);
                    const content = $$('#vsb_content .v_news_content').html() ?? '';
                    const authors = $$('.show01 p i')
                        .toArray()
                        .map((el) => $$(el).text().trim());

                    return {
                        ...item,
                        description: content,
                        author: authors.join(' '),
                    };
                });
            } catch {
                // Best-effort fallback; intentionally not cached (see function docs).
                return {
                    ...item,
                    description: '无法获取内容',
                    author: '',
                };
            }
        })
    );

    return {
        title: '广外新传学院-学院新闻',
        link,
        description: '广东外语外贸大学新闻与传播学院官网-学院新闻',
        item: enhancedItems,
    };
}