Skip to content

Commit

Permalink
feat(route): add GDUFS news route and GDUFS xwxy news (#17822)
Browse files Browse the repository at this point in the history
* feat(route): add GDUFS news route and GDUFS xwxy news

* fix(xwxy-news): update authors extraction to use toArray() method

* fix(xwxy-news): improve article detail fetching by explicitly passing item to fetchArticleDetail

* fix(xwxy-news): reorder authors extraction to trim text after converting to array

* refactor(routes): remove protocol from URLs in gdufs/news.ts

Co-authored-by: Tony <TonyRL@users.noreply.github.com>

* refactor(routes): simplify route name in gdufs/news.ts

Co-authored-by: Tony <TonyRL@users.noreply.github.com>

* refactor(routes): remove protocol from URLs in gdufs/news.ts

Co-authored-by: Tony <TonyRL@users.noreply.github.com>

* refactor(routes): simplify route name in gdufs/xwxy-news.ts

Co-authored-by: Tony <TonyRL@users.noreply.github.com>

* refactor(routes): use cache to optimize article content fetching and author extraction in gdufs/news.ts and gdufs/xwxy-news.ts

* refactor(routes): Cache the entire item object in /gdufs/news & /gdufs/xwxy-news route

---------
  • Loading branch information
gz4zzxc authored Dec 16, 2024
1 parent 030924a commit 07c1e88
Show file tree
Hide file tree
Showing 3 changed files with 191 additions and 0 deletions.
7 changes: 7 additions & 0 deletions lib/routes/gdufs/namespace.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import type { Namespace } from '@/types';

/**
 * Namespace metadata shared by the routes in this directory:
 * the display name, the site host (no protocol) and the content language.
 */
export const namespace: Namespace = {
    lang: 'zh-CN',
    url: 'gdufs.edu.cn',
    name: '广东外语外贸大学',
};
94 changes: 94 additions & 0 deletions lib/routes/gdufs/news.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import { Route } from '@/types';
import { load } from 'cheerio';
import cache from '@/utils/cache';
import got from '@/utils/got';
import { parseDate } from '@/utils/parse-date';

const site = 'https://www.gdufs.edu.cn';

/**
 * Route definition for `/gdufs/news`.
 *
 * Takes no path parameters and sets every feature flag to `false`.
 */
export const route: Route = {
    // Identity and documentation fields.
    name: '新闻',
    path: '/news',
    example: '/gdufs/news',
    url: 'www.gdufs.edu.cn/gwxw/gwxw1.htm',
    categories: ['university'],
    maintainers: ['gz4zzxc'],

    // This route accepts no parameters.
    parameters: {},

    features: {
        requireConfig: false,
        requirePuppeteer: false,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },

    // Pages listed as radar sources for this route.
    radar: [{ source: ['www.gdufs.edu.cn/gwxw/gwxw1.htm', 'www.gdufs.edu.cn/'] }],

    handler,
};

/**
 * Builds the 大学要闻 feed from the GDUFS news list page.
 *
 * Every `ul.list_luntan li` entry on the list page supplies the title, link
 * and date; each linked article is then loaded to pick up its body HTML and
 * author. A failure while loading one article degrades only that item to a
 * placeholder description; a failure loading the list page itself propagates.
 *
 * @returns The feed title, link, description and the resolved items.
 */
async function handler() {
    const link = 'https://www.gdufs.edu.cn/gwxw/gwxw1.htm';

    const response = await got(link);
    const $ = load(response.body);

    const entries = $('ul.list_luntan li')
        .toArray()
        .flatMap((element) => {
            const node = $(element);
            const href = node.find('a').attr('href');
            // Without an href, `new URL('', site)` would resolve to the site
            // root and the homepage would be fetched as if it were the article.
            if (!href) {
                return [];
            }

            const day = node.find('h3').text().trim();
            const yearMonth = node.find('h6').text().trim();
            const parsed = parseDate(yearMonth + '/' + day);

            return [
                {
                    title: node.find('h5').text().trim(),
                    link: href.startsWith('http') ? href : new URL(href, site).href,
                    // Leave the date unset rather than emitting the string "Invalid Date".
                    pubDate: Number.isNaN(parsed.getTime()) ? undefined : parsed.toUTCString(),
                },
            ];
        });

    const items = await Promise.all(
        entries.map(async (entry) => {
            // The try/catch wraps cache.tryGet instead of living inside its
            // callback, so a failed fetch never yields a value for the cache
            // helper to keep; the placeholder is produced per request only.
            try {
                return await cache.tryGet(entry.link, async () => {
                    const articleRes = await got(entry.link);
                    const $$ = load(articleRes.body);
                    const description = $$('.v_news_content').html()?.trim() || '';

                    // When several spans match, the last one wins.
                    let author = '';
                    for (const el of $$('.nav01 h6 .ll span').toArray()) {
                        const text = $$(el).text().trim();
                        if (text.includes('责任编辑:')) {
                            author = text.replace('责任编辑:', '').trim();
                        } else if (text.includes('文字:')) {
                            author = text.replace('文字:', '').trim();
                        }
                    }

                    return {
                        ...entry,
                        description,
                        author,
                    };
                });
            } catch {
                return {
                    ...entry,
                    description: '内容获取失败。',
                    author: '',
                };
            }
        })
    );

    return {
        title: '广外-大学要闻',
        link,
        description: '广东外语外贸大学-大学要闻',
        item: items,
    };
}
90 changes: 90 additions & 0 deletions lib/routes/gdufs/xwxy/xwxy-news.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import { Route } from '@/types';
import { load } from 'cheerio';
import cache from '@/utils/cache';
import got from '@/utils/got';
import { parseDate } from '@/utils/parse-date';

/**
 * Route definition for `/gdufs/xwxy-news`.
 *
 * Takes no path parameters and sets every feature flag to `false`.
 */
export const route: Route = {
    path: '/xwxy-news',
    categories: ['university'],
    example: '/gdufs/xwxy-news',
    parameters: {},
    features: {
        requireConfig: false,
        requirePuppeteer: false,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },
    radar: [
        {
            // The handler fetches `/xwzx/xyxw.htm`, so that path is listed
            // explicitly; the extension-less form is kept for compatibility.
            source: ['xwxy.gdufs.edu.cn/xwzx/xyxw.htm', 'xwxy.gdufs.edu.cn/xwzx/xyxw', 'xwxy.gdufs.edu.cn/'],
        },
    ],
    name: '新闻学院-学院新闻',
    maintainers: ['gz4zzxc'],
    handler,
    // Same page the handler fetches, without the protocol.
    url: 'xwxy.gdufs.edu.cn/xwzx/xyxw.htm',
};

/**
 * Builds the 学院新闻 feed from the GDUFS School of Journalism list page.
 *
 * Every `div.flex-center a.clearfix` anchor on the list page supplies the
 * title, link and date; each linked article is then loaded to pick up its
 * body HTML and authors. A failure while loading one article degrades only
 * that item to a placeholder description.
 *
 * @returns The feed title, link, description and the resolved items.
 * @throws Error when the list page response has no body.
 */
async function handler() {
    const BASE_URL = 'https://xwxy.gdufs.edu.cn';
    const link = `${BASE_URL}/xwzx/xyxw.htm`;

    const response = await got(link);
    if (!response.body) {
        throw new Error('No response body');
    }
    const $ = load(response.body);

    const items = $('div.flex-center a.clearfix')
        .toArray()
        // Without an href, `new URL('', BASE_URL)` would resolve to the site
        // root and the homepage would be fetched as if it were the article.
        .filter((element) => Boolean($(element).attr('href')))
        .map((element) => {
            const anchor = $(element);
            const href = anchor.attr('href') || '';
            const parsed = parseDate(anchor.find('i').text().trim());
            return {
                title: anchor.find('h5').text().trim(),
                link: href.startsWith('http') ? href : new URL(href, BASE_URL).href,
                // Leave the date unset rather than emitting the string "Invalid Date".
                pubDate: Number.isNaN(parsed.getTime()) ? undefined : parsed.toUTCString(),
            };
        });

    const enhancedItems = await Promise.all(
        items.map(async (item) => {
            // The try/catch wraps cache.tryGet instead of living inside its
            // callback, so a failed fetch never yields a value for the cache
            // helper to keep; the placeholder is produced per request only.
            try {
                return await cache.tryGet(item.link, async () => {
                    const articleResponse = await got(item.link);
                    if (!articleResponse.body) {
                        throw new Error('No article body');
                    }
                    const $$ = load(articleResponse.body);
                    const content = $$('#vsb_content .v_news_content').html() || '';
                    const authors = $$('.show01 p i')
                        .toArray()
                        .map((el) => $$(el).text().trim());

                    return {
                        ...item,
                        description: content,
                        author: authors.join(' '),
                    };
                });
            } catch {
                return {
                    ...item,
                    description: '无法获取内容',
                    author: '',
                };
            }
        })
    );

    return {
        title: '广外新传学院-学院新闻',
        link,
        description: '广东外语外贸大学新闻与传播学院官网-学院新闻',
        item: enhancedItems,
    };
}

0 comments on commit 07c1e88

Please sign in to comment.