diff --git a/lib/v2/chinafactcheck/index.js b/lib/v2/chinafactcheck/index.js index e7998d5c41a177..0483edf2ea5a7f 100644 --- a/lib/v2/chinafactcheck/index.js +++ b/lib/v2/chinafactcheck/index.js @@ -3,35 +3,34 @@ const cheerio = require('cheerio'); const got = require('@/utils/got'); module.exports = async (ctx) => { - const rssTitle = '有据|国际新闻事实核查'; - - const response = await got('https://chinafactcheck.com'); + const response = await got(utils.siteLink, { + headers: { + 'user-agent': utils.trueUA, + }, + }); const $ = cheerio.load(response.data); - const articlesLinkListNode = $('.post-info-box .post-thumb a') - .map((_, item) => $(item).attr('href')) - .get(); - const articlesLinkList = Array.from(new Set(articlesLinkListNode)); + const articlesLink = $('.post-info-box .post-thumb a') + .toArray() + .map((item) => ({ link: $(item).attr('href') })); const articles = await Promise.all( - articlesLinkList.map(async (item) => { - const articlesLink = item; - - const detail = await ctx.cache.tryGet(articlesLink, () => utils.getArticleDetail(articlesLink)); + articlesLink.map((item) => + ctx.cache.tryGet(item.link, async () => { + const { title, author, pubDate, description, category } = await utils.getArticleDetail(item.link); - const element = { - title: detail.title, - author: detail.author, - pubDate: detail.time, - description: detail.description, - link: articlesLink, - }; - return element; - }) + item.title = title; + item.author = author; + item.pubDate = pubDate; + item.description = description; + item.category = category; + return item; + }) + ) ); ctx.state.data = { - title: rssTitle, + title: $('head title').text(), link: utils.siteLink, item: articles, }; diff --git a/lib/v2/chinafactcheck/maintainer.js b/lib/v2/chinafactcheck/maintainer.js index 414dc193983552..aefc7919a97731 100644 --- a/lib/v2/chinafactcheck/maintainer.js +++ b/lib/v2/chinafactcheck/maintainer.js @@ -1,3 +1,4 @@ module.exports = { + '': ['kdanfly'], '/': ['kdanfly'], }; diff --git a/lib/v2/chinafactcheck/templates/description.art b/lib/v2/chinafactcheck/templates/description.art deleted file mode 100644 index 29add8424498fd..00000000000000 --- a/lib/v2/chinafactcheck/templates/description.art +++ /dev/null @@ -1 +0,0 @@ -{{@ desc.article }} diff --git a/lib/v2/chinafactcheck/utils.js b/lib/v2/chinafactcheck/utils.js index 6b3d1831235bcf..a6faf152ecb272 100644 --- a/lib/v2/chinafactcheck/utils.js +++ b/lib/v2/chinafactcheck/utils.js @@ -1,16 +1,10 @@ -const { art } = require('@/utils/render'); -const path = require('path'); const cheerio = require('cheerio'); const got = require('@/utils/got'); const { parseDate } = require('@/utils/parse-date'); +const { trueUA } = require('@/config').value; const siteLink = 'https://chinafactcheck.com'; -const renderDesc = (desc) => - art(path.join(__dirname, 'templates/description.art'), { - desc, - }); - const cleanDom = (dom) => { dom('*[style]').removeAttr('style'); dom('br').remove(); @@ -30,31 +24,37 @@ const cleanDom = (dom) => { }; const getArticleDetail = async (link) => { - const response = await got(link); + const response = await got(link, { + headers: { + 'user-agent': trueUA, + }, + }); const $ = cleanDom(cheerio.load(response.data)); const title = $('.content-head h2').text().trim(); const author = $('.content-persons p span:last').text().trim(); - const time = parseDate($('.content-time').text().trim(), 'YYYY-MM-DD'); + const pubDate = parseDate($('.content-time').text().trim(), 'YYYY-MM-DD'); - const description = { - article: $('div[class=content-list-box]').html(), - }; - return new ArticleDetail(title, author, time, renderDesc(description)); + const description = $('div[class=content-list-box]').html(); + const category = $('.content-tags a[rel="tag"]') + .toArray() + .map((item) => $(item).text().trim()); + return new ArticleDetail(title, author, pubDate, description, category); }; class ArticleDetail { - constructor(title, author, time, description) { + constructor(title, author, pubDate, description, category) { this.title = title; this.author = author; - this.time = time; + this.pubDate = pubDate; this.description = description; + this.category = category; } } module.exports = { siteLink, - renderDesc, cleanDom, getArticleDetail, ArticleDetail, + trueUA, };