Skip to content

Commit

Permalink
feat(route/cna): Preserve web crawling method.
Browse files Browse the repository at this point in the history
  • Loading branch information
dzx-dzx committed Nov 14, 2023
1 parent f4f14bc commit c79168b
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 0 deletions.
1 change: 1 addition & 0 deletions lib/v2/cna/router.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
module.exports = (router) => {
router.get('/:id?', require('./'));
router.get('/web/:id?', require('./web/'));
};
62 changes: 62 additions & 0 deletions lib/v2/cna/web/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
const got = require('@/utils/got');
const cheerio = require('cheerio');
const { parseDate } = require('@/utils/parse-date');
const timezone = require('@/utils/timezone');

module.exports = async (ctx) => {
const id = ctx.params.id || 'aall';

let rootUrl;

if (/^\d+$/.test(id)) {
rootUrl = `https://www.cna.com.tw/topic/newstopic/${id}.aspx`;
} else {
rootUrl = `https://www.cna.com.tw/list/${id}.aspx`;
}
const response = await got({
method: 'get',
url: rootUrl,
});

const $ = cheerio.load(response.data);
const list = $('*:is(.pcBox .caItem, .mainList li a div) h2')
.slice(0, ctx.query.limit ? parseInt(ctx.query.limit) : 10)
.toArray()
.map((item) => {
item = $(item);
return {
title: item.text(),
link: item.parents('a').attr('href'),
pubDate: timezone(parseDate(item.next().text()), +8),
};
});

const items = await Promise.all(
list.map((item) =>
ctx.cache.tryGet(item.link, async () => {
const detailResponse = await got({
method: 'get',
url: item.link,
});
const content = cheerio.load(detailResponse.data);
const topImage = content('.fullPic').html();

item.description = (topImage === null ? '' : topImage) + content('.paragraph').eq(0).html();
item.category = [
...content("meta[property='article:tag']")
.get()
.map((e) => e.attribs.content),
content('.active > a').text(),
];

return item;
})
)
);

ctx.state.data = {
title: $('title').text(),
link: rootUrl,
item: items,
};
};

0 comments on commit c79168b

Please sign in to comment.