From b1a030c5f0fd6890abc2cfad5185c6d37244f010 Mon Sep 17 00:00:00 2001
From: "xxx.Yan"
Date: Sun, 13 Oct 2024 03:33:39 +0800
Subject: [PATCH] feat(route): add route UK Parliament Lords Library research by topic (#16933)

* feat(route): add route UK Parliament Lords Library Research by Topic

* chore(route): rename parameter from tag to topic.

* fix: close browser after puppeteer usage

---------
---
Review note: lordslibrary.ts below was revised during review (browser closed
in a finally block, default topic, feed link, missing-date guard); the blob id
on its index line was not recomputed.

 lib/routes/parliament.uk/lordslibrary.ts | 90 ++++++++++++++++++++++++
 lib/routes/parliament.uk/namespace.ts    |  6 ++
 2 files changed, 96 insertions(+)
 create mode 100644 lib/routes/parliament.uk/lordslibrary.ts
 create mode 100644 lib/routes/parliament.uk/namespace.ts

diff --git a/lib/routes/parliament.uk/lordslibrary.ts b/lib/routes/parliament.uk/lordslibrary.ts
new file mode 100644
index 00000000000000..e2d07f1cadac00
--- /dev/null
+++ b/lib/routes/parliament.uk/lordslibrary.ts
@@ -0,0 +1,90 @@
+import { load } from 'cheerio';
+import type { Route } from '@/types';
+import puppeteer from '@/utils/puppeteer';
+import timezone from '@/utils/timezone';
+
+/** Topic used when the optional `:topic` path parameter is omitted. */
+const DEFAULT_TOPIC = 'research-briefing';
+
+export const route: Route = {
+    path: '/lordslibrary/type/:topic?',
+    categories: ['government'],
+    example: '/parliament.uk/lordslibrary/type/research-briefing',
+    parameters: { topic: 'research by topic, string, example: [research-briefing|business|economy]' },
+    features: {
+        requireConfig: false,
+        requirePuppeteer: true,
+        antiCrawler: false,
+        supportBT: false,
+        supportPodcast: false,
+        supportScihub: false,
+    },
+    name: 'House of Lords Library',
+    maintainers: ['AntiKnot'],
+    handler,
+};
+
+/**
+ * Load `url` in a headless browser and return the HTML of its document element.
+ *
+ * Only requests whose resource type is `document` are let through; all others
+ * are aborted. The browser is closed in a `finally` block so that a failed
+ * navigation does not leak a browser process.
+ */
+async function fetchDocumentHtml(url: string): Promise<string> {
+    const browser = await puppeteer();
+    try {
+        const page = await browser.newPage();
+        await page.setRequestInterception(true);
+        page.on('request', (request) => {
+            if (request.resourceType() === 'document') {
+                request.continue();
+            } else {
+                request.abort();
+            }
+        });
+        await page.goto(url, {
+            waitUntil: 'domcontentloaded',
+        });
+        // `await` is required: a bare `return` would let `finally` close the browser first.
+        return await page.evaluate(() => document.documentElement.innerHTML);
+    } finally {
+        await browser.close();
+    }
+}
+
+/**
+ * Build the feed for one Lords Library listing page.
+ *
+ * Reads the optional `topic` route parameter (falling back to `DEFAULT_TOPIC`),
+ * renders the matching listing page and maps every `article.card--horizontal`
+ * element to a feed item.
+ */
+async function handler(ctx) {
+    const { topic = DEFAULT_TOPIC } = ctx.req.param();
+    const baseUrl = 'https://lordslibrary.parliament.uk/type';
+    const url = `${baseUrl}/${encodeURIComponent(topic)}/`;
+
+    const html = await fetchDocumentHtml(url);
+    const $ = load(html);
+    const items = $('article.card--horizontal')
+        .map((_, article) => {
+            const card = $(article);
+            const anchor = card.find('.card__text a');
+            const datetime = card.find('.card__date time').attr('datetime');
+            return {
+                title: anchor.text().trim(),
+                link: anchor.attr('href'),
+                description: card.find('p').last().text().trim(),
+                // A card without a `datetime` attribute gets no pubDate rather than a broken one.
+                pubDate: datetime ? timezone(datetime) : undefined,
+            };
+        })
+        .toArray();
+
+    return {
+        title: `parliament - lordslibrary - ${topic}`,
+        link: url,
+        item: items,
+    };
+}
diff --git a/lib/routes/parliament.uk/namespace.ts b/lib/routes/parliament.uk/namespace.ts
new file mode 100644
index 00000000000000..0480446f06aa4a
--- /dev/null
+++ b/lib/routes/parliament.uk/namespace.ts
@@ -0,0 +1,6 @@
+import type { Namespace } from '@/types';
+
+export const namespace: Namespace = {
+    name: 'UK Parliament',
+    url: 'parliament.uk',
+};