Skip to content

Commit

Permalink
feat(route): add route UK Parliament Commons Library research by topic (#16941)
Browse files Browse the repository at this point in the history

* feat(route): add route UK Parliament Lords Library Research by Topic

* chore(route): rename parameter from tag to topic.

* feat(route): add route UK Parliament Commons Library Research by Topic

* chore(route): update parameters desc.

* fix: close browser after puppeteer usage

---------
  • Loading branch information
AntiKnot authored Oct 12, 2024
1 parent b1a030c commit 9376572
Showing 1 changed file with 55 additions and 0 deletions.
55 changes: 55 additions & 0 deletions lib/routes/parliament.uk/commonslibrary.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import { load } from 'cheerio';
import { Route } from '@/types';
import puppeteer from '@/utils/puppeteer';
import timezone from '@/utils/timezone';

/**
 * Route metadata for the UK Parliament Commons Library listing, selected by
 * publication type (e.g. `research-briefing`, `data-dashboard`).
 */
export const route: Route = {
    // The trailing `?` makes the `topic` segment optional.
    path: '/commonslibrary/type/:topic?',
    categories: ['government'],
    example: '/parliament.uk/commonslibrary/type/research-briefing',
    parameters: { topic: 'research by topic, string, example: [research-briefing|data-dashboard]' },
    features: {
        requireConfig: false,
        // The handler loads the listing page through a puppeteer browser.
        requirePuppeteer: true,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },
    // Human-readable route name (previously misspelled as 'Commonlibrary').
    name: 'Commons Library',
    maintainers: ['AntiKnot'],
    handler,
};

/**
 * Loads `url` in a puppeteer browser and returns the inner HTML of the
 * document element once `domcontentloaded` has fired.
 *
 * Only the main document request is allowed through; every other resource
 * type (scripts, images, stylesheets, ...) is aborted.
 *
 * The browser is always closed, even when navigation or evaluation throws,
 * so a failed request does not leave a browser process behind.
 */
async function fetchDocumentHtml(url: string): Promise<string> {
    const browser = await puppeteer();
    try {
        const page = await browser.newPage();
        await page.setRequestInterception(true);
        page.on('request', (request) => {
            if (request.resourceType() === 'document') {
                request.continue();
            } else {
                request.abort();
            }
        });
        await page.goto(url, {
            waitUntil: 'domcontentloaded',
        });

        return await page.evaluate(() => document.documentElement.innerHTML);
    } finally {
        // Closing the browser also disposes of the page opened above.
        await browser.close();
    }
}

/**
 * Builds the feed for a Commons Library "type" listing page.
 *
 * @param ctx Request context; the optional `topic` route parameter selects
 *            the listing (e.g. `research-briefing`, `data-dashboard`).
 * @returns Feed data with `title`, `link` and one `item` entry per
 *          `article.card--horizontal` element found on the page.
 */
async function handler(ctx) {
    // `topic` is optional in the route path; without a fallback the URL would
    // contain the literal string "undefined". Default to the route's example.
    const { topic = 'research-briefing' } = ctx.req.param();
    const baseUrl = 'https://commonslibrary.parliament.uk/type';
    const url = `${baseUrl}/${topic}/`;

    const html = await fetchDocumentHtml(url);
    const $ = load(html);

    const items = $('article.card--horizontal')
        .map((_, article) => {
            const card = $(article);
            const anchor = card.find('.card__text a');
            // Cards without a `datetime` attribute yield `undefined` here;
            // skip the conversion rather than pass `undefined` to `timezone`.
            const datetime = card.find('.card__date time').attr('datetime');

            return {
                title: anchor.text().trim(),
                link: anchor.attr('href'),
                description: card.find('p').last().text().trim(),
                pubDate: datetime ? timezone(datetime) : undefined,
            };
        })
        .toArray();

    return {
        title: `parliament - commonslibrary - ${topic}`,
        // Point the feed at the listing page that was actually scraped.
        link: url,
        item: items,
    };
}

0 comments on commit 9376572

Please sign in to comment.