From e0b40aaddf348888bedf8f0ed2b74098415626c6 Mon Sep 17 00:00:00 2001
From: quiniapiezoelectricity <73748843+quiniapiezoelectricity@users.noreply.github.com>
Date: Mon, 28 Oct 2024 18:10:11 +0000
Subject: [PATCH] feat(route): add washingtonpost route (#17208)

* feat(route): add washingtonpost route

* rename route

* fix
---
Review note: app.ts below was revised after the original commit, so the blob id on its
"index" line no longer matches the content.

 lib/routes/washingtonpost/app.ts              | 166 ++++++++++++++++++
 lib/routes/washingtonpost/namespace.ts        |   6 +
 .../washingtonpost/templates/description.art  |  59 ++++++
 3 files changed, 231 insertions(+)
 create mode 100644 lib/routes/washingtonpost/app.ts
 create mode 100644 lib/routes/washingtonpost/namespace.ts
 create mode 100644 lib/routes/washingtonpost/templates/description.art

diff --git a/lib/routes/washingtonpost/app.ts b/lib/routes/washingtonpost/app.ts
new file mode 100644
index 00000000000000..c69009c1e21411
--- /dev/null
+++ b/lib/routes/washingtonpost/app.ts
@@ -0,0 +1,166 @@
+import { Route } from '@/types';
+import cache from '@/utils/cache';
+import got from '@/utils/got';
+import { art } from '@/utils/render';
+import path from 'node:path';
+import { getCurrentPath } from '@/utils/helpers';
+import { FetchError } from 'ofetch';
+import dayjs from 'dayjs';
+import utc from 'dayjs/plugin/utc';
+import timezone from 'dayjs/plugin/timezone';
+import advancedFormat from 'dayjs/plugin/advancedFormat';
+
+// Register the dayjs plugins and expose dayjs to the art templates once, at module load,
+// instead of repeating the setup on every request.
+dayjs.extend(utc);
+dayjs.extend(timezone);
+dayjs.extend(advancedFormat);
+art.defaults.imports.dayjs = dayjs;
+
+export const route: Route = {
+    path: '/app/:category{.+}?',
+    categories: ['traditional-media'],
+    example: '/washingtonpost/app/national',
+    parameters: {
+        category: 'Category from the path of the URL of the corresponding site, see below',
+    },
+    features: {
+        requirePuppeteer: false,
+        antiCrawler: false,
+        supportBT: false,
+        supportPodcast: false,
+        supportScihub: false,
+    },
+    name: 'App',
+    maintainers: ['quiniapiezoelectricity'],
+    radar: [
+        {
+            source: ['www.washingtonpost.com/:category'],
+            target: '/app/:category',
+        },
+    ],
+    handler,
+    description: `:::tip
+For example, the category for https://www.washingtonpost.com/national/investigations would be /national/investigations.
+:::`,
+};
+
+// Shape of a feed entry as assembled by `handler`.
+// NOTE(review): field types are assumed from how the values are used; confirm against the upstream payload.
+interface FeedItem {
+    id: string;
+    title: string;
+    link: string;
+    pubDate: string;
+    updated: string;
+    description?: string;
+    author?: string;
+}
+
+/**
+ * Collapse entries that share an `id` into a single object.
+ *
+ * The first occurrence keeps its position and later duplicates are merged into it with
+ * `Object.assign`. Entries are tracked in a `Map` rather than a plain object so that ids
+ * such as `constructor` cannot hit inherited properties and integer-like ids are not
+ * reordered.
+ *
+ * @param array Entries to de-duplicate; duplicates are merged in place into the first one.
+ * @returns One entry per distinct id, in first-seen order.
+ */
+function handleDuplicates<T extends { id: string | number }>(array: T[]): T[] {
+    const byId = new Map<string, T>();
+    for (const obj of array) {
+        const key = String(obj.id);
+        const seen = byId.get(key);
+        if (seen) {
+            Object.assign(seen, obj);
+        } else {
+            byId.set(key, obj);
+        }
+    }
+    return [...byId.values()];
+}
+
+/**
+ * Build the feed for one Washington Post app category.
+ *
+ * Fetches the category listing, keeps the entries flagged `is_from_feed`, merges duplicates,
+ * then loads each article (through `cache.tryGet`, keyed by its link) to fill in title, authors and description.
+ *
+ * @param ctx Request context; the optional `category` route parameter selects the listing.
+ * @returns The feed title, link and items.
+ * @throws Any upstream request error, except a 415 for an individual article.
+ */
+async function handler(ctx) {
+    const category = ctx.req.param('category') ?? '';
+    const __dirname = getCurrentPath(import.meta.url);
+    const headers = {
+        Accept: '*/*',
+        Connection: 'keep-alive',
+        'User-Agent': 'Classic/6.70.0',
+    };
+
+    const url = `https://jsonapp1.washingtonpost.com/fusion_prod/v2/${category}`;
+    const response = await got.get(url, { headers });
+    const pageTitle = response.data.tracking.page_title;
+    const title = pageTitle.includes('Washington Post') ? pageTitle : `The Washington Post - ${pageTitle}`;
+    const link = `https://washingtonpost.com${response.data.tracking.page_path}`;
+    const mains = response.data.regions[0].items.filter((item) => item.items);
+    const list: FeedItem[] = mains.flatMap((main) =>
+        // An empty `items` array passes the filter above; such a section simply contributes nothing.
+        (main.items[0]?.items ?? [])
+            .filter((item) => item.is_from_feed === true)
+            .map((item) => {
+                const object: FeedItem = {
+                    id: item.id,
+                    title: item.headline.text,
+                    link: item.link.url,
+                    pubDate: item.link.display_date,
+                    updated: item.link.last_modified,
+                };
+                // Only add the key when a blurb exists, so merging duplicates never overwrites a description with undefined.
+                const blurb = item.blurbs?.items?.[0]?.text;
+                if (blurb) {
+                    object.description = blurb;
+                }
+                return object;
+            })
+    );
+    const feed = handleDuplicates(list);
+    const items = await Promise.all(
+        feed.map((item) =>
+            cache.tryGet(item.link, async () => {
+                let response;
+                try {
+                    // The article URL is a query-string value, so it has to be percent-encoded.
+                    response = await got(`https://rainbowapi-a.wpdigital.net/rainbow-data-service/rainbow/content-by-url.json?followLinks=false&url=${encodeURIComponent(item.link)}`, { headers });
+                } catch (error) {
+                    if (error instanceof FetchError && error.statusCode === 415) {
+                        // Interactive or podcast contents will return 415 Unsupported Media Type. Keep calm and carry on.
+                        return item;
+                    }
+                    throw error;
+                }
+                const content = response.data.items;
+                item.title = response.data.title ?? item.title;
+                item.author =
+                    content
+                        ?.filter((entry) => entry.type === 'byline')
+                        .flatMap((entry) => (entry.authors ?? []).map((author) => author.name))
+                        .join(', ') ?? '';
+                if (content) {
+                    // Without content items the listing blurb (if any) stays as the description.
+                    item.description = art(path.join(__dirname, 'templates/description.art'), { content });
+                }
+                return item;
+            })
+        )
+    );
+
+    return {
+        title,
+        link,
+        item: items,
+    };
+}
diff --git a/lib/routes/washingtonpost/namespace.ts b/lib/routes/washingtonpost/namespace.ts
new file mode 100644
index 00000000000000..7383ab2bdeb7e3
--- /dev/null
+++ b/lib/routes/washingtonpost/namespace.ts
@@ -0,0 +1,6 @@
+import type { Namespace } from '@/types';
+
+export const namespace: Namespace = {
+    name: 'The Washington Post',
+    url: 'www.washingtonpost.com',
+};
diff --git a/lib/routes/washingtonpost/templates/description.art b/lib/routes/washingtonpost/templates/description.art
new file mode 100644
index 00000000000000..fc7382329a7edb
--- /dev/null
+++ b/lib/routes/washingtonpost/templates/description.art
@@ -0,0 +1,59 @@
+{{ if content }}
+{{ each content }}
+    {{ if $value.type == 'title' && $value.subtype != 'h1'}}
+        <{{ if $value.subtype }}{{ $value.subtype }}{{ else }}h2{{ /if }}>
+            {{ if $value.mime == 'text/html' }}{{@ $value.content }}{{ /if }}
+            {{ if $value.mime == 'text/plain' }}{{ $value.content }}{{ /if }}
+
+    {{ /if }}
+    {{ if $value.type == 'sanitized_html' }}
+        {{ if $value.subtype == 'paragraph' }}
{{ else if $value.subtype == 'subhead' }}
+ {{ /if }} + {{ if $value.type == 'image' }} + + {{ /if }} + {{ if $value.type == 'video' }} + {{ if $value.content && $value.content.html }}{{@ $value.content.html }} + {{ else if $value.mediaURL }} + + {{ /if }} + {{ /if }} + {{ if $value.type == 'list' }} + {{ if $value.subtype == 'ordered' }}+ {{ if $value.mime == 'text/html' }}{{@ $value.content }}{{ /if }} + {{ if $value.mime == 'text/plain' }}{{ $value.content }}{{ /if }} +
+ {{ if $value.mime == 'text/html' }}{{@ $value.content }}{{ /if }} + {{ if $value.mime == 'text/plain' }}{{ $value.content }}{{ /if }} +
+ {{ /if}} + {{ /if }} + {{ if $value.type == 'date' }} + {{ if $value.subtype == 'live-update'}} + {{ if $value.content }}{{ $imports.dayjs.tz($value.content,"America/New_York").locale('en').format('dddd, MMMM D, YYYY h:mm A z') }}{{ /if }} + {{ /if }} + {{ /if }} +{{ /each }} +{{ /if }} \ No newline at end of file