Skip to content

Commit

Permalink
feat(route): add washingtonpost route (#17208)
Browse files Browse the repository at this point in the history
* feat(route): add washingtonpost route

* rename route

* fix
  • Loading branch information
quiniapiezoelectricity authored Oct 28, 2024
1 parent 8a12d89 commit e0b40aa
Show file tree
Hide file tree
Showing 3 changed files with 183 additions and 0 deletions.
118 changes: 118 additions & 0 deletions lib/routes/washingtonpost/app.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import { Route } from '@/types';
import cache from '@/utils/cache';
import got from '@/utils/got';
import { art } from '@/utils/render';
import path from 'node:path';
import { getCurrentPath } from '@/utils/helpers';
import { FetchError } from 'ofetch';
import dayjs from 'dayjs';
import utc from 'dayjs/plugin/utc';
import timezone from 'dayjs/plugin/timezone';
import advancedFormat from 'dayjs/plugin/advancedFormat';

/**
 * Route definition for the Washington Post "App" feed.
 *
 * The `category` path parameter is optional; when present it is taken from the
 * path of the corresponding washingtonpost.com URL (see `description`).
 */
export const route: Route = {
// Trailing `?` makes the parameter optional; the example in `description` shows a value containing a slash.
path: '/app/:category{.+}?',
categories: ['traditional-media'],
example: '/washingtonpost/app/national',
parameters: {
category: 'Category from the path of the URL of the corresponding site, see below',
},
// Every feature flag is off for this route.
features: {
requirePuppeteer: false,
antiCrawler: false,
supportBT: false,
supportPodcast: false,
supportScihub: false,
},
name: 'App',
maintainers: ['quiniapiezoelectricity'],
// Maps a washingtonpost.com page URL onto this route.
radar: [
{
source: ['www.washingtonpost.com/:category'],
target: '/app/:category',
},
],
handler,
// User-facing documentation text; emitted verbatim.
description: `:::tip
For example, the category for https://www.washingtonpost.com/national/investigations would be /national/investigations.
:::`,
};

/**
 * Merge entries that share the same `id` into a single entry.
 *
 * The first entry seen for an id is kept (and mutated in place); properties of
 * later entries with the same id are copied onto it via `Object.assign`, so
 * later values win on conflicting keys.
 *
 * @param array Entries to de-duplicate; each entry is expected to carry an `id`.
 * @returns The merged entries, in order of first appearance of each id.
 */
function handleDuplicates(array) {
    // A Map preserves first-seen order for every key, whereas a plain object
    // lists integer-like keys first in ascending order. It also has no
    // inherited keys (e.g. `constructor`) that an id could collide with.
    const merged = new Map();
    for (const entry of array) {
        const existing = merged.get(entry.id);
        if (existing) {
            Object.assign(existing, entry);
        } else {
            merged.set(entry.id, entry);
        }
    }
    return Array.from(merged.values());
}

/**
 * Build the feed for a Washington Post app section.
 *
 * Steps:
 * 1. Fetch the section listing for the requested category.
 * 2. Keep the entries flagged `is_from_feed` and merge duplicates by id.
 * 3. For each entry, fetch the full article content (cached by article link)
 *    and render it through `templates/description.art`.
 *
 * @param ctx Request context; only `ctx.req.param('category')` is read.
 * @returns Feed data with `title`, `link` and `item`.
 * @throws Rethrows any article request error other than a `FetchError` with status 415.
 */
async function handler(ctx) {
    const category = ctx.req.param('category') ?? '';
    const __dirname = getCurrentPath(import.meta.url);
    const headers = {
        Accept: '*/*',
        Connection: 'keep-alive',
        'User-Agent': 'Classic/6.70.0',
    };
    // The description template calls `$imports.dayjs.tz(...)` and formats with a `z` token,
    // so the plugins are registered and dayjs is exposed to the template here.
    dayjs.extend(utc);
    dayjs.extend(timezone);
    dayjs.extend(advancedFormat);
    art.defaults.imports.dayjs = dayjs;

    const url = `https://jsonapp1.washingtonpost.com/fusion_prod/v2/${category}`;
    const response = await got.get(url, { headers });
    const { tracking, regions } = response.data;
    const pageTitle = tracking.page_title;
    const title = pageTitle.includes('Washington Post') ? pageTitle : `The Washington Post - ${pageTitle}`;
    const link = 'https://washingtonpost.com' + tracking.page_path;

    // Only first-region entries that themselves contain nested items are considered.
    const mains = regions[0].items.filter((item) => item.items);
    const list = mains.flatMap((main) =>
        // A group may have an empty `items` array; treat it as contributing nothing.
        (main.items[0]?.items ?? [])
            .filter((item) => item.is_from_feed === true)
            .map((item) => {
                const blurb = item.blurbs?.items?.[0]?.text;
                return {
                    id: item.id,
                    title: item.headline.text,
                    link: item.link.url,
                    pubDate: item.link.display_date,
                    updated: item.link.last_modified,
                    // The blurb is only a fallback description; it is replaced below
                    // when the full article content can be fetched.
                    ...(blurb ? { description: blurb } : {}),
                };
            })
    );
    const feed = handleDuplicates(list);

    const items = await Promise.all(
        feed.map((item) =>
            cache.tryGet(item.link, async () => {
                let article;
                try {
                    // The article link is a query-parameter value, so it must be percent-encoded;
                    // otherwise any `?`/`&`/`#` inside the link would be parsed as part of this request URL.
                    article = await got(`https://rainbowapi-a.wpdigital.net/rainbow-data-service/rainbow/content-by-url.json?followLinks=false&url=${encodeURIComponent(item.link)}`, { headers });
                } catch (error) {
                    if (error instanceof FetchError && error.statusCode === 415) {
                        // Interactive or podcast contents will return 415 Unsupported Media Type. Keep calm and carry on.
                        return item;
                    }
                    throw error;
                }

                const content = article.data.items ?? [];
                item.title = article.data.title ?? item.title;
                item.author = content
                    .filter((entry) => entry.type === 'byline')
                    .flatMap((entry) => (entry.authors ?? []).map((author) => author.name))
                    .join(', ');
                // Keep the listing blurb when the article response carries no content entries.
                if (content.length > 0) {
                    item.description = art(path.join(__dirname, 'templates/description.art'), {
                        content,
                    });
                }
                return item;
            })
        )
    );

    return {
        title,
        link,
        item: items,
    };
}
6 changes: 6 additions & 0 deletions lib/routes/washingtonpost/namespace.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import type { Namespace } from '@/types';

/**
 * Namespace metadata for the Washington Post routes: display name and site URL.
 */
export const namespace: Namespace = {
name: 'The Washington Post',
url: 'www.washingtonpost.com',
};
59 changes: 59 additions & 0 deletions lib/routes/washingtonpost/templates/description.art
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
{{ if content }}
{{ each content }}
{{ if $value.type == 'title' && $value.subtype != 'h1'}}
<{{ if $value.subtype }}{{ $value.subtype }}{{ else }}h2{{ /if }}>
{{ if $value.mime == 'text/html' }}{{@ $value.content }}{{ /if }}
{{ if $value.mime == 'text/plain' }}{{ $value.content }}{{ /if }}
</{{ if $value.subtype }}{{ $value.subtype }}{{ else }}h2{{ /if }}>
{{ /if }}
{{ if $value.type == 'sanitized_html' }}
{{ if $value.subtype == 'paragraph' }}<p>{{ else if $value.subtype == 'subhead' }}<h{{ if $value.subhead_level }}{{ $value.subhead_level }}{{ else }}4{{ /if }}>{{ /if }}
{{ if $value.mime == 'text/html' }}{{@ $value.content }}{{ /if }}
{{ if $value.mime == 'text/plain' }}{{ $value.content }}{{ /if }}
{{ if $value.oembed }}{{@ $value.oembed }}{{ /if }}
{{ if $value.subtype == 'paragraph' }}</p>{{ else if $value.subtype == 'subhead' }}</h{{ if $value.subhead_level }}{{ $value.subhead_level }}{{ else }}4{{ /if }}>{{ /if }}
{{ /if }}
{{ if $value.type == 'deck' }}
<blockquote><p>
{{ if $value.mime == 'text/html' }}{{@ $value.content }}{{ /if }}
{{ if $value.mime == 'text/plain' }}{{ $value.content }}{{ /if }}
</p></blockquote>
{{ /if }}
{{ if $value.type == 'image' }}
<figure><img src="{{ $value.imageURL }}" alt="{{ $value.blurb }}"><figcaption>{{ $value.fullcaption }}</figcaption></figure>
{{ /if }}
{{ if $value.type == 'video' }}
{{ if $value.content && $value.content.html }}{{@ $value.content.html }}
{{ else if $value.mediaURL }}
<figure>
<video controls
{{ if $value.imageURL }}poster="{{ $value.imageURL }}"{{ /if }}
>
<source src="{{ $value.mediaURL }}">
</video>
{{ if $value.fullcaption }}<figcaption>{{ $value.fullcaption }}</figcaption>{{ /if }}
</figure>
{{ /if }}
{{ /if }}
{{ if $value.type == 'list' }}
{{ if $value.subtype == 'ordered' }}<ol>{{ else }}<ul>{{ /if }}
{{ if $value.mime == 'text/html' }}{{ each $value.content }}<li>{{@ $value }}</li>{{ /each }}{{ /if }}
{{ if $value.mime == 'text/plain' }}{{ each $value.content }}<li>{{ $value }}</li>{{ /each }}{{ /if }}
{{ if $value.subtype == 'ordered' }}</ol>{{ else }}</ul>{{ /if }}
{{ /if }}
{{ if $value.type == 'divider' }}<br><hr><br>{{ /if }}
{{ if $value.type == 'byline' }}
{{ if $value.subtype == 'live-update' || $value.subtype == 'live-reporter-insight' }}
<p><i>
{{ if $value.mime == 'text/html' }}{{@ $value.content }}{{ /if }}
{{ if $value.mime == 'text/plain' }}{{ $value.content }}{{ /if }}
</i></p>
{{ /if}}
{{ /if }}
{{ if $value.type == 'date' }}
{{ if $value.subtype == 'live-update'}}
{{ if $value.content }}{{ $imports.dayjs.tz($value.content,"America/New_York").locale('en').format('dddd, MMMM D, YYYY h:mm A z') }}{{ /if }}
{{ /if }}
{{ /if }}
{{ /each }}
{{ /if }}

0 comments on commit e0b40aa

Please sign in to comment.