-
Notifications
You must be signed in to change notification settings - Fork 7.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(route): add Scientific American Podcasts
- Loading branch information
Showing
3 changed files
with
310 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
import type { Namespace } from '@/types'; | ||
|
||
/** Human-readable blurb shown for the Scientific American namespace. */
const namespaceDescription =
    'Scientific American is the essential guide to the most awe-inspiring advances in science and technology, explaining how they change our understanding of the world and shape our lives.';

/**
 * Namespace metadata for every route registered under Scientific American.
 */
export const namespace: Namespace = {
    name: 'Scientific American',
    url: 'scientificamerican.com',
    categories: ['new-media'],
    description: namespaceDescription,
    lang: 'en',
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,270 @@ | ||
import path from 'node:path'; | ||
|
||
import { type CheerioAPI, load } from 'cheerio'; | ||
import { type Context } from 'hono'; | ||
|
||
import { type DataItem, type Route, type Data, ViewType } from '@/types'; | ||
|
||
import { art } from '@/utils/render'; | ||
import cache from '@/utils/cache'; | ||
import { getCurrentPath } from '@/utils/helpers'; | ||
import ofetch from '@/utils/ofetch'; | ||
import { parseDate } from '@/utils/parse-date'; | ||
import timezone from '@/utils/timezone'; | ||
|
||
const __dirname = getCurrentPath(import.meta.url);

/** Loosely-typed record taken from the site's embedded JSON; its schema is not declared anywhere in this file. */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
type RawEntry = Record<string, any>;

/** Per-source values that differ between the listing payload and the article payload. */
interface ItemOptions {
    baseUrl: string;
    language: string;
    pubDate?: number | string;
    updated?: number | string;
    /** Candidate category values; falsy entries and duplicates are dropped. */
    categories: unknown[];
    /** Article body blocks handed to the description template (detail pages only). */
    content?: unknown;
}

/** Captures the JSON text the page inlines as window.__DATA__=JSON.parse(`…`). */
const EMBEDDED_DATA_PATTERN = /window\.__DATA__=JSON\.parse\(`(.*?)`\)/;

/**
 * Extracts and parses the `window.__DATA__` payload from a page's HTML.
 *
 * @param html Raw HTML of a listing or article page.
 * @returns The parsed payload, or `undefined` when the page has none.
 */
const parseEmbeddedData = (html: string) => {
    const raw = html.match(EMBEDDED_DATA_PATTERN)?.[1];

    // The payload is embedded in a template literal; collapse doubled backslashes before parsing.
    return raw ? JSON.parse(raw.replaceAll('\\\\', '\\')) : undefined;
};

/**
 * Builds the feed item fields shared by listing entries and article pages.
 * The `link` field is intentionally not set here so that merging a detail
 * item over a listing item never overwrites the listing's link.
 *
 * @param entry Raw record from the embedded payload.
 * @param options Values whose source field names differ between payloads.
 * @returns A feed item, including enclosure fields when `media_url` is present.
 */
const buildItem = (entry: RawEntry, options: ItemOptions): DataItem => {
    const { baseUrl, language, pubDate, updated, categories, content } = options;

    const title: string = entry.title;
    const image: string | undefined = entry.image_url;
    const description: string = art(path.join(__dirname, 'templates/description.art'), {
        images: image
            ? [
                  {
                      src: image,
                      alt: entry.image_alt_text || title,
                      width: entry.image_width,
                      height: entry.image_height,
                  },
              ]
            : undefined,
        intro: entry.summary,
        content,
    });
    // Tolerate records without an authors array instead of throwing.
    const authors: DataItem['author'] = (entry.authors ?? []).map((author: RawEntry) => ({
        name: author.name,
        url: author.url ? new URL(author.url, baseUrl).href : undefined,
        avatar: author.picture_file,
    }));
    const guid = `scientificamerican-${entry.id}`;

    const item: DataItem = {
        title,
        description,
        // NOTE(review): the +8 offset is inherited from the original code — confirm it matches the site's timestamps.
        pubDate: pubDate ? timezone(parseDate(pubDate), +8) : undefined,
        category: [...new Set(categories.filter(Boolean))] as string[],
        author: authors,
        doi: entry.article_doi,
        guid,
        id: guid,
        content: {
            html: description,
            text: entry.summary ?? description,
        },
        image,
        banner: image,
        updated: updated ? timezone(parseDate(updated), +8) : undefined,
        language,
    };

    const enclosureUrl: string | undefined = entry.media_url;

    if (!enclosureUrl) {
        return item;
    }

    // Derive the audio subtype from the file extension, ignoring any query string.
    const enclosureType = `audio/${enclosureUrl.replace(/\?.*$/, '').split(/\./).pop()}`;

    return {
        ...item,
        enclosure_url: enclosureUrl,
        enclosure_type: enclosureType,
        enclosure_title: title,
        itunes_item_image: image,
    };
};

/**
 * Route handler for Scientific American podcasts.
 *
 * Fetches the podcast listing (all podcasts, or one series when `id` is given),
 * builds items from the page's embedded payload, then enriches each item from
 * its article page (cached per link).
 *
 * @param ctx Hono request context; reads the `id` route param and `limit` query.
 * @returns Feed data; `item` may be empty when the page carries no payload.
 */
export const handler = async (ctx: Context): Promise<Data> => {
    const { id } = ctx.req.param();
    const requestedLimit = Number.parseInt(ctx.req.query('limit') ?? '12', 10);
    // A non-numeric ?limit= parses to NaN, which would make slice() return nothing.
    const limit = Number.isNaN(requestedLimit) ? 12 : requestedLimit;

    const baseUrl = 'https://www.scientificamerican.com';
    const targetUrl = new URL(`podcast${id ? `/${id}` : 's'}/`, baseUrl).href;

    const response = await ofetch(targetUrl);
    const $: CheerioAPI = load(response);
    const language = $('html').attr('lang') ?? 'en';

    const results: RawEntry[] = parseEmbeddedData(response)?.initialData?.props?.results ?? [];

    const listItems: DataItem[] = results.slice(0, limit).map((entry): DataItem => {
        const pubDate: number | string | undefined = entry.date_published;

        return {
            ...buildItem(entry, {
                baseUrl,
                language,
                pubDate,
                updated: entry.release_date ?? pubDate,
                categories: [entry.category, entry.subtype, entry.column, entry.digital_column],
            }),
            link: entry.url ? new URL(entry.url, baseUrl).href : undefined,
        };
    });

    const items = (
        await Promise.all(
            listItems.map((item) => {
                const { link } = item;

                if (!link) {
                    return item;
                }

                return cache.tryGet(link, async (): Promise<DataItem> => {
                    const detailResponse = await ofetch(link);
                    const article: RawEntry | undefined = parseEmbeddedData(detailResponse)?.initialData?.article;

                    // Fall back to the listing data when the article page has no usable payload.
                    if (!article) {
                        return item;
                    }

                    const pubDate: number | string | undefined = article.published_at_date_time;

                    return {
                        ...item,
                        ...buildItem(article, {
                            baseUrl,
                            language,
                            pubDate,
                            updated: article.updated_at_date_time ?? pubDate,
                            categories: [article.display_category, article.primary_category, article.subcategory, ...(article.categories ?? []), article.podcast_series_name],
                            content: article.content,
                        }),
                    };
                });
            })
        )
        // Always-true predicate: removes nothing at runtime, only narrows the element type to DataItem.
    ).filter((_): _ is DataItem => true);

    const siteName = $('meta[property="og:site_name"]').attr('content');

    return {
        title: $('title').text(),
        description: $('meta[name="description"]').attr('content'),
        link: targetUrl,
        item: items,
        allowEmpty: true,
        image: $('meta[property="og:image"]').attr('content'),
        author: siteName,
        language,
        feedLink: $('link[type="application/rss+xml"]').attr('href'),
        itunes_author: siteName,
        itunes_category: 'Science',
        id: $('meta[property="og:url"]').attr('content'),
    };
};
|
||
/**
 * Route definition for Scientific American podcasts.
 *
 * Both `/podcasts/:id?` and `/podcast/:id?` are served by the same handler;
 * omitting `id` yields the feed for all podcasts.
 */
export const route: Route = {
    path: ['/podcasts/:id?', '/podcast/:id?'],
    name: 'Podcasts',
    url: 'www.scientificamerican.com',
    maintainers: ['nczitzk'],
    handler,
    example: '/scientificamerican/podcast',
    parameters: {
        id: 'ID, see below',
    },
    description: `:::tip
If you subscribe to [Science Quickly](https://www.scientificamerican.com/podcast/science-quickly/),where the URL is \`https://www.scientificamerican.com/podcast/science-quickly/\`, extract the part \`https://www.scientificamerican.com/podcast/\` to the end, which is \`science-quickly\`, and use it as the parameter to fill in. Therefore, the route will be [\`/scientificamerican/podcasts/science-quickly\`](https://rsshub.app/scientificamerican/podcasts/science-quickly).
:::

| All | Science Quickly | Uncertain    |
| --- | --------------- | ------------ |
|     | science-quickly | science-talk |
`,
    categories: ['new-media'],
    features: {
        requireConfig: false,
        requirePuppeteer: false,
        antiCrawler: false,
        supportRadar: true,
        supportBT: false,
        // The handler emits enclosure_* and itunes_* fields, so the feed is podcast-capable.
        supportPodcast: true,
        supportScihub: false,
    },
    radar: [
        {
            source: ['www.scientificamerican.com/podcasts/', 'www.scientificamerican.com/podcast/:id'],
            target: (params) => {
                const id: string = params.id;

                return `/scientificamerican/podcast${id ? `/${id}` : ''}`;
            },
        },
        {
            title: 'Science Quickly',
            source: ['www.scientificamerican.com/podcast/science-quickly/'],
            target: '/podcast/science-quickly',
        },
        {
            title: 'Uncertain',
            source: ['www.scientificamerican.com/podcast/science-talk/'],
            target: '/podcast/science-talk',
        },
    ],
    view: ViewType.Articles,

    zh: {
        path: ['/podcasts/:id?', '/podcast/:id?'],
        name: 'Podcasts',
        url: 'www.scientificamerican.com',
        maintainers: ['nczitzk'],
        handler,
        example: '/scientificamerican/podcast',
        parameters: {
            id: 'ID,见下表',
        },
        description: `:::tip
若订阅 [Science Quickly](https://www.scientificamerican.com/podcast/science-quickly/),网址为 \`https://www.scientificamerican.com/podcast/science-quickly/\`,请截取 \`https://www.scientificamerican.com/podcast/\` 到末尾 \`/\` 的部分 \`science-quickly\` 作为 \`id\` 参数填入,此时目标路由为 [\`/scientificamerican/podcasts/science-quickly\`](https://rsshub.app/scientificamerican/podcasts/science-quickly)。
:::

| 全部 | Science Quickly | Uncertain    |
| ---- | --------------- | ------------ |
|      | science-quickly | science-talk |
`,
    },
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
{{ if images }}
    {{ each images image }}
        {{ if image?.src }}
            <figure>
                <img
                    {{ if image.alt }}
                        alt="{{ image.alt }}"
                    {{ /if }}
                    {{ if image.width }}
                        width="{{ image.width }}"
                    {{ /if }}
                    {{ if image.height }}
                        height="{{ image.height }}"
                    {{ /if }}
                    src="{{ image.src }}">
            </figure>
        {{ /if }}
    {{ /each }}
{{ /if }}

{{ if intro }}
    {{@ intro }}
{{ /if }}

{{ if content }}
    {{ each content c }}
        <{{ c.tag }}>
            {{@ c.content }}
        </{{ c.tag }}>
    {{ /each }}
{{ /if }}