diff --git a/lib/routes/scientificamerican/namespace.ts b/lib/routes/scientificamerican/namespace.ts
new file mode 100644
index 00000000000000..1ae9b33482da4c
--- /dev/null
+++ b/lib/routes/scientificamerican/namespace.ts
@@ -0,0 +1,9 @@
+import type { Namespace } from '@/types';
+
+export const namespace: Namespace = {
+    name: 'Scientific American',
+    url: 'scientificamerican.com',
+    categories: ['new-media'],
+    description: 'Scientific American is the essential guide to the most awe-inspiring advances in science and technology, explaining how they change our understanding of the world and shape our lives.',
+    lang: 'en',
+};
diff --git a/lib/routes/scientificamerican/podcast.ts b/lib/routes/scientificamerican/podcast.ts
new file mode 100644
index 00000000000000..f8acdd7ea03ea9
--- /dev/null
+++ b/lib/routes/scientificamerican/podcast.ts
@@ -0,0 +1,290 @@
+import path from 'node:path';
+
+import { type CheerioAPI, load } from 'cheerio';
+import { type Context } from 'hono';
+
+import { type DataItem, type Route, type Data, ViewType } from '@/types';
+
+import { art } from '@/utils/render';
+import cache from '@/utils/cache';
+import { getCurrentPath } from '@/utils/helpers';
+import ofetch from '@/utils/ofetch';
+import { parseDate } from '@/utils/parse-date';
+import timezone from '@/utils/timezone';
+
+const __dirname = getCurrentPath(import.meta.url);
+
+/** Matches the payload of the inline `window.__DATA__=JSON.parse(...)` script. */
+const pageDataRegExp = /window\.__DATA__=JSON\.parse\(`(.*?)`\)/;
+
+/**
+ * Extracts and parses the data payload inlined in a page.
+ *
+ * @param html - Raw HTML of the page.
+ * @returns The parsed payload, or `undefined` when the page carries none.
+ */
+const extractPageData = (html: string) => {
+    const raw: string | undefined = html.match(pageDataRegExp)?.[1];
+
+    // The payload is captured from a backtick-delimited literal; doubled backslashes are collapsed before parsing.
+    return raw ? JSON.parse(raw.replaceAll('\\\\', '\\')) : undefined;
+};
+
+/**
+ * Builds the enclosure fields of an item.
+ *
+ * @param mediaUrl - URL of the audio file, if the item has one.
+ * @param title - Item title, reused as the enclosure title.
+ * @param image - Item image, reused as the iTunes item image.
+ * @returns The enclosure fields, or an empty object when there is no audio file.
+ */
+const buildEnclosure = (mediaUrl: string | undefined, title: string, image: string | undefined): Partial<DataItem> => {
+    if (!mediaUrl) {
+        return {};
+    }
+
+    // The subtype is the file extension with any query string dropped; `mp3` maps to the registered `audio/mpeg`.
+    const extension: string | undefined = mediaUrl.replace(/\?.*$/, '').split('.').pop();
+
+    return {
+        enclosure_url: mediaUrl,
+        enclosure_type: `audio/${extension === 'mp3' ? 'mpeg' : extension}`,
+        enclosure_title: title,
+        itunes_item_image: image,
+    };
+};
+
+/**
+ * Builds the podcast feed: the podcast index, or a single series when `id` is given.
+ *
+ * Items are read from the data inlined in the listing page, then each one is enriched from its own
+ * detail page, cached by item link.
+ *
+ * @param ctx - Request context; the `id` route parameter and the `limit` query parameter (default 12) are read.
+ * @returns The feed data.
+ */
+export const handler = async (ctx: Context): Promise<Data> => {
+    const { id } = ctx.req.param();
+    const limit: number = Number.parseInt(ctx.req.query('limit') ?? '12', 10);
+
+    const baseUrl: string = 'https://www.scientificamerican.com';
+    const targetUrl: string = new URL(`podcast${id ? `/${id}` : 's'}/`, baseUrl).href;
+
+    const response = await ofetch(targetUrl);
+    const $: CheerioAPI = load(response);
+    const language: string = $('html').attr('lang') ?? 'en';
+
+    // A page without the expected payload yields an empty feed instead of throwing.
+    const results = extractPageData(response)?.initialData?.props?.results ?? [];
+
+    const listItems: DataItem[] = results.slice(0, limit).map((item): DataItem => {
+        const title: string = item.title;
+        const image: string | undefined = item.image_url;
+        const description: string = art(path.join(__dirname, 'templates/description.art'), {
+            images: image
+                ? [
+                      {
+                          src: image,
+                          alt: item.image_alt_text || title,
+                          width: item.image_width,
+                          height: item.image_height,
+                      },
+                  ]
+                : undefined,
+            intro: item.summary,
+        });
+        const pubDate: number | string = item.date_published;
+        const linkUrl: string | undefined = item.url;
+        const categories: string[] = [...new Set([item.category, item.subtype, item.column, item.digital_column].filter(Boolean))];
+        const authors: DataItem['author'] = item.authors?.map((author) => ({
+            name: author.name,
+            url: author.url ? new URL(author.url, baseUrl).href : undefined,
+            avatar: author.picture_file,
+        }));
+        const guid: string = `scientificamerican-${item.id}`;
+        const updated: number | string = item.release_date ?? pubDate;
+
+        return {
+            title,
+            description,
+            pubDate: pubDate ? timezone(parseDate(pubDate), +8) : undefined,
+            link: linkUrl ? new URL(linkUrl, baseUrl).href : undefined,
+            category: categories,
+            author: authors,
+            doi: item.article_doi,
+            guid,
+            id: guid,
+            content: {
+                html: description,
+                text: item.summary ?? description,
+            },
+            image,
+            banner: image,
+            updated: updated ? timezone(parseDate(updated), +8) : undefined,
+            language,
+            ...buildEnclosure(item.media_url, title, image),
+        };
+    });
+
+    // The trailing `filter` removes nothing; its always-true predicate only narrows the element type to `DataItem`.
+    const items: DataItem[] = (
+        await Promise.all(
+            listItems.map((item) => {
+                const link = item.link;
+
+                if (!link) {
+                    return item;
+                }
+
+                return cache.tryGet(link, async (): Promise<DataItem> => {
+                    const detailResponse = await ofetch(link);
+                    const articleData = extractPageData(detailResponse)?.initialData?.article;
+
+                    // Keep the listing-page version when the detail page has no usable payload.
+                    if (!articleData) {
+                        return item;
+                    }
+
+                    const title: string = articleData.title;
+                    const image: string | undefined = articleData.image_url;
+                    const description: string = art(path.join(__dirname, 'templates/description.art'), {
+                        images: image
+                            ? [
+                                  {
+                                      src: image,
+                                      alt: articleData.image_alt_text || title,
+                                      width: articleData.image_width,
+                                      height: articleData.image_height,
+                                  },
+                              ]
+                            : undefined,
+                        intro: articleData.summary,
+                        content: articleData.content,
+                    });
+                    const pubDate: number | string = articleData.published_at_date_time;
+                    // Absent fields are dropped, matching the listing-page mapping above.
+                    const categories: string[] = [
+                        ...new Set([articleData.display_category, articleData.primary_category, articleData.subcategory, ...(articleData.categories ?? []), articleData.podcast_series_name].filter(Boolean)),
+                    ];
+                    const authors: DataItem['author'] = articleData.authors?.map((author) => ({
+                        name: author.name,
+                        url: author.url ? new URL(author.url, baseUrl).href : undefined,
+                        avatar: author.picture_file,
+                    }));
+                    const guid: string = `scientificamerican-${articleData.id}`;
+                    const updated: number | string = articleData.updated_at_date_time ?? pubDate;
+
+                    // Detail fields override the listing ones; `link` is kept from the listing item.
+                    return {
+                        ...item,
+                        title,
+                        description,
+                        pubDate: pubDate ? timezone(parseDate(pubDate), +8) : undefined,
+                        category: categories,
+                        author: authors,
+                        doi: articleData.article_doi,
+                        guid,
+                        id: guid,
+                        content: {
+                            html: description,
+                            text: articleData.summary ?? description,
+                        },
+                        image,
+                        banner: image,
+                        updated: updated ? timezone(parseDate(updated), +8) : undefined,
+                        language,
+                        ...buildEnclosure(articleData.media_url, title, image),
+                    };
+                });
+            })
+        )
+    ).filter((_): _ is DataItem => true);
+
+    return {
+        title: $('title').text(),
+        description: $('meta[name="description"]').attr('content'),
+        link: targetUrl,
+        item: items,
+        allowEmpty: true,
+        image: $('meta[property="og:image"]').attr('content'),
+        author: $('meta[property="og:site_name"]').attr('content'),
+        language,
+        feedLink: $('link[type="application/rss+xml"]').attr('href'),
+        itunes_author: $('meta[property="og:site_name"]').attr('content'),
+        itunes_category: 'Science',
+        id: $('meta[property="og:url"]').attr('content'),
+    };
+};
+
+/** Route definition for the podcast index (`/podcasts`) and a single series (`/podcast/:id`). */
+export const route: Route = {
+    path: ['/podcasts/:id?', '/podcast/:id?'],
+    name: 'Podcasts',
+    url: 'www.scientificamerican.com',
+    maintainers: ['nczitzk'],
+    handler,
+    example: '/scientificamerican/podcast',
+    parameters: {
+        id: 'ID, see below',
+    },
+    description: `:::tip
+If you subscribe to [Science Quickly](https://www.scientificamerican.com/podcast/science-quickly/), where the URL is \`https://www.scientificamerican.com/podcast/science-quickly/\`, extract the part \`https://www.scientificamerican.com/podcast/\` to the end, which is \`science-quickly\`, and use it as the parameter to fill in. Therefore, the route will be [\`/scientificamerican/podcasts/science-quickly\`](https://rsshub.app/scientificamerican/podcasts/science-quickly).
+:::
+
+| All | Science Quickly | Uncertain    |
+| --- | --------------- | ------------ |
+|     | science-quickly | science-talk |
+`,
+    categories: ['new-media'],
+    features: {
+        requireConfig: false,
+        requirePuppeteer: false,
+        antiCrawler: false,
+        supportRadar: true,
+        supportBT: false,
+        supportPodcast: true,
+        supportScihub: false,
+    },
+    radar: [
+        {
+            source: ['www.scientificamerican.com/podcasts/', 'www.scientificamerican.com/podcast/:id'],
+            // NOTE(review): unlike the static targets below, this one includes the namespace prefix; confirm which form the radar expects.
+            target: (params) => {
+                const id: string = params.id;
+
+                return `/scientificamerican/podcast${id ? `/${id}` : ''}`;
+            },
+        },
+        {
+            title: 'Science Quickly',
+            source: ['www.scientificamerican.com/podcast/science-quickly/'],
+            target: '/podcast/science-quickly',
+        },
+        {
+            title: 'Uncertain',
+            source: ['www.scientificamerican.com/podcast/science-talk/'],
+            target: '/podcast/science-talk',
+        },
+    ],
+    view: ViewType.Articles,
+
+    zh: {
+        path: ['/podcasts/:id?', '/podcast/:id?'],
+        name: 'Podcasts',
+        url: 'www.scientificamerican.com',
+        maintainers: ['nczitzk'],
+        handler,
+        example: '/scientificamerican/podcast',
+        parameters: {
+            id: 'ID,见下表',
+        },
+        description: `:::tip
+若订阅 [Science Quickly](https://www.scientificamerican.com/podcast/science-quickly/),网址为 \`https://www.scientificamerican.com/podcast/science-quickly/\`,请截取 \`https://www.scientificamerican.com/podcast/\` 到末尾 \`/\` 的部分 \`science-quickly\` 作为 \`id\` 参数填入,此时目标路由为 [\`/scientificamerican/podcasts/science-quickly\`](https://rsshub.app/scientificamerican/podcasts/science-quickly)。
+:::
+
+| 全部 | Science Quickly | Uncertain    |
+| ---- | --------------- | ------------ |
+|      | science-quickly | science-talk |
+`,
+    },
+};
diff --git a/lib/routes/scientificamerican/templates/description.art b/lib/routes/scientificamerican/templates/description.art
new file mode 100644
index 00000000000000..ec6912ddf58022
--- /dev/null
+++ b/lib/routes/scientificamerican/templates/description.art
@@ -0,0 +1,31 @@
+{{ if images }}
+    {{ each images image }}
+        {{ if image?.src }}
+            <figure>
+                <img
+                    {{ if image.alt }}
+                        alt="{{ image.alt }}"
+                    {{ /if }}
+                    {{ if image.width }}
+                        width="{{ image.width }}"
+                    {{ /if }}
+                    {{ if image.height }}
+                        height="{{ image.height }}"
+                    {{ /if }}
+                    src="{{ image.src }}">
+            </figure>
+        {{ /if }}
+    {{ /each }}
+{{ /if }}
+
+{{ if intro }}
+    {{@ intro }}
+{{ /if }}
+
+{{ if content }}
+    {{ each content c }}
+        <{{ c.tag }}>
+            {{@ c.content }}
+        </{{ c.tag }}>
+    {{ /each }}
+{{ /if }}
\ No newline at end of file