From e7f20b43b7d59422d047b7b51fbe3a78513d2640 Mon Sep 17 00:00:00 2001
From: Ethan Shen <42264778+nczitzk@users.noreply.github.com>
Date: Fri, 29 Dec 2023 22:27:52 +0800
Subject: [PATCH] feat(route): add Cool Papers (#14129)

* feat(route): add Cool Papers

* fix typo

* fix: remove kimi chat content

* docs: fix typo

---------

---
 lib/v2/papers/index.js                  | 85 +++++++++++++++++++++++++
 lib/v2/papers/maintainer.js             |  3 +
 lib/v2/papers/radar.js                  | 17 ++++++
 lib/v2/papers/router.js                 |  3 +
 lib/v2/papers/templates/description.art | 15 +++++
 website/docs/routes/journal.mdx         | 15 +++++
 6 files changed, 138 insertions(+)
 create mode 100644 lib/v2/papers/index.js
 create mode 100644 lib/v2/papers/maintainer.js
 create mode 100644 lib/v2/papers/radar.js
 create mode 100644 lib/v2/papers/router.js
 create mode 100644 lib/v2/papers/templates/description.art

diff --git a/lib/v2/papers/index.js b/lib/v2/papers/index.js
new file mode 100644
index 00000000000000..e8437ad52eb729
--- /dev/null
+++ b/lib/v2/papers/index.js
@@ -0,0 +1,85 @@
+const got = require('@/utils/got');
+const cheerio = require('cheerio');
+const { parseDate } = require('@/utils/parse-date');
+const { art } = require('@/utils/render');
+const path = require('path');
+
+/**
+ * Route handler for Cool Papers (papers.cool).
+ *
+ * Fetches the listing page for `ctx.params.category` (default `arxiv/cs.CL`),
+ * turns up to `ctx.query.limit` (default 150) `div.panel` elements into feed
+ * items and stores the resulting feed in `ctx.state.data`.
+ *
+ * @param {object} ctx - Request context; `params.category` and `query.limit` are read.
+ * @returns {Promise<void>} Resolves once `ctx.state.data` has been assigned.
+ */
+module.exports = async (ctx) => {
+    const { category = 'arxiv/cs.CL' } = ctx.params;
+    const limit = ctx.query.limit ? parseInt(ctx.query.limit, 10) : 150;
+
+    const rootUrl = 'https://papers.cool';
+    const currentUrl = new URL(category, rootUrl).href;
+
+    // The first path segment of the category (e.g. `arxiv`) is used to build the `<site>/kimi/` base URL.
+    const site = category.split(/\//)[0];
+    const apiKimiUrl = new URL(`${site}/kimi/`, rootUrl).href;
+
+    const { data: response } = await got(currentUrl);
+
+    const $ = cheerio.load(response);
+
+    // A single date is read from the first `p.info` and shared by every item. When the
+    // page carries no recognisable date, `pubDate` stays undefined instead of throwing.
+    const dateText = $('p.info')
+        .first()
+        .text()
+        .match(/(\d+\s\w+\s\d{4})/)?.[1];
+    const pubDate = dateText ? parseDate(dateText, ['DD MMM YYYY', 'D MMM YYYY']) : undefined;
+
+    const items = $('div.panel')
+        .slice(0, limit)
+        .toArray()
+        .map((panel) => {
+            const item = $(panel);
+
+            const id = item.prop('id');
+            const kimiUrl = new URL(id, apiKimiUrl).href;
+            // The PDF URL is the single-quoted http(s) string inside the preview anchor's
+            // `onclick`; a panel without that anchor or attribute gets no enclosure.
+            const enclosureUrl = item.find('a.pdf-preview').prop('onclick')?.match(/'(http.*?)'/)?.[1];
+
+            return {
+                title: item.find('span[id]').first().text(),
+                link: kimiUrl,
+                description: art(path.join(__dirname, 'templates/description.art'), {
+                    kimiUrl,
+                    siteUrl: item.find('a').first().prop('href'),
+                    pdfUrl: enclosureUrl,
+                    summary: item.find('p.summary').text(),
+                }),
+                author: item
+                    .find('p.authors a')
+                    .toArray()
+                    .map((a) => $(a).text())
+                    .join('; '),
+                guid: `${currentUrl}#${id}`,
+                pubDate,
+                enclosure_url: enclosureUrl,
+                enclosure_type: enclosureUrl ? 'application/pdf' : undefined,
+            };
+        });
+
+    const title = $('title').text();
+    const icon = new URL('favicon.ico', rootUrl).href;
+
+    ctx.state.data = {
+        item: items,
+        title: title.split(/-/)[0].trim(),
+        link: currentUrl,
+        description: title,
+        icon,
+        logo: icon,
+        subtitle: $('h1').first().text(),
+    };
+};
diff --git a/lib/v2/papers/maintainer.js b/lib/v2/papers/maintainer.js
new file mode 100644
index 00000000000000..81eb78d7328b63
--- /dev/null
+++ b/lib/v2/papers/maintainer.js
@@ -0,0 +1,3 @@
+module.exports = {
+    '/:category?': ['nczitzk'],
+};
diff --git a/lib/v2/papers/radar.js b/lib/v2/papers/radar.js
new file mode 100644
index 00000000000000..7c5b25b9706b75
--- /dev/null
+++ b/lib/v2/papers/radar.js
@@ -0,0 +1,17 @@
+module.exports = {
+    'papers.cool': {
+        _name: 'Cool Papers',
+        '.': [
+            {
+                title: 'Category',
+                docs: 'https://docs.rsshub.app/routes/journal#cool-papers-category',
+                source: ['/:category*'],
+                target: (params) => {
+                    const category = params.category;
+
+                    return `/papers${category ? `/${category}` : ''}`;
+                },
+            },
+        ],
+    },
+};
diff --git a/lib/v2/papers/router.js b/lib/v2/papers/router.js
new file mode 100644
index 00000000000000..9aa561a602a912
--- /dev/null
+++ b/lib/v2/papers/router.js
@@ -0,0 +1,3 @@
+module.exports = (router) => {
+    router.get('/:category*', require('./'));
+};
diff --git a/lib/v2/papers/templates/description.art b/lib/v2/papers/templates/description.art
new file mode 100644
index 00000000000000..368ea61f8ee842
--- /dev/null
+++ b/lib/v2/papers/templates/description.art
@@ -0,0 +1,15 @@
+{{ if pdfUrl }}
+  [PDF]
+{{ /if }}
+
+{{ if siteUrl }}
+  [Site]
+{{ /if }}
+
+{{ if kimiUrl }}
+  [Kimi]
+{{ /if }}
+
+{{ if summary }}
+

{{ summary }}

+{{ /if }} \ No newline at end of file diff --git a/website/docs/routes/journal.mdx b/website/docs/routes/journal.mdx index c87650d69b5319..0ee331017ef2e2 100644 --- a/website/docs/routes/journal.mdx +++ b/website/docs/routes/journal.mdx @@ -114,6 +114,21 @@ Including 'cell', 'cancer-cell', 'cell-chemical-biology', 'cell-host-microbe', 'cell-metabolism', 'cell-reports', 'cell-reports-physical-science', 'cell-stem-cell', 'cell-systems', 'chem', 'current-biology', 'developmental-cell', 'immunity', 'joule', 'matter', 'molecular-cell', 'neuron', 'one-earth' and 'structure'. +## Cool Papers {#cool-papers} + +### Category {#cool-papers-category} + + + | Category | id | + | ----------------------------------------------------- | ----------- | + | Arxiv Computation and Language (cs.CL) | arxiv/cs.CL | + | Arxiv Machine Learning (cs.LG) | arxiv/cs.LG | + | Arxiv Artificial Intelligence (cs.AI) | arxiv/cs.AI | + | Arxiv Information Retrieval (cs.IR) | arxiv/cs.IR | + | Arxiv Computer Vision and Pattern Recognition (cs.CV) | arxiv/cs.CV | + | Arxiv Machine Learning (stat.ML) | arxiv/stat.ML | + + ## Deloitte {#deloitte} ### Articles {#deloitte-articles}