diff --git a/lib/v2/huxiu/article.js b/lib/v2/huxiu/article.js deleted file mode 100644 index 757aeaa05b3130..00000000000000 --- a/lib/v2/huxiu/article.js +++ /dev/null @@ -1,33 +0,0 @@ -const got = require('@/utils/got'); -const { parseDate } = require('@/utils/parse-date'); -const utils = require('./utils'); - -module.exports = async (ctx) => { - const link = `${utils.baseUrl}/article/`; - const { data } = await got.post(`${utils.articleApi}/web/article/articleList`, { - headers: { - Referer: link, - }, - form: { - platform: 'www', - pagesize: ctx.query.limit ? parseInt(ctx.query.limit) : 22, - }, - }); - - const list = data.data.dataList.map((item) => ({ - title: item.title, - link: `${utils.baseUrl}/article/${item.aid}.html`, - description: item.summary, - pubDate: parseDate(item.dateline, 'X'), - author: item.user_info.username, - })); - - const items = await utils.ProcessFeed(list, ctx.cache); - - ctx.state.data = { - title: '虎嗅网 - 首页资讯', - link, - description: '聚合优质的创新信息与人群,捕获精选 | 深度 | 犀利的商业科技资讯。在虎嗅,不错过互联网的每个重要时刻。', - item: items, - }; -}; diff --git a/lib/v2/huxiu/author.js b/lib/v2/huxiu/author.js deleted file mode 100644 index 03a7cbd765dd26..00000000000000 --- a/lib/v2/huxiu/author.js +++ /dev/null @@ -1,36 +0,0 @@ -const got = require('@/utils/got'); -const { parseDate } = require('@/utils/parse-date'); -const utils = require('./utils'); - -module.exports = async (ctx) => { - const { id } = ctx.params; - const link = `${utils.baseUrl}/member/${id}/article.html`; - - const articleList = await got({ - method: 'post', - url: `${utils.accountApi}/web/article/articleList`, - form: { - platform: 'www', - uid: id, - type: 0, - page: 1, - }, - }).json(); - - const list = articleList.data.datalist.map((item) => ({ - title: item.title, - link: `${utils.baseUrl}/article/${item.aid}.html`, - description: item.summary, - pubDate: parseDate(item.time), - })); - - const items = await utils.ProcessFeed(list, ctx.cache); - - const authorInfo = `虎嗅网 - ${items[0].author}`; - - ctx.state.data = { - title: authorInfo, - link, - item: items, - }; -}; diff --git a/lib/v2/huxiu/briefColumn.js b/lib/v2/huxiu/briefColumn.js index eeca7b6a2b92a6..f046e93a5aeff3 100644 --- a/lib/v2/huxiu/briefColumn.js +++ b/lib/v2/huxiu/briefColumn.js @@ -1,41 +1,29 @@ const got = require('@/utils/got'); -const { parseDate } = require('@/utils/parse-date'); -const { baseUrl, briefApi, ProcessFeed } = require('./utils'); + +const { apiBriefRootUrl, processItems, fetchBriefColumnData } = require('./util'); module.exports = async (ctx) => { const { id } = ctx.params; - const link = `${briefApi}/briefColumn/getContentListByCategoryId`; - const { data: response } = await got - .post(link, { - form: { - platform: 'www', - brief_column_id: id, - }, - }) - .json(); - const { - data: { data: briefDetail }, - } = await got.post(`${briefApi}/briefColumn/detail`, { + const limit = ctx.query.limit ? parseInt(ctx.query.limit, 10) : 20; + + const apiUrl = new URL('briefColumn/getContentListByCategoryId', apiBriefRootUrl).href; + + const { data: response } = await got.post(apiUrl, { form: { platform: 'www', brief_column_id: id, + pagesize: limit, }, }); - const list = response.datalist.map((item) => ({ - title: item.title, - link: `${baseUrl}/brief/${item.brief_id}`, - description: item.preface, - pubDate: parseDate(item.publish_time, 'X'), - })); + ctx.state.json = response.data.datalist; + + const items = await processItems(response.data.datalist, limit, ctx.cache.tryGet); - const items = await ProcessFeed(list, ctx.cache); + const data = await fetchBriefColumnData(id); ctx.state.data = { - title: `虎嗅 - ${briefDetail.name}`, - description: briefDetail.summary, - image: briefDetail.head_img, - link: `${baseUrl}/briefColumn/${id}.html`, item: items, + ...data, }; }; diff --git a/lib/v2/huxiu/channel.js b/lib/v2/huxiu/channel.js new file mode 100644 index 00000000000000..4afa5e0a6edefa --- /dev/null +++ b/lib/v2/huxiu/channel.js @@ -0,0 +1,28 @@ +const got = require('@/utils/got'); + +const { rootUrl, apiArticleRootUrl, processItems, fetchData } = require('./util'); + +module.exports = async (ctx) => { + const { id } = ctx.params; + const limit = ctx.query.limit ? parseInt(ctx.query.limit, 10) : 20; + + const apiUrl = new URL(`web/${id ? 'channel' : 'article'}/articleList`, apiArticleRootUrl).href; + const currentUrl = new URL(id ? `channel/${id}.html` : 'article', rootUrl).href; + + const { data: response } = await got.post(apiUrl, { + form: { + platform: 'www', + channel_id: id, + pagesize: limit, + }, + }); + + const items = await processItems(response.data?.dataList ?? response.data.datalist, limit, ctx.cache.tryGet); + + const data = await fetchData(currentUrl); + + ctx.state.data = { + item: items, + ...data, + }; +}; diff --git a/lib/v2/huxiu/club.js b/lib/v2/huxiu/club.js new file mode 100644 index 00000000000000..6a307784b8bb13 --- /dev/null +++ b/lib/v2/huxiu/club.js @@ -0,0 +1,30 @@ +const got = require('@/utils/got'); + +const { apiBriefRootUrl, processItems, fetchClubData } = require('./util'); + +module.exports = async (ctx) => { + const { id } = ctx.params; + const limit = ctx.query.limit ? parseInt(ctx.query.limit, 10) : 20; + + const apiUrl = new URL('club/briefList', apiBriefRootUrl).href; + + const { data, briefColumnId } = await fetchClubData(id); + + const { data: response } = await got.post(apiUrl, { + form: { + platform: 'www', + club_id: id, + brief_column_id: briefColumnId, + pagesize: limit, + }, + }); + + ctx.state.json = response.data.datalist; + + const items = await processItems(response.data.datalist, limit, ctx.cache.tryGet); + + ctx.state.data = { + item: items, + ...data, + }; +}; diff --git a/lib/v2/huxiu/collection.js b/lib/v2/huxiu/collection.js index ade81c626fc011..2e557875432b3f 100644 --- a/lib/v2/huxiu/collection.js +++ b/lib/v2/huxiu/collection.js @@ -1,39 +1,27 @@ const got = require('@/utils/got'); -const cheerio = require('cheerio'); -const utils = require('./utils'); -const { parseDate } = require('@/utils/parse-date'); + +const { rootUrl, apiArticleRootUrl, processItems, fetchData } = require('./util'); module.exports = async (ctx) => { const { id } = ctx.params; - const link = `${utils.baseUrl}/collection/${id}.html`; - const { data: response } = await got(link, { - https: { - rejectUnauthorized: false, - }, - }); + const limit = ctx.query.limit ? parseInt(ctx.query.limit, 10) : 10; - const $ = cheerio.load(response); + const apiUrl = new URL('web/collection/articleList', apiArticleRootUrl).href; + const currentUrl = new URL(`collection/${id}.html`, rootUrl).href; - const initialState = utils.parseInitialState($); - - const { collectionDetail } = initialState.collectionDetail; - const list = collectionDetail.article_list.datalist.map((e) => ({ - title: e.title, - link: `${utils.baseUrl}/article/${e.aid}.html`, - description: e.summary, - pubDate: parseDate(e.dateline, 'X'), - author: e.user_info.username, - })); + const { data: response } = await got.post(apiUrl, { + form: { + platform: 'www', + collection_id: id, + }, + }); - const items = await utils.ProcessFeed(list, ctx.cache); + const items = await processItems(response.data.datalist, limit, ctx.cache.tryGet); - const info = `虎嗅 - ${collectionDetail.name}`; + const data = await fetchData(currentUrl); ctx.state.data = { - title: info, - description: collectionDetail.summary, - image: collectionDetail.icon, - link, item: items, + ...data, }; }; diff --git a/lib/v2/huxiu/maintainer.js b/lib/v2/huxiu/maintainer.js index 2e6f15e8f48653..9a5ae1428b4a64 100644 --- a/lib/v2/huxiu/maintainer.js +++ b/lib/v2/huxiu/maintainer.js @@ -1,8 +1,11 @@ module.exports = { - '/article': ['HenryQW'], - '/author/:id': ['HenryQW'], - '/collection/:id': ['AlexdanerZe'], + '/article': ['HenryQW', 'nczitzk'], + '/briefcolumn/:id': ['Fatpandac', 'nczitzk'], + '/channel/:id?': ['nczitzk'], + '/club/:id': ['nczitzk'], + '/collection/:id': ['AlexdanerZe', 'nczitzk'], + '/member/:id/:type?': ['HenryQW', 'nczitzk'], '/moment': ['nczitzk'], - '/tag/:id': ['xyqfer', 'HenryQW'], - '/search/:keyword': ['xyqfer', 'HenryQW'], + '/search/:keyword': ['xyqfer', 'HenryQW', 'nczitzk'], + '/tag/:id': ['xyqfer', 'HenryQW', 'nczitzk'], }; diff --git a/lib/v2/huxiu/member.js b/lib/v2/huxiu/member.js new file mode 100644 index 00000000000000..86c95c519dafbd --- /dev/null +++ b/lib/v2/huxiu/member.js @@ -0,0 +1,27 @@ +const got = require('@/utils/got'); + +const { rootUrl, apiMemberRootUrl, processItems, fetchData } = require('./util'); + +module.exports = async (ctx) => { + const { id, type = 'article' } = ctx.params; + const limit = ctx.query.limit ? parseInt(ctx.query.limit, 10) : 10; + + const apiUrl = new URL(`web/${type}/${type}List`, apiMemberRootUrl).href; + const currentUrl = new URL(`member/${id}${type === 'article' ? '' : `/${type}`}.html`, rootUrl).href; + + const { data: response } = await got.post(apiUrl, { + form: { + platform: 'www', + uid: id, + }, + }); + + const items = await processItems(response.data.datalist, limit, ctx.cache.tryGet); + + const data = await fetchData(currentUrl); + + ctx.state.data = { + item: items, + ...data, + }; +}; diff --git a/lib/v2/huxiu/moment.js b/lib/v2/huxiu/moment.js index fe8b9d3e0c98f3..644719ea5e4f49 100644 --- a/lib/v2/huxiu/moment.js +++ b/lib/v2/huxiu/moment.js @@ -1,39 +1,25 @@ const got = require('@/utils/got'); -const { parseDate } = require('@/utils/parse-date'); -const { art } = require('@/utils/render'); -const path = require('path'); -const { baseUrl: rootUrl, momentApi: apiRootUrl } = require('./utils'); + +const { rootUrl, apiMomentRootUrl, processItems, fetchData } = require('./util'); module.exports = async (ctx) => { - const currentUrl = `${rootUrl}/moment`; - const apiUrl = `${apiRootUrl}/web-v2/moment/feed`; + const limit = ctx.query.limit ? parseInt(ctx.query.limit, 10) : 20; + + const apiUrl = new URL('web-v2/moment/feed', apiMomentRootUrl).href; + const currentUrl = new URL('moment', rootUrl).href; - const response = await got({ - method: 'post', - url: apiUrl, + const { data: response } = await got.post(apiUrl, { form: { - last_dateline: parseInt(new Date().getTime() / 1000), platform: 'www', - is_ai: 0, }, }); - const items = response.data.data.moment_list.datalist[0].datalist.map((item) => ({ - title: item.content, - link: item.share_url, - author: item.user_info.username, - pubDate: parseDate(item.publish_time * 1000), - description: art(path.join(__dirname, 'templates/moment.art'), { - description: item.content, - images: item.img_urls, - video: item.video, - comments: item.comment, - }), - })); + const items = await processItems(response.data.moment_list.datalist[0].datalist, limit, ctx.cache.tryGet); + + const data = await fetchData(currentUrl); ctx.state.data = { - title: '虎嗅 - 24小时', - link: currentUrl, item: items, + ...data, }; }; diff --git a/lib/v2/huxiu/radar.js b/lib/v2/huxiu/radar.js index 88aade36c25fe2..f37fed6b6be7e6 100644 --- a/lib/v2/huxiu/radar.js +++ b/lib/v2/huxiu/radar.js @@ -3,44 +3,52 @@ module.exports = { _name: '虎嗅', '.': [ { - title: '首页资讯', - docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu', - source: ['/article', '/'], + title: '资讯', + docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu-zi-xun', + source: ['/article'], target: '/huxiu/article', }, + { + title: '频道', + docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu-pin-dao', + source: ['/channel'], + target: (params) => `/huxiu/channel/${params.id.replace(/\.html$/, '')}`, + }, { title: '24小时', - docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu', - source: ['/moment', '/'], + docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu-24-xiao-shi', + source: ['/moment'], target: '/huxiu/moment', }, { title: '标签', - docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu', + docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu-biao-qian', source: ['/tags/:id'], - target: (params) => `/huxiu/tag/${params.id.replace('.html', '')}`, + target: (params) => `/huxiu/tag/${params.id.replace(/\.html$/, '')}`, }, { title: '搜索', - docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu', + docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu-sou-suo', + source: ['/'], + target: '/huxiu/search/:keyword', }, { title: '作者', - docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu', - source: ['/member/:id/*', '/'], - target: '/huxiu/author/:id', + docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu-zuo-zhe', + source: ['/member/:id/:type'], + target: (params) => `/huxiu/member/${params.id}/${params.type.replace(/\.html$/, '')}`, }, { title: '文集', - docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu', - source: ['/collection/:id', '/'], - target: (params) => `/huxiu/collection/${params.id.replace('.html', '')}`, + docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu-wen-ji', + source: ['/collection/:id'], + target: (params) => `/huxiu/collection/${params.id.replace(/\.html$/, '')}`, }, { title: '简报', - docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu', + docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu-jian-bao', source: ['/briefColumn/:id', '/'], - target: (params) => `/huxiu/briefcolumn/${params.id.replace('.html', '')}`, + target: (params) => `/huxiu/briefcolumn/${params.id.replace(/\.html$/, '')}`, }, ], }, diff --git a/lib/v2/huxiu/router.js b/lib/v2/huxiu/router.js index d527815d6f566d..af15ad97b98ca6 100644 --- a/lib/v2/huxiu/router.js +++ b/lib/v2/huxiu/router.js @@ -1,9 +1,12 @@ module.exports = (router) => { - router.get('/article', require('./article')); - router.get('/author/:id', require('./author')); + router.get('/article', require('./channel')); + router.get('/author/:id/:type?', require('./member')); + router.get('/briefcolumn/:id', require('./briefColumn')); + router.get('/channel/:id?', require('./channel')); + router.get('/club/:id', require('./club')); router.get('/collection/:id', require('./collection')); + router.get('/member/:id/:type?', require('./member')); router.get('/moment', require('./moment')); - router.get('/tag/:id', require('./tag')); router.get('/search/:keyword', require('./search')); - router.get('/briefcolumn/:id', require('./briefColumn')); + router.get('/tag/:id', require('./tag')); }; diff --git a/lib/v2/huxiu/search.js b/lib/v2/huxiu/search.js index a379921bf017df..02943cd3dfb950 100644 --- a/lib/v2/huxiu/search.js +++ b/lib/v2/huxiu/search.js @@ -1,40 +1,35 @@ const got = require('@/utils/got'); -const utils = require('./utils'); + +const { rootUrl, apiSearchRootUrl, generateSignature, processItems, fetchData } = require('./util'); module.exports = async (ctx) => { const { keyword } = ctx.params; - const url = `${utils.searchApi}/api/article`; - const link = utils.baseUrl; + const limit = ctx.query.limit ? parseInt(ctx.query.limit, 10) : 20; - const { data } = await got.post(url, { - headers: { - Referer: utils.baseUrl, - }, + const apiUrl = new URL('api/article', apiSearchRootUrl).href; + const currentUrl = rootUrl; + + const { data: response } = await got.post(apiUrl, { searchParams: { platform: 'www', s: keyword, sort: '', page: 1, - pagesize: 20, - appid: 'hx_search', - ...utils.generateSignature(), + pagesize: limit, + appid: 'hx_search_202303', + ...generateSignature(), }, }); - const list = data.data.datalist.map((d) => ({ - title: d.title, - link: d.url.includes('article') ? `${d.url}.html` : d.url, - description: d.content, - author: d.author, - })); + const items = await processItems(response.data.datalist, limit, ctx.cache.tryGet); + + const data = await fetchData(currentUrl); + data.title = `${keyword}-搜索结果-${data.title}`; - const items = await utils.ProcessFeed(list, ctx.cache); + ctx.state.json = response.data.datalist; - const info = `虎嗅网 - ${keyword}`; ctx.state.data = { - title: info, - link, - description: info, item: items, + ...data, }; }; diff --git a/lib/v2/huxiu/tag.js b/lib/v2/huxiu/tag.js index a1d1909a20588b..c6ca1433350a91 100644 --- a/lib/v2/huxiu/tag.js +++ b/lib/v2/huxiu/tag.js @@ -1,38 +1,26 @@ const got = require('@/utils/got'); -const cheerio = require('cheerio'); -const utils = require('./utils'); -const { parseDate } = require('@/utils/parse-date'); -const timezone = require('@/utils/timezone'); + +const { rootUrl, processItems, fetchData } = require('./util'); module.exports = async (ctx) => { const { id } = ctx.params; - const link = `${utils.baseUrl}/tags/${id}.html`; - const { data } = await got(link, { - https: { - rejectUnauthorized: false, + const limit = ctx.query.limit ? parseInt(ctx.query.limit, 10) : 10; + + const apiUrl = new URL('v2_action/tag_article_list', rootUrl).href; + const currentUrl = new URL(`tags/${id}.html`, rootUrl).href; + + const { data: response } = await got.post(apiUrl, { + form: { + tag_id: id, }, }); - const $ = cheerio.load(data); - const list = $('.related-article li') - .toArray() - .map((e) => { - e = $(e); - const a = e.find('a'); - return { - title: a.text(), - link: `${utils.baseUrl}${a.attr('href')}`, - pubDate: timezone(parseDate(e.find('.time').text()), 8), - }; - }); + const items = await processItems(response.data, limit, ctx.cache.tryGet); - const items = await utils.ProcessFeed(list, ctx.cache); + const data = await fetchData(currentUrl); - const info = `虎嗅 - ${$('h1').text()}`; ctx.state.data = { - title: info, - link, - description: info, item: items, + ...data, }; }; diff --git a/lib/v2/huxiu/templates/brief.art b/lib/v2/huxiu/templates/brief.art deleted file mode 100644 index 0babfafd68d952..00000000000000 --- a/lib/v2/huxiu/templates/brief.art +++ /dev/null @@ -1,22 +0,0 @@ -{{ if brief.preface }}{{@ brief.preface }}
{{ /if }} - -{{ if brief.audio_info }} -
-
{{ brief.audio_title }}
- -
-
-{{ /if }} - -{{ if brief.part_list }}{{ each brief.part_list part }} - {{ if part.title }}

{{ part.title }}

{{ /if }} - {{ if part.content_info }}{{ each part.content_info content }} - {{ if content.type === 'text' }}{{@ content.text }}{{ /if }} - {{ if content.type === 'video' }} - - {{ /if }} - {{ if content.type === 'image'}}{{ /if }} - {{ /each }}
{{ /if }} -{{ /each }}{{ /if }} - -{{ if brief.peroration }}{{ brief.peroration }}{{ /if }} diff --git a/lib/v2/huxiu/templates/description.art b/lib/v2/huxiu/templates/description.art new file mode 100644 index 00000000000000..298a0053a9db9a --- /dev/null +++ b/lib/v2/huxiu/templates/description.art @@ -0,0 +1,46 @@ +{{ if (!video || !video.src) && image?.src }} +
+ +
+{{ /if }} + +{{ if audio?.src }} + +{{ /if }} + +{{ if video?.src }} + +{{ /if }} + +{{ if preface }} + {{@ preface }} +{{ /if }} + +{{ if summary }} + {{@ summary }} +{{ /if }} + +{{ if description }} + {{@ description }} +{{ /if }} \ No newline at end of file diff --git a/lib/v2/huxiu/templates/img.art b/lib/v2/huxiu/templates/img.art deleted file mode 100644 index d7b1dd45a51b92..00000000000000 --- a/lib/v2/huxiu/templates/img.art +++ /dev/null @@ -1,3 +0,0 @@ -{{ if img }} - -{{ /if }} diff --git a/lib/v2/huxiu/templates/moment.art b/lib/v2/huxiu/templates/moment.art deleted file mode 100644 index 6a0c4ca04d93fc..00000000000000 --- a/lib/v2/huxiu/templates/moment.art +++ /dev/null @@ -1,16 +0,0 @@ -{{@ description }} -{{ if images }} -{{ each images image }} - -{{ /each }} -{{ /if }} -{{ if video }} - -{{ /if }} -{{ if comments }} -{{ each comments.datalist comment }} -

{{ comment.user_info.username }}: {{ comment.content }} ({{ comment.show_time }})

-{{ /each }} -{{ /if }} \ No newline at end of file diff --git a/lib/v2/huxiu/templates/video.art b/lib/v2/huxiu/templates/video.art deleted file mode 100644 index 338b131088f974..00000000000000 --- a/lib/v2/huxiu/templates/video.art +++ /dev/null @@ -1,7 +0,0 @@ -{{ if video_info }} - -{{ /if }} diff --git a/lib/v2/huxiu/util.js b/lib/v2/huxiu/util.js new file mode 100644 index 00000000000000..5bb4b76d8f8dde --- /dev/null +++ b/lib/v2/huxiu/util.js @@ -0,0 +1,460 @@ +const got = require('@/utils/got'); +const cheerio = require('cheerio'); +const { parseDate } = require('@/utils/parse-date'); +const { art } = require('@/utils/render'); +const path = require('path'); +const CryptoJS = require('crypto-js'); + +const domain = 'huxiu.com'; +const rootUrl = `https://www.${domain}`; + +const apiArticleRootUrl = `https://api-article.${domain}`; +const apiBriefRootUrl = `https://api-brief.${domain}`; +const apiMemberRootUrl = `https://api-account.${domain}`; +const apiMomentRootUrl = `https://moment-api.${domain}`; +const apiSearchRootUrl = `https://search-api.${domain}`; + +/** + * Cleans up HTML data by removing specific elements and attributes. + * + * @param {string} data - The HTML data to clean up. + * @returns {string} - The cleaned up HTML data. + */ +const cleanUpHTML = (data) => { + const $ = cheerio.load(data); + + $('div.neirong-shouquan').remove(); + $('em.vote__bar, div.vote__btn, div.vote__time').remove(); + $('p img').each((_, e) => { + e = $(e); + e.parent().replaceWith( + art(path.join(__dirname, 'templates/description.art'), { + image: { + src: (e.prop('src') ?? e.prop('_src')).split(/\?/)[0], + width: e.prop('data-w'), + height: e.prop('data-h'), + }, + }) + ); + }); + $('p, span').each((_, e) => { + e = $(e); + if (e.contents().length === 1 && /^\s*$/.test(e.text())) { + e.remove(); + } else { + e.removeClass(); + e.removeAttr('data-check-id label class'); + } + }); + $('.text-big-title').each((_, e) => { + e.tagName = 'h3'; + e = $(e); + e.removeClass(); + e.removeAttr('class'); + }); + $('.text-sm-title').each((_, e) => { + e.tagName = 'h4'; + e = $(e); + e.removeClass(); + e.removeAttr('class'); + }); + + return $.html(); +}; + +/** + * Fetch brief column data for the specified ID. + * + * @param {string} url - The ID of the brief column to fetch data from. + * @returns {Promise} A promise that resolves to an object containing the fetched data + * to be added into `ctx.state.data`. + */ +const fetchBriefColumnData = async (id) => { + const apiBriefColumnUrl = new URL('briefColumn/detail', apiBriefRootUrl).href; + + const { + data: { data }, + } = await got.post(apiBriefColumnUrl, { + form: { + platform: 'www', + brief_column_id: id, + }, + }); + + const currentUrl = new URL(`club/${data.club_id}.html`, rootUrl).href; + + const { data: currentResponse } = await got(currentUrl); + + const $ = cheerio.load(currentResponse); + + const subtitle = `${data.name}-${data.sub_name}`; + const icon = new URL($('link[rel="apple-touch-icon"]').prop('href'), rootUrl).href; + const author = $('meta[name="author"]').prop('content'); + + return { + title: `${subtitle}-${author}`, + link: currentUrl, + description: data.summary, + language: $('html').prop('lang'), + image: data.head_img, + icon, + logo: icon, + subtitle, + author, + itunes_author: author, + itunes_category: 'News', + allowEmpty: true, + }; +}; + +/** + * Fetches club data for the specified ID and the ID of the default brief column. + * + * @param {string} id - The ID of the club to fetch data from. + * @returns {Promise} data - A promise that resolves to an object containing the fetched data + * to be added into `ctx.state.data`. + * @returns {string} id - the ID of the default brief column. + */ +const fetchClubData = async (id) => { + const currentUrl = new URL(`club/${id}.html`, rootUrl).href; + + const { data: currentResponse } = await got(currentUrl); + + const $ = cheerio.load(currentResponse); + + const title = $('title').text(); + const icon = new URL($('link[rel="apple-touch-icon"]').prop('href'), rootUrl).href; + const author = $('meta[name="author"]').prop('content'); + + return { + data: { + title, + link: currentUrl, + description: $('ul.content-item li.content').text().trim(), + language: $('html').prop('lang'), + image: $('div.header img.img').prop('data-src')?.split(/\?/)[0] ?? undefined, + icon, + logo: icon, + subtitle: title.split(/-/)[0], + author, + itunes_author: author, + itunes_category: 'News', + allowEmpty: true, + }, + briefColumnId: currentResponse.match(/"brief_column_id":"(\d+)",/)[1], + }; +}; + +/** + * Fetch data from the specified URL. + * + * @param {string} url - The URL to fetch data from. + * @returns {Promise} A promise that resolves to an object containing the fetched data + * to be added into `ctx.state.data`. + */ +const fetchData = async (url) => { + const { data: response } = await got(url); + + const $ = cheerio.load(response); + + const icon = new URL($('link[rel="apple-touch-icon"]').prop('href'), rootUrl).href; + const author = $('meta[name="author"]').prop('content'); + + return { + title: $('title').text(), + link: url, + description: $('div.tag-content').text() || $('span.author-intro').text() || $('p.collection__intro').text() || $('meta[name="description"]').prop('content'), + language: $('html').prop('lang'), + icon, + logo: icon, + subtitle: $('title').text().split(/-/)[0], + author, + itunes_author: author, + itunes_category: 'News', + allowEmpty: true, + }; +}; + +/** + * Fetches item data. + * + * @param {Object} item - The item to fetch data for. + * @returns {Promise} The fetched item data object. + */ +const fetchItem = async (item) => { + const { data: detailResponse } = await got(item.link); + + const state = parseInitialState(detailResponse); + const data = state.briefStoreModule?.brief_detail.brief ?? state.articleDetail?.articleDetail ?? undefined; + + if (!data) { + return item; + } + + const { processed: audio = undefined, processedItem: audioItem = {} } = processAudioInfo(data.audio_info); + + if (Object.keys(audioItem).length !== 0) { + audioItem.itunes_item_image = data.pic_path ?? data.share_info?.share_img ?? undefined; + } + + const { processed: video = undefined, processedItem: videoItem = {} } = processVideoInfo(data.video_info); + + item.title = data.title ?? item.title; + item.description = art(path.join(__dirname, 'templates/description.art'), { + image: { + src: data.pic_path, + }, + video, + audio, + preface: cleanUpHTML(data.content_preface ?? data.preface), + summary: data.ai_summary, + description: cleanUpHTML(data.content), + }); + item.author = data.user_info?.username ?? item.author; + item.category = [data.video_article_tag, data.brief_column?.name ?? undefined, data.club_info?.name ?? undefined, ...(data.tags_info?.map((c) => c.name) ?? []), ...(data.relation_info?.channel?.map((c) => c.name) ?? [])].filter( + (c) => c + ); + item.pubDate = parseDate(data.dateline ?? data.publish_time, 'X'); + item.upvote = data.agreenum ?? item.upvote; + item.comments = data.commentnum ?? data.total_comment_num ?? item.comments; + + item.upvote = parseInt(item.upvote, 10); + item.comments = parseInt(item.comments, 10); + + return { + ...audioItem, + ...videoItem, + ...item, + }; +}; + +/** + * Generates a random nonce string. + * + * @returns {string} The generated nonce string. + */ +const generateNonce = () => { + let nonce = ''; + const e = 'abcdefghijklmnopqrstuvwxyz1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZ'; + const t = 16; + for (let i = 0; i < t; i++) { + nonce += e.charAt(Math.floor(Math.random() * e.length)); + } + return nonce; +}; + +/** + * Generates a signature object containing a nonce, timestamp, and signature value. + * + * @returns {string} nonce - The generated nonce. + * @returns {string} timestamp - The timestamp. + * @returns {string} signature - The calculated signature value. + */ +const generateSignature = () => { + const timestamp = Math.round(new Date().getTime() / 1000).toString(); + + const appSecret = 'hUzaABtNfDE-6UiyaYhfsmjW-8dnoyVc'; + const nonce = generateNonce(); + const r = [appSecret, timestamp, nonce].sort(); + return { + nonce, + timestamp, + signature: CryptoJS.SHA1(r[0] + r[1] + r[2]).toString(), + }; +}; + +/** + * Parses the initial state from the provided data. + * + * @param {string} data - The data to parse the initial state from. + * @returns {Object|undefined} - The parsed initial state object, or undefined if not found. + */ +const parseInitialState = (data) => { + const matches = data.match(/window\.__INITIAL_STATE__=(\{.*?\});\(function\(\)/); + if (matches) { + return JSON.parse(matches[1]); + } + return undefined; +}; + +const audioQualities = ['', 'low']; + +/** + * Processes the audio information and returns the processed data. + * + * @param {Object} info - The audio information to process. + * @returns {Object} - An object containing the processed audio data. + */ +const processAudioInfo = (info) => { + const quality = info ? audioQualities.find((quality) => info.hasOwnProperty(`audio_${quality === '' ? '' : `${quality}_`}path`)) : undefined; + + if (quality === undefined) { + return { + processed: undefined, + processedItem: {}, + }; + } + + const linkKey = `audio_${quality}path`; + const sizeKey = `audio_${quality}size`; + + const processed = { + duration: info.format_length_new ?? info.format_length, + size: info.hasOwnProperty(sizeKey) ? info[sizeKey] : undefined, + src: info[linkKey], + type: `audio/${info[linkKey].split(/\./).pop()}`, + }; + + const processedItem = { + itunes_duration: processed.duration, + enclosure_url: processed.src, + enclosure_length: processed.size, + enclosure_type: processed.type, + }; + + return { + processed, + processedItem, + }; +}; + +/** + * Process the item list and return the resulting array. + * + * @param {Object[]} items - The items to process. + * @param {number} limit - The maximum number of items to process. + * @param {Function} tryGet - The tryGet function that handles the retrieval process. + * @returns {Promise} - A promise that resolves to an array of processed items. + */ +const processItems = async (items, limit, tryGet) => { + items = items + .map((item) => { + let guid = ''; + let link = ''; + + if (item.moment_id) { + guid = `huxiu-moment-${item.moment_id}`; + if (item.url) { + link = item.url; + } else { + link = new URL(`moment/${item.moment_id}.html`, rootUrl).href; + } + } else if (item.brief_id || /huxiu\.com\/brief\//.test(item.url)) { + item.brief_id = item.brief_id ?? item.aid; + guid = `huxiu-brief-${item.brief_id}`; + link = new URL(`brief/${item.brief_id}.html`, rootUrl).href; + } else if (item.aid) { + guid = `huxiu-article-${item.aid}`; + link = new URL(`article/${item.aid}.html`, rootUrl).href; + } else { + return undefined; + } + + const { processed: audio = undefined, processedItem: audioItem = {} } = processAudioInfo(item.audio_info); + + if (Object.keys(audioItem).length !== 0) { + audioItem.itunes_item_image = item.pic_path ?? item.share_info?.share_img ?? undefined; + } + + const { processed: video = undefined, processedItem: videoItem = {} } = processVideoInfo(item.video_info); + + const upvotes = item.count_info?.agree ?? item.count_info?.favtimes ?? item.agree_num ?? 0; + const downvotes = item.count_info?.disagree ?? 0; + const comments = item.count_info?.total_comment_num ?? item.count_info?.commentnum ?? item.total_comment_num ?? item.commentnum ?? 0; + + return { + ...audioItem, + ...videoItem, + title: (item.title ?? item.summary ?? item.content)?.replace(/<\/?(?:em|br)?>/g, ''), + link, + description: art(path.join(__dirname, 'templates/description.art'), { + image: { + src: item.origin_pic_path ?? item.pic_path ?? item.big_pic_path?.split(/\?/)[0] ?? undefined, + }, + audio, + video, + summary: item.summary ?? item.content ?? item.preface, + }), + author: item.user_info?.username ?? item.brief_column?.name ?? item.author_info?.username ?? item.author, + guid, + pubDate: item.publish_time ?? item.dateline ? parseDate(item.publish_time ?? item.dateline, 'X') : undefined, + upvotes: parseInt(upvotes, 10), + downvotes: parseInt(downvotes, 10), + comments: parseInt(comments, 10), + }; + }) + .filter((item) => item) + .slice(0, limit); + + return await Promise.all( + items.map((item) => + tryGet(item.guid, async () => { + if (!new RegExp(domain, 'i').test(new URL(item.link).hostname)) { + return item; + } else if (!item.guid.startsWith('huxiu-moment')) { + return await fetchItem(item); + } + + return item; + }) + ) + ); +}; + +const videoQualities = ['fhd', 'fhd_medium', 'wifi', 'fhd_low', 'flow', 'hd', 'sd']; + +/** + * Processes the video information and returns the processed data. + * + * @param {Object} info - The video information to process. + * @returns {Object} - An object containing the processed video data. + */ +const processVideoInfo = (info) => { + const quality = info ? videoQualities.find((quality) => info.hasOwnProperty(`${quality}_link`)) : undefined; + + if (quality === undefined) { + return { + processed: undefined, + processedItem: {}, + }; + } + + const linkKey = `${quality}_link`; + const sizeKey = `origin_${quality}_size`; + + const processed = { + duration: info.duration ?? info.origin_duration, + poster: info.cover ?? info.custom_cover_path ?? info.gif_path, + size: info.hasOwnProperty(sizeKey) ? info[sizeKey] : undefined, + src: info[linkKey], + type: `video/${info[linkKey].split(/\./).pop()}`, + }; + + const processedItem = { + itunes_item_image: processed.poster, + itunes_duration: processed.duration, + enclosure_url: processed.src, + enclosure_length: processed.size, + enclosure_type: processed.type, + }; + + return { + processed, + processedItem, + }; +}; + +module.exports = { + rootUrl, + apiArticleRootUrl, + apiBriefRootUrl, + apiMemberRootUrl, + apiMomentRootUrl, + apiSearchRootUrl, + + fetchBriefColumnData, + fetchClubData, + fetchData, + generateSignature, + processItems, +}; diff --git a/lib/v2/huxiu/utils.js b/lib/v2/huxiu/utils.js deleted file mode 100644 index 3774a8ec7ac96f..00000000000000 --- a/lib/v2/huxiu/utils.js +++ /dev/null @@ -1,154 +0,0 @@ -const cheerio = require('cheerio'); -const { parseDate } = require('@/utils/parse-date'); -const got = require('@/utils/got'); -const { art } = require('@/utils/render'); -const path = require('path'); -const CryptoJS = require('crypto-js'); - -const accountApi = 'https://account-api.huxiu.com'; -const articleApi = 'https://api-article.huxiu.com'; -const baseUrl = 'https://www.huxiu.com'; -const briefApi = 'https://api-brief.huxiu.com'; -const momentApi = 'https://moment-api.huxiu.com'; -const searchApi = 'https://search-api.huxiu.com'; - -const ProcessFeed = (list, cache) => - Promise.all( - list.map((item) => - cache.tryGet(item.link, async () => { - const response = await got(item.link, { - https: { - rejectUnauthorized: false, - }, - }); - const $ = cheerio.load(response.data); - const initialState = parseInitialState($); - - if (item.link.startsWith(`${baseUrl}/article/`)) { - fetchArticle(initialState, item); - } else if (item.link.startsWith(`${baseUrl}/brief/`)) { - fetchBrief(initialState, item); - } - - return item; - }) - ) - ); - -const fetchArticle = (initialState, item) => { - const { articleDetail } = initialState.articleDetail; - const topImg = art(path.join(__dirname, 'templates/img.art'), { - img: articleDetail.pic_path.split('?')[0], - }); - let video = ''; - if (articleDetail.video_info) { - video = art(path.join(__dirname, 'templates/video.art'), { - video_info: articleDetail.video_info, - }); - } - - const $ = cheerio.load(articleDetail.content, null, false); - - $('.text-big-title').each((_, e) => (e.tagName = 'h3')); - fixImg($); - removeExtraLinebreak($); - // $('img.dialog_add_wxxy_qrcode_icon').remove(); - - item.description = video + topImg + $.html(); - item.author = articleDetail.author; - item.category = articleDetail.tags_info.map((tag) => tag.name); -}; - -const fetchBrief = (initialState, item) => { - const { brief_detail } = initialState.briefStoreModule; - const { brief_column } = brief_detail; - - const description = art(path.join(__dirname, 'templates/brief.art'), { - brief: brief_detail.brief, - }); - - const $ = cheerio.load(description, null, false); - - $('button.black-dot').remove(); - fixImg($); - removeExtraLinebreak($); - - item.description = $.html(); - item.author = brief_detail.brief.publisher_list.map((item) => item.username).join(', '); - item.pubDate = parseDate(brief_column.update_time, 'X'); -}; - -const parseInitialState = ($) => - JSON.parse( - $('script') - .text() - .match(/window\.__INITIAL_STATE__=(\{.*?\});\(function\(\)/)[1] - ); - -const fixImg = ($) => { - $('img.lazyImg, img.js-preview').each((_, e) => { - if (e.attribs._src) { - e.attribs.src = e.attribs._src.split('?')[0]; - delete e.attribs._src; - } - if (e.attribs.src.includes('?')) { - e.attribs.src = e.attribs.src.split('?')[0]; - } - if (e.attribs['data-w']) { - e.attribs.width = e.attribs['data-w']; - delete e.attribs['data-w']; - } - if (e.attribs['data-h']) { - e.attribs.height = e.attribs['data-h']; - delete e.attribs['data-h']; - } - }); -}; - -const removeExtraLinebreak = ($) => { - $('p').each((_, e) => { - e = $(e); - if (e.find('img').length === 0 && e.text().match(/^\s*$/)) { - e.remove(); - } - }); -}; - -const generateNonce = () => { - let nonce = ''; - const e = 'abcdefghijklmnopqrstuvwxyz1234567890ABCDEFGHIJKLMNOPQRSTUVWXYZ'; - const t = 16; - for (let i = 0; i < t; i++) { - nonce += e.charAt(Math.floor(Math.random() * e.length)); - } - return nonce; -}; - -const generateSignature = () => { - const timestamp = Math.round(new Date().getTime() / 1000).toString(); - // const appSecret = '4nHzGgGt7WX4zFTsTKocpxg4dzU6-wUi'; - const appSecret = 'hUzaVKtNfDE-6UiyaJdfsmjW-8dwoyVc'; // appid: 'hx_search' - const nonce = generateNonce(); - const r = [appSecret, timestamp, nonce].sort(); - return { - nonce, - timestamp, - signature: CryptoJS.SHA1(r[0] + r[1] + r[2]).toString(), - }; -}; - -module.exports = { - accountApi, - articleApi, - baseUrl, - briefApi, - momentApi, - searchApi, - ProcessFeed, - fetchArticle, - fetchBrief, - parseInitialState, - fixImg, - removeExtraLinebreak, - generateSignature, -}; diff --git a/website/docs/routes/new-media.mdx b/website/docs/routes/new-media.mdx index 4b40652189e244..f61891ca1edb08 100644 --- a/website/docs/routes/new-media.mdx +++ b/website/docs/routes/new-media.mdx @@ -3483,33 +3483,73 @@ others = 热点新闻 + 滚动新闻 ## 虎嗅 {#hu-xiu} -### 首页资讯 {#hu-xiu-shou-ye-zi-xun} +### 资讯 {#hu-xiu-zi-xun} - + + +### 频道 {#hu-xiu-pin-dao} + + + +| 视频 | 车与出行 | 年轻一代 | 十亿消费者 | 前沿科技 | +| --- | ---- | ---- | ----- | ---- | +| 10 | 21 | 106 | 103 | 105 | + +| 财经 | 娱乐淘金 | 医疗健康 | 文化教育 | 出海 | +| --- | ---- | ---- | ---- | --- | +| 115 | 22 | 111 | 113 | 114 | + +| 金融地产 | 企业服务 | 创业维艰 | 社交通讯 | 全球热点 | 生活腔调 | +| ---- | ---- | ---- | ---- | ---- | ---- | +| 102 | 110 | 2 | 112 | 107 | 4 | + + ### 24 小时 {#hu-xiu-24-xiao-shi} - + + +### 源流 {#hu-xiu-yuan-liu} + + + +更多源流请参见 [源流广场](https://www.huxiu.com/club) + + ### 标签 {#hu-xiu-biao-qian} - + + +更多标签请参见 [标签](https://www.huxiu.com/tags) + + ### 搜索 {#hu-xiu-sou-suo} - + ### 作者 {#hu-xiu-zuo-zhe} - + + +| TA的文章 | TA的24小时 | +| ------- | ------- | +| article | moment | + + ### 文集 {#hu-xiu-wen-ji} - + + +更多文集请参见 [文集](https://www.huxiu.com/collection) + + ### 简报 {#hu-xiu-jian-bao} - + ## 互动吧 {#hu-dong-ba}