From 804a4abbf3bce9f0ce61a934b5ae10f4f6fec2e3 Mon Sep 17 00:00:00 2001
From: Ethan Shen <42264778+nczitzk@users.noreply.github.com>
Date: Tue, 5 Dec 2023 00:38:27 +0800
Subject: [PATCH] =?UTF-8?q?feat(route):=20add=20=E8=99=8E=E5=97=85?=
=?UTF-8?q?=E9=A2=91=E9=81=93=20&=20=E6=BA=90=E6=B5=81=20(#13945)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* feat(route): add 虎嗅频道 & 源流
* fix: limit to 20
* docs: remove rssbud
---
lib/v2/huxiu/article.js | 33 --
lib/v2/huxiu/author.js | 36 --
lib/v2/huxiu/briefColumn.js | 38 +-
lib/v2/huxiu/channel.js | 28 ++
lib/v2/huxiu/club.js | 30 ++
lib/v2/huxiu/collection.js | 40 +--
lib/v2/huxiu/maintainer.js | 13 +-
lib/v2/huxiu/member.js | 27 ++
lib/v2/huxiu/moment.js | 36 +-
lib/v2/huxiu/radar.js | 40 ++-
lib/v2/huxiu/router.js | 11 +-
lib/v2/huxiu/search.js | 37 +-
lib/v2/huxiu/tag.js | 38 +-
lib/v2/huxiu/templates/brief.art | 22 --
lib/v2/huxiu/templates/description.art | 46 +++
lib/v2/huxiu/templates/img.art | 3 -
lib/v2/huxiu/templates/moment.art | 16 -
lib/v2/huxiu/templates/video.art | 7 -
lib/v2/huxiu/util.js | 460 +++++++++++++++++++++++++
lib/v2/huxiu/utils.js | 154 ---------
website/docs/routes/new-media.mdx | 56 ++-
21 files changed, 745 insertions(+), 426 deletions(-)
delete mode 100644 lib/v2/huxiu/article.js
delete mode 100644 lib/v2/huxiu/author.js
create mode 100644 lib/v2/huxiu/channel.js
create mode 100644 lib/v2/huxiu/club.js
create mode 100644 lib/v2/huxiu/member.js
delete mode 100644 lib/v2/huxiu/templates/brief.art
create mode 100644 lib/v2/huxiu/templates/description.art
delete mode 100644 lib/v2/huxiu/templates/img.art
delete mode 100644 lib/v2/huxiu/templates/moment.art
delete mode 100644 lib/v2/huxiu/templates/video.art
create mode 100644 lib/v2/huxiu/util.js
delete mode 100644 lib/v2/huxiu/utils.js
diff --git a/lib/v2/huxiu/article.js b/lib/v2/huxiu/article.js
deleted file mode 100644
index 757aeaa05b3130..00000000000000
--- a/lib/v2/huxiu/article.js
+++ /dev/null
@@ -1,33 +0,0 @@
-const got = require('@/utils/got');
-const { parseDate } = require('@/utils/parse-date');
-const utils = require('./utils');
-
-module.exports = async (ctx) => {
- const link = `${utils.baseUrl}/article/`;
- const { data } = await got.post(`${utils.articleApi}/web/article/articleList`, {
- headers: {
- Referer: link,
- },
- form: {
- platform: 'www',
- pagesize: ctx.query.limit ? parseInt(ctx.query.limit) : 22,
- },
- });
-
- const list = data.data.dataList.map((item) => ({
- title: item.title,
- link: `${utils.baseUrl}/article/${item.aid}.html`,
- description: item.summary,
- pubDate: parseDate(item.dateline, 'X'),
- author: item.user_info.username,
- }));
-
- const items = await utils.ProcessFeed(list, ctx.cache);
-
- ctx.state.data = {
- title: '虎嗅网 - 首页资讯',
- link,
- description: '聚合优质的创新信息与人群,捕获精选 | 深度 | 犀利的商业科技资讯。在虎嗅,不错过互联网的每个重要时刻。',
- item: items,
- };
-};
diff --git a/lib/v2/huxiu/author.js b/lib/v2/huxiu/author.js
deleted file mode 100644
index 03a7cbd765dd26..00000000000000
--- a/lib/v2/huxiu/author.js
+++ /dev/null
@@ -1,36 +0,0 @@
-const got = require('@/utils/got');
-const { parseDate } = require('@/utils/parse-date');
-const utils = require('./utils');
-
-module.exports = async (ctx) => {
- const { id } = ctx.params;
- const link = `${utils.baseUrl}/member/${id}/article.html`;
-
- const articleList = await got({
- method: 'post',
- url: `${utils.accountApi}/web/article/articleList`,
- form: {
- platform: 'www',
- uid: id,
- type: 0,
- page: 1,
- },
- }).json();
-
- const list = articleList.data.datalist.map((item) => ({
- title: item.title,
- link: `${utils.baseUrl}/article/${item.aid}.html`,
- description: item.summary,
- pubDate: parseDate(item.time),
- }));
-
- const items = await utils.ProcessFeed(list, ctx.cache);
-
- const authorInfo = `虎嗅网 - ${items[0].author}`;
-
- ctx.state.data = {
- title: authorInfo,
- link,
- item: items,
- };
-};
diff --git a/lib/v2/huxiu/briefColumn.js b/lib/v2/huxiu/briefColumn.js
index eeca7b6a2b92a6..f046e93a5aeff3 100644
--- a/lib/v2/huxiu/briefColumn.js
+++ b/lib/v2/huxiu/briefColumn.js
@@ -1,41 +1,29 @@
const got = require('@/utils/got');
-const { parseDate } = require('@/utils/parse-date');
-const { baseUrl, briefApi, ProcessFeed } = require('./utils');
+
+const { apiBriefRootUrl, processItems, fetchBriefColumnData } = require('./util');
module.exports = async (ctx) => {
const { id } = ctx.params;
- const link = `${briefApi}/briefColumn/getContentListByCategoryId`;
- const { data: response } = await got
- .post(link, {
- form: {
- platform: 'www',
- brief_column_id: id,
- },
- })
- .json();
- const {
- data: { data: briefDetail },
- } = await got.post(`${briefApi}/briefColumn/detail`, {
+ const limit = ctx.query.limit ? parseInt(ctx.query.limit, 10) : 20;
+
+ const apiUrl = new URL('briefColumn/getContentListByCategoryId', apiBriefRootUrl).href;
+
+ const { data: response } = await got.post(apiUrl, {
form: {
platform: 'www',
brief_column_id: id,
+ pagesize: limit,
},
});
- const list = response.datalist.map((item) => ({
- title: item.title,
- link: `${baseUrl}/brief/${item.brief_id}`,
- description: item.preface,
- pubDate: parseDate(item.publish_time, 'X'),
- }));
+ ctx.state.json = response.data.datalist;
+
+ const items = await processItems(response.data.datalist, limit, ctx.cache.tryGet);
- const items = await ProcessFeed(list, ctx.cache);
+ const data = await fetchBriefColumnData(id);
ctx.state.data = {
- title: `虎嗅 - ${briefDetail.name}`,
- description: briefDetail.summary,
- image: briefDetail.head_img,
- link: `${baseUrl}/briefColumn/${id}.html`,
item: items,
+ ...data,
};
};
diff --git a/lib/v2/huxiu/channel.js b/lib/v2/huxiu/channel.js
new file mode 100644
index 00000000000000..4afa5e0a6edefa
--- /dev/null
+++ b/lib/v2/huxiu/channel.js
@@ -0,0 +1,28 @@
+const got = require('@/utils/got');
+
+const { rootUrl, apiArticleRootUrl, processItems, fetchData } = require('./util');
+
+module.exports = async (ctx) => {
+ const { id } = ctx.params;
+ const limit = ctx.query.limit ? parseInt(ctx.query.limit, 10) : 20;
+
+ const apiUrl = new URL(`web/${id ? 'channel' : 'article'}/articleList`, apiArticleRootUrl).href;
+ const currentUrl = new URL(id ? `channel/${id}.html` : 'article', rootUrl).href;
+
+ const { data: response } = await got.post(apiUrl, {
+ form: {
+ platform: 'www',
+ channel_id: id,
+ pagesize: limit,
+ },
+ });
+
+ const items = await processItems(response.data?.dataList ?? response.data.datalist, limit, ctx.cache.tryGet);
+
+ const data = await fetchData(currentUrl);
+
+ ctx.state.data = {
+ item: items,
+ ...data,
+ };
+};
diff --git a/lib/v2/huxiu/club.js b/lib/v2/huxiu/club.js
new file mode 100644
index 00000000000000..6a307784b8bb13
--- /dev/null
+++ b/lib/v2/huxiu/club.js
@@ -0,0 +1,30 @@
+const got = require('@/utils/got');
+
+const { apiBriefRootUrl, processItems, fetchClubData } = require('./util');
+
+module.exports = async (ctx) => {
+ const { id } = ctx.params;
+ const limit = ctx.query.limit ? parseInt(ctx.query.limit, 10) : 20;
+
+ const apiUrl = new URL('club/briefList', apiBriefRootUrl).href;
+
+ const { data, briefColumnId } = await fetchClubData(id);
+
+ const { data: response } = await got.post(apiUrl, {
+ form: {
+ platform: 'www',
+ club_id: id,
+ brief_column_id: briefColumnId,
+ pagesize: limit,
+ },
+ });
+
+ ctx.state.json = response.data.datalist;
+
+ const items = await processItems(response.data.datalist, limit, ctx.cache.tryGet);
+
+ ctx.state.data = {
+ item: items,
+ ...data,
+ };
+};
diff --git a/lib/v2/huxiu/collection.js b/lib/v2/huxiu/collection.js
index ade81c626fc011..2e557875432b3f 100644
--- a/lib/v2/huxiu/collection.js
+++ b/lib/v2/huxiu/collection.js
@@ -1,39 +1,27 @@
const got = require('@/utils/got');
-const cheerio = require('cheerio');
-const utils = require('./utils');
-const { parseDate } = require('@/utils/parse-date');
+
+const { rootUrl, apiArticleRootUrl, processItems, fetchData } = require('./util');
module.exports = async (ctx) => {
const { id } = ctx.params;
- const link = `${utils.baseUrl}/collection/${id}.html`;
- const { data: response } = await got(link, {
- https: {
- rejectUnauthorized: false,
- },
- });
+ const limit = ctx.query.limit ? parseInt(ctx.query.limit, 10) : 10;
- const $ = cheerio.load(response);
+ const apiUrl = new URL('web/collection/articleList', apiArticleRootUrl).href;
+ const currentUrl = new URL(`collection/${id}.html`, rootUrl).href;
- const initialState = utils.parseInitialState($);
-
- const { collectionDetail } = initialState.collectionDetail;
- const list = collectionDetail.article_list.datalist.map((e) => ({
- title: e.title,
- link: `${utils.baseUrl}/article/${e.aid}.html`,
- description: e.summary,
- pubDate: parseDate(e.dateline, 'X'),
- author: e.user_info.username,
- }));
+ const { data: response } = await got.post(apiUrl, {
+ form: {
+ platform: 'www',
+ collection_id: id,
+ },
+ });
- const items = await utils.ProcessFeed(list, ctx.cache);
+ const items = await processItems(response.data.datalist, limit, ctx.cache.tryGet);
- const info = `虎嗅 - ${collectionDetail.name}`;
+ const data = await fetchData(currentUrl);
ctx.state.data = {
- title: info,
- description: collectionDetail.summary,
- image: collectionDetail.icon,
- link,
item: items,
+ ...data,
};
};
diff --git a/lib/v2/huxiu/maintainer.js b/lib/v2/huxiu/maintainer.js
index 2e6f15e8f48653..9a5ae1428b4a64 100644
--- a/lib/v2/huxiu/maintainer.js
+++ b/lib/v2/huxiu/maintainer.js
@@ -1,8 +1,11 @@
module.exports = {
- '/article': ['HenryQW'],
- '/author/:id': ['HenryQW'],
- '/collection/:id': ['AlexdanerZe'],
+ '/article': ['HenryQW', 'nczitzk'],
+ '/briefcolumn/:id': ['Fatpandac', 'nczitzk'],
+ '/channel/:id?': ['nczitzk'],
+ '/club/:id': ['nczitzk'],
+ '/collection/:id': ['AlexdanerZe', 'nczitzk'],
+ '/member/:id/:type?': ['HenryQW', 'nczitzk'],
'/moment': ['nczitzk'],
- '/tag/:id': ['xyqfer', 'HenryQW'],
- '/search/:keyword': ['xyqfer', 'HenryQW'],
+ '/search/:keyword': ['xyqfer', 'HenryQW', 'nczitzk'],
+ '/tag/:id': ['xyqfer', 'HenryQW', 'nczitzk'],
};
diff --git a/lib/v2/huxiu/member.js b/lib/v2/huxiu/member.js
new file mode 100644
index 00000000000000..86c95c519dafbd
--- /dev/null
+++ b/lib/v2/huxiu/member.js
@@ -0,0 +1,27 @@
+const got = require('@/utils/got');
+
+const { rootUrl, apiMemberRootUrl, processItems, fetchData } = require('./util');
+
+module.exports = async (ctx) => {
+ const { id, type = 'article' } = ctx.params;
+ const limit = ctx.query.limit ? parseInt(ctx.query.limit, 10) : 10;
+
+ const apiUrl = new URL(`web/${type}/${type}List`, apiMemberRootUrl).href;
+ const currentUrl = new URL(`member/${id}${type === 'article' ? '' : `/${type}`}.html`, rootUrl).href;
+
+ const { data: response } = await got.post(apiUrl, {
+ form: {
+ platform: 'www',
+ uid: id,
+ },
+ });
+
+ const items = await processItems(response.data.datalist, limit, ctx.cache.tryGet);
+
+ const data = await fetchData(currentUrl);
+
+ ctx.state.data = {
+ item: items,
+ ...data,
+ };
+};
diff --git a/lib/v2/huxiu/moment.js b/lib/v2/huxiu/moment.js
index fe8b9d3e0c98f3..644719ea5e4f49 100644
--- a/lib/v2/huxiu/moment.js
+++ b/lib/v2/huxiu/moment.js
@@ -1,39 +1,25 @@
const got = require('@/utils/got');
-const { parseDate } = require('@/utils/parse-date');
-const { art } = require('@/utils/render');
-const path = require('path');
-const { baseUrl: rootUrl, momentApi: apiRootUrl } = require('./utils');
+
+const { rootUrl, apiMomentRootUrl, processItems, fetchData } = require('./util');
module.exports = async (ctx) => {
- const currentUrl = `${rootUrl}/moment`;
- const apiUrl = `${apiRootUrl}/web-v2/moment/feed`;
+ const limit = ctx.query.limit ? parseInt(ctx.query.limit, 10) : 20;
+
+ const apiUrl = new URL('web-v2/moment/feed', apiMomentRootUrl).href;
+ const currentUrl = new URL('moment', rootUrl).href;
- const response = await got({
- method: 'post',
- url: apiUrl,
+ const { data: response } = await got.post(apiUrl, {
form: {
- last_dateline: parseInt(new Date().getTime() / 1000),
platform: 'www',
- is_ai: 0,
},
});
- const items = response.data.data.moment_list.datalist[0].datalist.map((item) => ({
- title: item.content,
- link: item.share_url,
- author: item.user_info.username,
- pubDate: parseDate(item.publish_time * 1000),
- description: art(path.join(__dirname, 'templates/moment.art'), {
- description: item.content,
- images: item.img_urls,
- video: item.video,
- comments: item.comment,
- }),
- }));
+ const items = await processItems(response.data.moment_list.datalist[0].datalist, limit, ctx.cache.tryGet);
+
+ const data = await fetchData(currentUrl);
ctx.state.data = {
- title: '虎嗅 - 24小时',
- link: currentUrl,
item: items,
+ ...data,
};
};
diff --git a/lib/v2/huxiu/radar.js b/lib/v2/huxiu/radar.js
index 88aade36c25fe2..f37fed6b6be7e6 100644
--- a/lib/v2/huxiu/radar.js
+++ b/lib/v2/huxiu/radar.js
@@ -3,44 +3,52 @@ module.exports = {
_name: '虎嗅',
'.': [
{
- title: '首页资讯',
- docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu',
- source: ['/article', '/'],
+ title: '资讯',
+ docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu-zi-xun',
+ source: ['/article'],
target: '/huxiu/article',
},
+ {
+ title: '频道',
+ docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu-pin-dao',
+ source: ['/channel/:id'], // ':id' must be captured here because target reads params.id (e.g. "105.html")
+ target: (params) => `/huxiu/channel/${params.id.replace(/\.html$/, '')}`,
+ },
{
title: '24小时',
- docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu',
- source: ['/moment', '/'],
+ docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu-24-xiao-shi',
+ source: ['/moment'],
target: '/huxiu/moment',
},
{
title: '标签',
- docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu',
+ docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu-biao-qian',
source: ['/tags/:id'],
- target: (params) => `/huxiu/tag/${params.id.replace('.html', '')}`,
+ target: (params) => `/huxiu/tag/${params.id.replace(/\.html$/, '')}`,
},
{
title: '搜索',
- docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu',
+ docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu-sou-suo',
+ source: ['/'],
+ target: '/huxiu/search/:keyword',
},
{
title: '作者',
- docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu',
- source: ['/member/:id/*', '/'],
- target: '/huxiu/author/:id',
+ docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu-zuo-zhe',
+ source: ['/member/:id/:type'],
+ target: (params) => `/huxiu/member/${params.id}/${params.type.replace(/\.html$/, '')}`,
},
{
title: '文集',
- docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu',
- source: ['/collection/:id', '/'],
- target: (params) => `/huxiu/collection/${params.id.replace('.html', '')}`,
+ docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu-wen-ji',
+ source: ['/collection/:id'],
+ target: (params) => `/huxiu/collection/${params.id.replace(/\.html$/, '')}`,
},
{
title: '简报',
- docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu',
+ docs: 'https://docs.rsshub.app/routes/new-media#hu-xiu-jian-bao',
source: ['/briefColumn/:id', '/'],
- target: (params) => `/huxiu/briefcolumn/${params.id.replace('.html', '')}`,
+ target: (params) => `/huxiu/briefcolumn/${params.id.replace(/\.html$/, '')}`,
},
],
},
diff --git a/lib/v2/huxiu/router.js b/lib/v2/huxiu/router.js
index d527815d6f566d..af15ad97b98ca6 100644
--- a/lib/v2/huxiu/router.js
+++ b/lib/v2/huxiu/router.js
@@ -1,9 +1,12 @@
module.exports = (router) => {
- router.get('/article', require('./article'));
- router.get('/author/:id', require('./author'));
+ router.get('/article', require('./channel'));
+ router.get('/author/:id/:type?', require('./member'));
+ router.get('/briefcolumn/:id', require('./briefColumn'));
+ router.get('/channel/:id?', require('./channel'));
+ router.get('/club/:id', require('./club'));
router.get('/collection/:id', require('./collection'));
+ router.get('/member/:id/:type?', require('./member'));
router.get('/moment', require('./moment'));
- router.get('/tag/:id', require('./tag'));
router.get('/search/:keyword', require('./search'));
- router.get('/briefcolumn/:id', require('./briefColumn'));
+ router.get('/tag/:id', require('./tag'));
};
diff --git a/lib/v2/huxiu/search.js b/lib/v2/huxiu/search.js
index a379921bf017df..02943cd3dfb950 100644
--- a/lib/v2/huxiu/search.js
+++ b/lib/v2/huxiu/search.js
@@ -1,40 +1,35 @@
const got = require('@/utils/got');
-const utils = require('./utils');
+
+const { rootUrl, apiSearchRootUrl, generateSignature, processItems, fetchData } = require('./util');
module.exports = async (ctx) => {
const { keyword } = ctx.params;
- const url = `${utils.searchApi}/api/article`;
- const link = utils.baseUrl;
+ const limit = ctx.query.limit ? parseInt(ctx.query.limit, 10) : 20;
- const { data } = await got.post(url, {
- headers: {
- Referer: utils.baseUrl,
- },
+ const apiUrl = new URL('api/article', apiSearchRootUrl).href;
+ const currentUrl = rootUrl;
+
+ const { data: response } = await got.post(apiUrl, {
searchParams: {
platform: 'www',
s: keyword,
sort: '',
page: 1,
- pagesize: 20,
- appid: 'hx_search',
- ...utils.generateSignature(),
+ pagesize: limit,
+ appid: 'hx_search_202303',
+ ...generateSignature(),
},
});
- const list = data.data.datalist.map((d) => ({
- title: d.title,
- link: d.url.includes('article') ? `${d.url}.html` : d.url,
- description: d.content,
- author: d.author,
- }));
+ const items = await processItems(response.data.datalist, limit, ctx.cache.tryGet);
+
+ const data = await fetchData(currentUrl);
+ data.title = `${keyword}-搜索结果-${data.title}`;
- const items = await utils.ProcessFeed(list, ctx.cache);
+ ctx.state.json = response.data.datalist;
- const info = `虎嗅网 - ${keyword}`;
ctx.state.data = {
- title: info,
- link,
- description: info,
item: items,
+ ...data,
};
};
diff --git a/lib/v2/huxiu/tag.js b/lib/v2/huxiu/tag.js
index a1d1909a20588b..c6ca1433350a91 100644
--- a/lib/v2/huxiu/tag.js
+++ b/lib/v2/huxiu/tag.js
@@ -1,38 +1,26 @@
const got = require('@/utils/got');
-const cheerio = require('cheerio');
-const utils = require('./utils');
-const { parseDate } = require('@/utils/parse-date');
-const timezone = require('@/utils/timezone');
+
+const { rootUrl, processItems, fetchData } = require('./util');
module.exports = async (ctx) => {
const { id } = ctx.params;
- const link = `${utils.baseUrl}/tags/${id}.html`;
- const { data } = await got(link, {
- https: {
- rejectUnauthorized: false,
+ const limit = ctx.query.limit ? parseInt(ctx.query.limit, 10) : 10;
+
+ const apiUrl = new URL('v2_action/tag_article_list', rootUrl).href;
+ const currentUrl = new URL(`tags/${id}.html`, rootUrl).href;
+
+ const { data: response } = await got.post(apiUrl, {
+ form: {
+ tag_id: id,
},
});
- const $ = cheerio.load(data);
- const list = $('.related-article li')
- .toArray()
- .map((e) => {
- e = $(e);
- const a = e.find('a');
- return {
- title: a.text(),
- link: `${utils.baseUrl}${a.attr('href')}`,
- pubDate: timezone(parseDate(e.find('.time').text()), 8),
- };
- });
+ const items = await processItems(response.data, limit, ctx.cache.tryGet);
- const items = await utils.ProcessFeed(list, ctx.cache);
+ const data = await fetchData(currentUrl);
- const info = `虎嗅 - ${$('h1').text()}`;
ctx.state.data = {
- title: info,
- link,
- description: info,
item: items,
+ ...data,
};
};
diff --git a/lib/v2/huxiu/templates/brief.art b/lib/v2/huxiu/templates/brief.art
deleted file mode 100644
index 0babfafd68d952..00000000000000
--- a/lib/v2/huxiu/templates/brief.art
+++ /dev/null
@@ -1,22 +0,0 @@
-{{ if brief.preface }}{{@ brief.preface }}
{{ /if }}
-
-{{ if brief.audio_info }}
-
-
-{{ /if }}
-
-{{ if brief.part_list }}{{ each brief.part_list part }}
- {{ if part.title }}
{{ comment.user_info.username }}: {{ comment.content }} ({{ comment.show_time }})
-{{ /each }} -{{ /if }} \ No newline at end of file diff --git a/lib/v2/huxiu/templates/video.art b/lib/v2/huxiu/templates/video.art deleted file mode 100644 index 338b131088f974..00000000000000 --- a/lib/v2/huxiu/templates/video.art +++ /dev/null @@ -1,7 +0,0 @@ -{{ if video_info }} - -{{ /if }} diff --git a/lib/v2/huxiu/util.js b/lib/v2/huxiu/util.js new file mode 100644 index 00000000000000..5bb4b76d8f8dde --- /dev/null +++ b/lib/v2/huxiu/util.js @@ -0,0 +1,460 @@ +const got = require('@/utils/got'); +const cheerio = require('cheerio'); +const { parseDate } = require('@/utils/parse-date'); +const { art } = require('@/utils/render'); +const path = require('path'); +const CryptoJS = require('crypto-js'); + +const domain = 'huxiu.com'; +const rootUrl = `https://www.${domain}`; + +const apiArticleRootUrl = `https://api-article.${domain}`; +const apiBriefRootUrl = `https://api-brief.${domain}`; +const apiMemberRootUrl = `https://api-account.${domain}`; +const apiMomentRootUrl = `https://moment-api.${domain}`; +const apiSearchRootUrl = `https://search-api.${domain}`; + +/** + * Cleans up HTML data by removing specific elements and attributes. + * + * @param {string} data - The HTML data to clean up. + * @returns {string} - The cleaned up HTML data. + */ +const cleanUpHTML = (data) => { + const $ = cheerio.load(data); + + $('div.neirong-shouquan').remove(); + $('em.vote__bar, div.vote__btn, div.vote__time').remove(); + $('p img').each((_, e) => { + e = $(e); + e.parent().replaceWith( + art(path.join(__dirname, 'templates/description.art'), { + image: { + src: (e.prop('src') ?? 
e.prop('_src')).split(/\?/)[0], + width: e.prop('data-w'), + height: e.prop('data-h'), + }, + }) + ); + }); + $('p, span').each((_, e) => { + e = $(e); + if (e.contents().length === 1 && /^\s*$/.test(e.text())) { + e.remove(); + } else { + e.removeClass(); + e.removeAttr('data-check-id label class'); + } + }); + $('.text-big-title').each((_, e) => { + e.tagName = 'h3'; + e = $(e); + e.removeClass(); + e.removeAttr('class'); + }); + $('.text-sm-title').each((_, e) => { + e.tagName = 'h4'; + e = $(e); + e.removeClass(); + e.removeAttr('class'); + }); + + return $.html(); +}; + +/** + * Fetch brief column data for the specified ID. + * + * @param {string} url - The ID of the brief column to fetch data from. + * @returns {Promise