From ccbe3990023fab2d3de11d0fe728bccce72099b6 Mon Sep 17 00:00:00 2001 From: Felix Hsu Date: Sun, 7 Apr 2024 19:35:01 +0800 Subject: [PATCH] fix(route): the Bloomberg RSS doesn't work after using the new ofetch as the 'got' lib (#15112) * Fix the Bloomberg RSS by using the old/deprecated 'got' because the 'redirectUrls' feature is not supported in the new 'got' library. * Use the redirected prop in RawResponse * Use the ofetch.raw * Update lib/routes/bloomberg/index.ts Reformat the desc of Route Co-authored-by: Tony --------- --- lib/routes/bloomberg/index.ts | 35 ++++++++++++++++++++++--- lib/routes/bloomberg/utils.ts | 48 ++++++++++++++++++++++------------- 2 files changed, 63 insertions(+), 20 deletions(-) diff --git a/lib/routes/bloomberg/index.ts b/lib/routes/bloomberg/index.ts index d6477af221c703..9a0b8d43a39c6c 100644 --- a/lib/routes/bloomberg/index.ts +++ b/lib/routes/bloomberg/index.ts @@ -16,9 +16,38 @@ const site_title_mapping = { }; export const route: Route = { - path: ['/:site', '/'], - name: 'Unknown', - maintainers: [], + path: '/:site?', + categories: ['finance'], + example: '/bloomberg/bbiz', + parameters: { + site: 'Site ID, can be found below', + }, + features: { + requireConfig: false, + requirePuppeteer: false, + antiCrawler: true, + supportBT: false, + supportPodcast: false, + supportScihub: false, + }, + name: 'Bloomberg Site', + maintainers: ['bigfei'], + description: ` + | Site ID | Title | + | ------------ | ------------ | + | / | News | + | bpol | Politics | + | bbiz | Business | + | markets | Markets | + | technology | Technology | + | green | Green | + | wealth | Wealth | + | pursuits | Pursuits | + | bview | Opinion | + | equality | Equality | + | businessweek | Businessweek | + | citylab | CityLab | + `, handler, }; diff --git a/lib/routes/bloomberg/utils.ts b/lib/routes/bloomberg/utils.ts index 377eef2219e54c..6a0ef7cd47cc4a 100644 --- a/lib/routes/bloomberg/utils.ts +++ b/lib/routes/bloomberg/utils.ts @@ -5,9 +5,11 @@ import 
cache from '@/utils/cache'; import { load } from 'cheerio'; import path from 'node:path'; import asyncPool from 'tiny-async-pool'; +import { destr } from 'destr'; import { parseDate } from '@/utils/parse-date'; import got from '@/utils/got'; +import ofetch from '@/utils/ofetch'; import { art } from '@/utils/render'; const rootUrl = 'https://www.bloomberg.com/feeds'; @@ -60,6 +62,15 @@ const regex = [pageTypeRegex1, pageTypeRegex2]; const capRegex = /<p>|<\/p>/g; const emptyRegex = /<p[^>]*>(&nbsp;|\s)<\/p>/g; +const redirectGot = (url) => + ofetch.raw(url, { + headers, + parseResponse: (responseText) => ({ + data: destr(responseText), + body: responseText, + }), + }); + const parseNewsList = async (url, ctx) => { const resp = await got(url); const $ = load(resp.data, { @@ -96,12 +107,12 @@ const parseArticle = (item) => try { const apiUrl = `${api.url}${link}`; - res = await got(apiUrl, { headers }); + res = await redirectGot(apiUrl); } catch (error) { // fallback if (error.name && (error.name === 'HTTPError' || error.name === 'RequestError' || error.name === 'FetchError')) { try { - res = await got(item.link, { headers }); + res = await redirectGot(item.link); } catch { // return the default one return { @@ -114,8 +125,7 @@ const parseArticle = (item) => } // Blocked by PX3, or 404 by both api and direct link, return the default - const redirectUrls = res.redirectUrls.map(String); - if (redirectUrls.some((r) => new URL(r).pathname === '/tosv2.html') || res.statusCode === 404) { + if ((res.redirected && new URL(res.url).pathname === '/tosv2.html') || res.status === 404) { return { title: item.title, link: item.link, @@ -125,15 +135,15 @@ const parseArticle = (item) => switch (page) { case 'audio': - return parseAudioPage(res, api, item); + return parseAudioPage(res._data, api, item); case 'videos': - return parseVideoPage(res, api, item); + return parseVideoPage(res._data, api, item); case 'photo-essays': - return parsePhotoEssaysPage(res, api, item); + return parsePhotoEssaysPage(res._data, api, item); case 'features/': // single features page - return parseReactRendererPage(res, api, item); + return parseReactRendererPage(res._data, api, item); default: // use story api to get json - return parseStoryJson(res.data, item); + return parseStoryJson(res._data.data, item); } } } @@ -210,8 +220,8 @@ const parseReactRendererPage = async (res, api, item) => { const json = load(res.data)(api.sel).text().trim(); const story_id = 
JSON.parse(json)[api.prop]; try { - const res = await got(`${idUrl}${story_id}`, { headers }); - return await parseStoryJson(res.data, item); + const res = await redirectGot(`${idUrl}${story_id}`); + return await parseStoryJson(res._data, item); } catch (error) { // fallback if (error.name && (error.name === 'HTTPError' || error.name === 'RequestError' || error.name === 'FetchError')) { @@ -364,11 +374,10 @@ const processBody = async (body_html, story_json) => { const processVideo = async (bmmrId, summary) => { const api = `https://www.bloomberg.com/multimedia/api/embed?id=${bmmrId}`; - const res = await got(api, { headers }); + const res = await redirectGot(api); // Blocked by PX3, return the default - const redirectUrls = res.redirectUrls.map(String); - if (redirectUrls.some((r) => new URL(r).pathname === '/tosv2.html')) { + if ((res.redirected && new URL(res.url).pathname === '/tosv2.html') || res.status === 404) { return { stream: '', mp4: '', @@ -377,8 +386,8 @@ const processVideo = async (bmmrId, summary) => { }; } - if (res.data) { - const video_json = res.data; + if (res._data.data) { + const video_json = res._data.data; return { stream: video_json.streams ? video_json.streams[0]?.url : '', mp4: video_json.downloadURLs ? video_json.downloadURLs['600'] : '', @@ -386,7 +395,12 @@ const processVideo = async (bmmrId, summary) => { caption: video_json.description || video_json.title || summary, }; } - return {}; + return { + stream: '', + mp4: '', + coverUrl: '', + caption: summary, + }; }; const nodeRenderers = {