Skip to content

Commit

Permalink
fix(route): the Bloomberg RSS doesn't work after using the new ofetch…
Browse files Browse the repository at this point in the history
… as the 'got' lib (#15112)

* Fix the Bloomberg RSS by using the old/deprecated 'got', because the 'redirectUrls' feature is not supported by the new ofetch-based 'got' library.

* Use the redirected prop in RawResponse

* Use the ofetch.raw

* Update lib/routes/bloomberg/index.ts

Reformat the description of the Route

Co-authored-by: Tony <TonyRL@users.noreply.github.com>

---------
  • Loading branch information
bigfei authored Apr 7, 2024
1 parent e7c233b commit ccbe399
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 20 deletions.
35 changes: 32 additions & 3 deletions lib/routes/bloomberg/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,38 @@ const site_title_mapping = {
};

export const route: Route = {
path: ['/:site', '/'],
name: 'Unknown',
maintainers: [],
path: '/:site?',
categories: ['finance'],
example: '/bloomberg/bbiz',
parameters: {
site: 'Site ID, can be found below',
},
features: {
requireConfig: false,
requirePuppeteer: false,
antiCrawler: true,
supportBT: false,
supportPodcast: false,
supportScihub: false,
},
name: 'Bloomberg Site',
maintainers: ['bigfei'],
description: `
| Site ID | Title |
| ------------ | ------------ |
| / | News |
| bpol | Politics |
| bbiz | Business |
| markets | Markets |
| technology | Technology |
| green | Green |
| wealth | Wealth |
| pursuits | Pursuits |
| bview | Opinion |
| equality | Equality |
| businessweek | Businessweek |
| citylab | CityLab |
`,
handler,
};

Expand Down
48 changes: 31 additions & 17 deletions lib/routes/bloomberg/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@ import cache from '@/utils/cache';
import { load } from 'cheerio';
import path from 'node:path';
import asyncPool from 'tiny-async-pool';
import { destr } from 'destr';

import { parseDate } from '@/utils/parse-date';
import got from '@/utils/got';
import ofetch from '@/utils/ofetch';
import { art } from '@/utils/render';

const rootUrl = 'https://www.bloomberg.com/feeds';
Expand Down Expand Up @@ -60,6 +62,15 @@ const regex = [pageTypeRegex1, pageTypeRegex2];
const capRegex = /<p>|<\/p>/g;
const emptyRegex = /<p\b[^>]*>(&nbsp;|\s)<\/p>/g;

/**
 * Request `url` through `ofetch.raw`, sending the module-level `headers`.
 *
 * The response text is handed to a custom `parseResponse` callback that
 * produces an object with two fields:
 *   - `data`: the text after being passed through `destr`
 *   - `body`: the response text exactly as received
 *
 * @param url - address to request
 * @returns whatever `ofetch.raw` returns for this request
 */
const redirectGot = (url) => {
    // Keep both the parsed form and the untouched text of the response.
    const parseResponse = (rawText) => {
        const data = destr(rawText);
        return { data, body: rawText };
    };

    return ofetch.raw(url, { headers, parseResponse });
};

const parseNewsList = async (url, ctx) => {
const resp = await got(url);
const $ = load(resp.data, {
Expand Down Expand Up @@ -96,12 +107,12 @@ const parseArticle = (item) =>

try {
const apiUrl = `${api.url}${link}`;
res = await got(apiUrl, { headers });
res = await redirectGot(apiUrl);
} catch (error) {
// fallback
if (error.name && (error.name === 'HTTPError' || error.name === 'RequestError' || error.name === 'FetchError')) {
try {
res = await got(item.link, { headers });
res = await redirectGot(item.link);
} catch {
// return the default one
return {
Expand All @@ -114,8 +125,7 @@ const parseArticle = (item) =>
}

// Blocked by PX3, or 404 by both api and direct link, return the default
const redirectUrls = res.redirectUrls.map(String);
if (redirectUrls.some((r) => new URL(r).pathname === '/tosv2.html') || res.statusCode === 404) {
if ((res.redirected && new URL(res.url).pathname === '/tosv2.html') || res.status === 404) {
return {
title: item.title,
link: item.link,
Expand All @@ -125,15 +135,15 @@ const parseArticle = (item) =>

switch (page) {
case 'audio':
return parseAudioPage(res, api, item);
return parseAudioPage(res._data, api, item);
case 'videos':
return parseVideoPage(res, api, item);
return parseVideoPage(res._data, api, item);
case 'photo-essays':
return parsePhotoEssaysPage(res, api, item);
return parsePhotoEssaysPage(res._data, api, item);
case 'features/': // single features page
return parseReactRendererPage(res, api, item);
return parseReactRendererPage(res._data, api, item);
default: // use story api to get json
return parseStoryJson(res.data, item);
return parseStoryJson(res._data.data, item);
}
}
}
Expand Down Expand Up @@ -210,8 +220,8 @@ const parseReactRendererPage = async (res, api, item) => {
const json = load(res.data)(api.sel).text().trim();
const story_id = JSON.parse(json)[api.prop];
try {
const res = await got(`${idUrl}${story_id}`, { headers });
return await parseStoryJson(res.data, item);
const res = await redirectGot(`${idUrl}${story_id}`);
return await parseStoryJson(res._data, item);
} catch (error) {
// fallback
if (error.name && (error.name === 'HTTPError' || error.name === 'RequestError' || error.name === 'FetchError')) {
Expand Down Expand Up @@ -364,11 +374,10 @@ const processBody = async (body_html, story_json) => {

const processVideo = async (bmmrId, summary) => {
const api = `https://www.bloomberg.com/multimedia/api/embed?id=${bmmrId}`;
const res = await got(api, { headers });
const res = await redirectGot(api);

// Blocked by PX3, return the default
const redirectUrls = res.redirectUrls.map(String);
if (redirectUrls.some((r) => new URL(r).pathname === '/tosv2.html')) {
if ((res.redirected && new URL(res.url).pathname === '/tosv2.html') || res.status === 404) {
return {
stream: '',
mp4: '',
Expand All @@ -377,16 +386,21 @@ const processVideo = async (bmmrId, summary) => {
};
}

if (res.data) {
const video_json = res.data;
if (res._data.data) {
const video_json = res._data.data;
return {
stream: video_json.streams ? video_json.streams[0]?.url : '',
mp4: video_json.downloadURLs ? video_json.downloadURLs['600'] : '',
coverUrl: video_json.thumbnail?.baseUrl ?? '',
caption: video_json.description || video_json.title || summary,
};
}
return {};
return {
stream: '',
mp4: '',
coverUrl: '',
caption: summary,
};
};

const nodeRenderers = {
Expand Down

0 comments on commit ccbe399

Please sign in to comment.