Skip to content

Commit

Permalink
fix(route): mingpao more fancybox (#17209)
Browse files Browse the repository at this point in the history
  • Loading branch information
TonyRL authored Oct 21, 2024
1 parent 195327b commit 5b0ec7d
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 37 deletions.
133 changes: 97 additions & 36 deletions lib/routes/mingpao/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,12 @@ import { getCurrentPath } from '@/utils/helpers';
const __dirname = getCurrentPath(import.meta.url);

import cache from '@/utils/cache';
import got from '@/utils/got';
import ofetch from '@/utils/ofetch';
import parser from '@/utils/rss-parser';
import { load } from 'cheerio';
import * as cheerio from 'cheerio';
import { parseDate } from '@/utils/parse-date';
import { art } from '@/utils/render';
import path from 'node:path';
import logger from '@/utils/logger';

const renderFanBox = (media) =>
art(path.join(__dirname, 'templates/fancybox.art'), {
Expand All @@ -22,11 +21,79 @@ const renderDesc = (media, desc) =>
desc,
});

/**
 * Converts a fancybox anchor into the `{ href, title, video }` descriptor
 * that the handler later passes to `renderDesc`.
 *
 * @param element - The anchor node (anything accepted by `$`).
 * @param $ - Cheerio-style selector function; `$(element).attr(name)` must
 *   return the attribute value.
 * @returns An object with the normalised `href`, the anchor `title`, and
 *   `video`: the direct video URL rewritten to the `cfrvideo.mingpao.com`
 *   host when the anchor points at the `videop.mingpao.com` player and its
 *   `file` query parameter is a parseable absolute URL; otherwise `undefined`.
 * @throws TypeError when the `href` attribute is missing or is not an
 *   absolute URL (same as before this change).
 */
const fixFancybox = (element, $) => {
    const $e = $(element);
    const url = new URL($e.attr('href'));
    let video;
    if (url.hostname === 'videop.mingpao.com') {
        // `searchParams.get` returns null when the parameter is absent, and
        // `new URL(null)` throws; a player link without a usable `file` should
        // degrade to a plain link rather than fail the whole article.
        const file = url.searchParams.get('file');
        if (file) {
            try {
                const videoUrl = new URL(file);
                videoUrl.hostname = 'cfrvideo.mingpao.com'; // use cloudflare cdn
                video = videoUrl.href;
            } catch {
                // `file` is not an absolute URL: leave `video` undefined.
            }
        }
    }
    return {
        href: url.href,
        title: $e.attr('title'),
        video,
    };
};

/**
 * Route definition for the Ming Pao news feeds.
 *
 * The path takes two optional segments: `type` (`ins` or `pns`, see the
 * `options` list) and `category` (channel id, listed in `description`).
 */
export const route: Route = {
    path: '/:type?/:category?',
    // Only one `name` entry: an earlier duplicate ('Unknown') was dead because
    // the later key overrides it, and duplicate keys are a TypeScript error.
    name: '新聞',
    example: '/mingpao/ins/all',
    parameters: {
        type: {
            description: '新聞類型',
            default: 'ins',
            options: [
                { value: 'ins', label: '即時新聞' },
                { value: 'pns', label: '每日明報' },
            ],
        },
        category: '頻道,見下表',
    },
    // Maps news.mingpao.com section pages onto the matching feed path.
    radar: [
        {
            title: '即時新聞',
            source: ['news.mingpao.com/ins/:categoryName/section/:date/:category'],
            target: '/mingpao/ins/:category',
        },
        {
            title: '每日明報',
            source: ['news.mingpao.com/pns/:categoryName/section/:date/:category'],
            target: '/mingpao/pns/:category',
        },
    ],
    maintainers: ['TonyRL'],
    handler,
    description: `| category | 即時新聞頻道 |
| -------- | ------------ |
| all | 總目錄 |
| s00001 | 港聞 |
| s00002 | 經濟 |
| s00003 | 地產 |
| s00004 | 兩岸 |
| s00005 | 國際 |
| s00006 | 體育 |
| s00007 | 娛樂 |
| s00022 | 文摘 |
| s00024 | 熱點 |
| category | 每日明報頻道 |
| -------- | ------------ |
| s00001 | 要聞 |
| s00002 | 港聞 |
| s00003 | 社評 |
| s00004 | 經濟 |
| s00005 | 副刊 |
| s00011 | 教育 |
| s00012 | 觀點 |
| s00013 | 中國 |
| s00014 | 國際 |
| s00015 | 體育 |
| s00016 | 娛樂 |
| s00017 | English |
| s00018 | 作家專欄 |`,
};

async function handler(ctx) {
Expand All @@ -35,26 +102,16 @@ async function handler(ctx) {
const link = `https://news.mingpao.com/rss/${type}/${category}.xml`;

const feed = await parser.parseURL(link);

const items = await Promise.all(
feed.items.map((item) =>
cache.tryGet(item.link, async () => {
let response;
try {
response = await got(item.link, {
headers: {
Referer: 'https://news.mingpao.com/',
},
});
} catch (error) {
if (error instanceof got.MaxRedirectsError) {
logger.error(`MaxRedirectsError when requesting ${decodeURIComponent(item.link)}`);
return item;
}
throw error;
}
const response = await ofetch(item.link, {
headers: {
Referer: 'https://news.mingpao.com/',
},
});

const $ = load(response.data);
const $ = cheerio.load(response);
const fancyboxImg = $('a.fancybox').length ? $('a.fancybox') : $('a.fancybox-buttons');

// remove unwanted elements
Expand All @@ -68,21 +125,25 @@ async function handler(ctx) {
item.category = item.categories;

// fix fancybox image
const fancybox = fancyboxImg.toArray().map((e) => {
e = $(e);
const href = new URL(e.attr('href'));
let video;
if (href.hostname === 'videop.mingpao.com') {
video = new URL(href.searchParams.get('file'));
video.hostname = 'cfrvideo.mingpao.com'; // use cloudflare cdn
video = video.href;
}
return {
href: href.href,
title: e.attr('title'),
video,
};
});
let fancybox = fancyboxImg.toArray().map((e) => fixFancybox(e, $));
const script = $('script')
.toArray()
.find((e) => $(e).text()?.includes("$('#lower').prepend('"));
const lowerContent = script
? $(script)
.text()
?.match(/\$\('#lower'\)\.prepend\('(.*)'\);/)?.[1]
?.replaceAll(/\\"/g, '"')
: '';
if (lowerContent) {
const $ = cheerio.load(lowerContent, null, false);
fancybox = [
...fancybox,
...$('a.fancybox')
.toArray()
.map((e) => fixFancybox(e, $)),
];
}

// remove unwanted key value
delete item.categories;
Expand All @@ -91,7 +152,7 @@ async function handler(ctx) {
delete item.creator;
delete item.isoDate;

item.description = renderDesc(fancybox, $('.txt4').html() ?? $('.article_content.line_1_5em').html());
item.description = renderDesc(fancybox, $('.txt4').html() ?? $('.article_content.line_1_5em').html() ?? $('.txt3').html());
item.pubDate = parseDate(item.pubDate);
item.guid = item.link.includes('?') ? item.link : item.link.substring(0, item.link.lastIndexOf('/'));

Expand Down
2 changes: 1 addition & 1 deletion lib/routes/mingpao/namespace.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import type { Namespace } from '@/types';

/**
 * Namespace metadata for the routes under `lib/routes/mingpao`.
 *
 * Only one `name` entry is kept: an earlier duplicate ('Ming Pao 明报') was
 * dead because the later key overrides it, and duplicate keys in an object
 * literal are a TypeScript error.
 */
export const namespace: Namespace = {
    name: '明報',
    url: 'mingpao.com',
};

0 comments on commit 5b0ec7d

Please sign in to comment.