From 983bc8f360839d3fe1840991bc7bd2f57f72ad38 Mon Sep 17 00:00:00 2001
From: Neko Aria <990879119@qq.com>
Date: Thu, 17 Oct 2024 00:18:10 +0800
Subject: [PATCH] fix(route/zaobao): update list parsing logic (#17152)

---
Review notes (ignored by `git am`):
- processImageData: the `.body-content .loadme picture img` entry is now built
  from attributes. The refactor gated it on `.html()`, which is always empty
  for an <img>, so the `data` entry could never be produced; its `src` also
  lost the static-host rewrite the removed code applied.
- Context-line indentation was reconstructed as 4-space; if it does not match
  the target tree, apply with `git apply --ignore-whitespace`.

 lib/routes/zaobao/util.ts | 92 +++++++++++++++++++++------------------
 1 file changed, 49 insertions(+), 43 deletions(-)

diff --git a/lib/routes/zaobao/util.ts b/lib/routes/zaobao/util.ts
index 19fc63a51a6d2f..4de18e3a57ecd8 100644
--- a/lib/routes/zaobao/util.ts
+++ b/lib/routes/zaobao/util.ts
@@ -38,7 +38,7 @@ const parseList = async (
 }> => {
     const response = await got_ins.get(baseUrl + sectionUrl);
     const $ = load(response.data);
-    let data = /realtime/.test(sectionUrl) ? $('.card-listing .card') : $('.article-list').find('.article-type');
+    let data = $('.card-listing .card');
     if (data.length === 0) {
         // for HK version
         data = $('.clearfix').find('.list-block');
@@ -104,48 +104,7 @@ const parseList = async (
 
                 const articleBody = articleBodyNode.html();
 
-                const imageDataArray = [];
-                if ($1('.inline-figure-img').length) {
-                    // for SG version
-                    imageDataArray.push({
-                        type: 'normalHTML',
-                        html: $1('.inline-figure-img')
-                            .html()
-                            .replace(/\/\/.*\.com\/s3fs-public/, '//static.zaobao.com/s3fs-public')
-                            .replace(/s3\/files/, 's3fs-public'),
-                    });
-                }
-                if ($1('.body-content .loadme picture img').length) {
-                    // Unused?
-                    imageDataArray.push({
-                        type: 'data',
-                        src: $1('.body-content .loadme picture source')
-                            .attr('data-srcset')
-                            .replace(/\/\/.*\.com\/s3fs-public/, '//static.zaobao.com/s3fs-public')
-                            .replace(/s3\/files/, 's3fs-public'),
-                        title: $1('.body-content .loadme picture img').attr('title'),
-                    });
-                }
-                if ($1('.inline-figure-gallery').length) {
-                    // for SG version
-                    imageDataArray.push({
-                        type: 'normalHTML',
-                        html: $1('.inline-figure-gallery')
-                            .html()
-                            .replaceAll(/\/\/.*\.com\/s3fs-public/g, '//static.zaobao.com/s3fs-public')
-                            .replaceAll('s3/files', 's3fs-public'),
-                    });
-                }
-                if ($1('#carousel-article').length) {
-                    // for HK version, HK version of multi images use same selector as single image, so g is needed for all pages
-                    imageDataArray.push({
-                        type: 'normalHTML',
-                        html: $1('#carousel-article .carousel-inner')
-                            .html()
-                            .replaceAll(/\/\/.*\.com\/s3fs-public/g, '//static.zaobao.com/s3fs-public')
-                            .replaceAll('s3/files', 's3fs-public'),
-                    });
-                }
+                const imageDataArray = processImageData($1);
 
                 return {
                     // <- for SG version -> for HK version
@@ -182,4 +141,51 @@ const orderContent = (parent) => {
     }
 };
 
+type ImageData = { type: 'normalHTML'; html: string } | { type: 'data'; src: string; title?: string };
+
+const lazyImageSelector = '.body-content .loadme picture img';
+
+/** Rewrites image URLs in `value` to the `//static.zaobao.com/s3fs-public` location. */
+const normalizeImageHost = (value: string): string => value.replaceAll(/\/\/.*\.com\/s3fs-public/g, '//static.zaobao.com/s3fs-public').replaceAll('s3/files', 's3fs-public');
+
+/**
+ * Collects the images of an article page, in selector order.
+ *
+ * @param $1 selector function for the loaded article document (the `$1` that `parseList` passes in)
+ * @returns `normalHTML` entries carrying normalized markup and `data` entries carrying a normalized source URL
+ */
+const processImageData = ($1): ImageData[] => {
+    const imageDataArray: ImageData[] = [];
+
+    const imageSelectors = [
+        '.inline-figure-img', // for SG version
+        lazyImageSelector, // Unused?
+        '.inline-figure-gallery', // for SG version
+        '#carousel-article', // for HK version, HK version of multi images use same selector as single image, so g is needed for all pages
+    ];
+
+    for (const selector of imageSelectors) {
+        if (!$1(selector).length) {
+            continue;
+        }
+
+        if (selector === lazyImageSelector) {
+            // An <img> has no inner HTML, so this entry is built from attributes rather than .html().
+            const src = $1('.body-content .loadme picture source').attr('data-srcset');
+            if (src) {
+                imageDataArray.push({ type: 'data', src: normalizeImageHost(src), title: $1(selector).attr('title') });
+            }
+            continue;
+        }
+
+        // For the carousel the markup is taken from its .carousel-inner child, not the container itself.
+        const html = $1(selector === '#carousel-article' ? '#carousel-article .carousel-inner' : selector).html();
+        if (html) {
+            imageDataArray.push({ type: 'normalHTML', html: normalizeImageHost(html) });
+        }
+    }
+
+    return imageDataArray;
+};
+
 export { parseList, orderContent };