From 3715992ca430777757172039d836574ba194c0c1 Mon Sep 17 00:00:00 2001 From: dzx-dzx Date: Fri, 25 Oct 2024 01:29:14 +0800 Subject: [PATCH] feat(route/nytimes): enhance full text fetch --- lib/routes/nytimes/index.ts | 7 ++++--- lib/routes/nytimes/utils.ts | 12 ++++++++---- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/lib/routes/nytimes/index.ts b/lib/routes/nytimes/index.ts index bafa23afbfa003..7200c90e1aeaf3 100644 --- a/lib/routes/nytimes/index.ts +++ b/lib/routes/nytimes/index.ts @@ -81,7 +81,7 @@ async function handler(ctx) { const browser = await puppeteer(); const feed = await parser.parseURL(rssUrl); const items = await Promise.all( - feed.items.splice(0, 10).map(async (item) => { + feed.items.splice(0, 3).map(async (item) => { let link = item.link; let response, @@ -118,8 +118,9 @@ async function handler(ctx) { if ($('.dual-btn').length > 0) { hasEnVersion = true; link = $('.dual-btn a').last().attr().href; - - response = await utils.PuppeterGetter(ctx, browser, link); + if (link !== '') { + response = await utils.PuppeterGetter(ctx, browser, link); + } } } } diff --git a/lib/routes/nytimes/utils.ts b/lib/routes/nytimes/utils.ts index 36cf67f089aab7..02b05b1fdd3de9 100644 --- a/lib/routes/nytimes/utils.ts +++ b/lib/routes/nytimes/utils.ts @@ -19,11 +19,15 @@ const PuppeterGetter = async (ctx, browser, link) => { const page = await browser.newPage(); await page.setRequestInterception(true); page.on('request', (request) => { - request.resourceType() === 'document' ? request.continue() : request.abort(); - }); - await page.goto(link, { - waitUntil: 'domcontentloaded', + if (request.url().includes('https://www.nytimes.com/svc/onsite-messaging/query') || request.url().includes('https://meter-svc.nytimes.com/meter.js')) { + request.abort(); + return; + } + request.continue(); + // request.resourceType() === 'document' ? request.continue() : request.abort(); }); + await page.goto(link); + await page.waitForSelector('[data-testid=optimistic-truncator-message]', { hidden: true, timeout: 0 }); const response = await page.evaluate(() => document.querySelector('body').innerHTML); return response; });