Skip to content

Commit

Permalink
feat(route/nytimes): enhance full text fetch
Browse files (browse the repository at this point in the history)
  • Loading branch information
dzx-dzx committed Oct 24, 2024
1 parent 8aeb577 commit 3715992
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 7 deletions.
7 changes: 4 additions & 3 deletions lib/routes/nytimes/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ async function handler(ctx) {
const browser = await puppeteer();
const feed = await parser.parseURL(rssUrl);
const items = await Promise.all(
feed.items.splice(0, 10).map(async (item) => {
feed.items.splice(0, 3).map(async (item) => {
let link = item.link;

let response,
Expand Down Expand Up @@ -118,8 +118,9 @@ async function handler(ctx) {
if ($('.dual-btn').length > 0) {
hasEnVersion = true;
link = $('.dual-btn a').last().attr().href;

response = await utils.PuppeterGetter(ctx, browser, link);
if (link !== '') {
response = await utils.PuppeterGetter(ctx, browser, link);
}
}
}
}
Expand Down
12 changes: 8 additions & 4 deletions lib/routes/nytimes/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,15 @@ const PuppeterGetter = async (ctx, browser, link) => {
const page = await browser.newPage();
await page.setRequestInterception(true);
page.on('request', (request) => {
request.resourceType() === 'document' ? request.continue() : request.abort();
});
await page.goto(link, {
waitUntil: 'domcontentloaded',
if (request.url().includes('https://www.nytimes.com/svc/onsite-messaging/query') || request.url().includes('https://meter-svc.nytimes.com/meter.js')) {

Check failure

Code scanning / CodeQL

Incomplete URL substring sanitization (severity: High)

'https://www.nytimes.com/svc/onsite-messaging/query' can be anywhere in the URL, and arbitrary hosts may come before or after it.
request.abort();
return;
}
request.continue();
// request.resourceType() === 'document' ? request.continue() : request.abort();
});
await page.goto(link);
await page.waitForSelector('[data-testid=optimistic-truncator-message]', { hidden: true, timeout: 0 });
const response = await page.evaluate(() => document.querySelector('body').innerHTML);
return response;
});
Expand Down

0 comments on commit 3715992

Please sign in to comment.