From c28516582afaf99240cd6f1a0870da1e9ee900ee Mon Sep 17 00:00:00 2001 From: pseudoyu Date: Sat, 2 Nov 2024 22:03:48 +0700 Subject: [PATCH] fix(route/google): missing title, author and content --- lib/routes/google/news.ts | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/lib/routes/google/news.ts b/lib/routes/google/news.ts index 39ba4b38bcf92d..c0f2bb5a690c8f 100644 --- a/lib/routes/google/news.ts +++ b/lib/routes/google/news.ts @@ -3,7 +3,7 @@ import { getCurrentPath } from '@/utils/helpers'; const __dirname = getCurrentPath(import.meta.url); import cache from '@/utils/cache'; -import got from '@/utils/got'; +import ofetch from '@/utils/ofetch'; import { load } from 'cheerio'; import { parseDate } from '@/utils/parse-date'; import { art } from '@/utils/render'; @@ -25,7 +25,7 @@ export const route: Route = { supportScihub: false, }, name: 'News', - maintainers: ['zoenglinghou'], + maintainers: ['zoenglinghou', 'pseudoyu'], handler, }; @@ -34,7 +34,7 @@ async function handler(ctx) { const locale = ctx.req.param('locale'); const categoryUrls = await cache.tryGet(`google:news:${locale}`, async () => { - const { data: front_data } = await got(`${baseUrl}/?${locale}`); + const front_data = await ofetch(`${baseUrl}/?${locale}`); const $ = load(front_data); return [ @@ -61,14 +61,32 @@ async function handler(ctx) { }); const categoryUrl = categoryUrls.find((item) => item.category === category).url; - const { data } = await got(categoryUrl); + const data = await ofetch(categoryUrl); const $ = load(data); const list = [...$('.UwIKyb'), ...$('.IBr9hb'), ...$('.IFHyqb')]; // 3 rows of news, 3-rows-wide news, single row news const items = list.map((item) => { item = $(item); - const title = item.find('h4').text(); + + const title = item.find('.gPFEn').text(); + + const authorText = item.find('.bInasb span').text(); + const authors = authorText + ? authorText + .replace(/^By\s+/i, '') // Handle 'By' case-insensitively + .replaceAll(/\s+\([^)]*\)/g, '') // Remove parenthetical info like (She/Her) + .split(/,|\s+&\s+|\s+and\s+/) // Split on comma, &, and 'and' + .map((author) => author.trim()) + .filter((author) => { + // Filter out empty strings and common suffixes + if (!author) {return false;} + const suffixes = ['et al', 'et al.']; + return !suffixes.some((suffix) => author.toLowerCase().endsWith(suffix)); + }) + .map((author) => ({ name: author })) + : []; + return { title, description: art(path.join(__dirname, 'templates/news.art'), { @@ -76,7 +94,7 @@ async function handler(ctx) { brief: title, }), pubDate: parseDate(item.find('time').attr('datetime')), - author: item.find('.oovtQ').text(), + author: authors, link: new URL(item.find('a.WwrzSb').first().attr('href'), baseUrl).href, }; });