From 8fe6566f413861733e39877e5d11cc09758e4ef7 Mon Sep 17 00:00:00 2001
From: jack2game
Date: Mon, 30 Sep 2024 23:33:28 +0800
Subject: [PATCH] feat(route): add route for dealstreetasia (#16909)

* create namespace

* add section route

* add home

* better sanitise

* puppeteer

* networkidle2 & more allowed resources

* Revert "networkidle2 & more allowed resources"

This reverts commit a974e93a8d617c0703b7ad4b3e4e0c15b079b273.

* revert puppeteer

* revert full text

* author placeholder

* home new method

* home remove extensive comments

* new home

* new section & home

* more sanitize

* simplify description

* resolve comments

* better OR logic

* fix: remove duplicate name

---------

---
Reviewer note: the contents of home.ts and section.ts below were revised
during review; the blob ids on their `index` lines are carried over from the
original patch and do not describe the revised contents.

 lib/routes/dealstreetasia/home.ts      | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/routes/dealstreetasia/namespace.ts |  6 +++
 lib/routes/dealstreetasia/section.ts   | 74 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 172 insertions(+)
 create mode 100644 lib/routes/dealstreetasia/home.ts
 create mode 100644 lib/routes/dealstreetasia/namespace.ts
 create mode 100644 lib/routes/dealstreetasia/section.ts

diff --git a/lib/routes/dealstreetasia/home.ts b/lib/routes/dealstreetasia/home.ts
new file mode 100644
index 00000000000000..e1e740c9718ab5
--- /dev/null
+++ b/lib/routes/dealstreetasia/home.ts
@@ -0,0 +1,92 @@
+import { Route } from '@/types';
+// import cache from '@/utils/cache';
+import ofetch from '@/utils/ofetch'; // Unified request library used
+import { load } from 'cheerio'; // An HTML parser with an API similar to jQuery
+// import puppeteer from '@/utils/puppeteer';
+// import { parseDate } from '@/utils/parse-date';
+
+/** Route metadata for the DealStreetAsia homepage feed. */
+export const route: Route = {
+    path: '/home',
+    categories: ['traditional-media'],
+    example: '/dealstreetasia/home',
+    // parameters: { section: 'target section' },
+    radar: [
+        {
+            source: ['dealstreetasia.com/'],
+        },
+    ],
+    name: 'Home',
+    maintainers: ['jack2game'],
+    handler,
+    url: 'dealstreetasia.com/',
+};
+
+/**
+ * `pageProps` keys whose story lists are merged into the feed, in the order
+ * they are concatenated.
+ */
+const HOME_SECTION_KEYS = [
+    'topStories',
+    'privateEquity',
+    'ventureCapital',
+    'unicorns',
+    'interviews',
+    'deals',
+    'analysis',
+    'ipos',
+    'opinion',
+    'policyAndRegulations',
+    'people',
+    'earningsAndResults',
+    'theLpView',
+    'dvNewsletters',
+    'reports',
+];
+
+/** Route handler: resolves to the feed built by `fetchPage`. */
+async function handler() {
+    const items = await fetchPage();
+
+    return items;
+}
+
+/**
+ * Fetches the homepage and builds the feed from the JSON embedded in its
+ * `#__NEXT_DATA__` element.
+ *
+ * @returns Feed object with `title`, `language`, `link` and the mapped `item` list.
+ * @throws If the embedded JSON cannot be parsed or lacks `props.pageProps`.
+ */
+async function fetchPage() {
+    const baseUrl = 'https://dealstreetasia.com'; // Define base URL
+
+    const response = await ofetch(`${baseUrl}/`);
+    const $ = load(response);
+
+    const jsonData = JSON.parse($('#__NEXT_DATA__').text());
+    const pageProps = jsonData.props.pageProps;
+
+    // A list that is absent or not an array is skipped instead of spread, so
+    // one missing homepage block does not fail the whole feed.
+    const stories = HOME_SECTION_KEYS.flatMap((key) => {
+        const sectionStories = pageProps[key];
+        return Array.isArray(sectionStories) ? sectionStories : [];
+    });
+
+    const list = stories.map((item) => ({
+        title: item.post_title || item.title || 'No Title',
+        link: item.post_url || item.link || '',
+        description: item.post_excerpt || item.excerpt || '',
+        pubDate: item.post_date ? new Date(item.post_date).toUTCString() : item.date ? new Date(item.date).toUTCString() : '',
+        category: item.category_link ? item.category_link.replaceAll(/(<([^>]+)>)/gi, '') : '', // Clean HTML if category_link exists
+        image: item.image_url ? item.image_url.replace(/\?.*$/, '') : '', // Remove query parameters if image_url exists
+    }));
+
+    return {
+        title: 'Deal Street Asia',
+        language: 'en',
+        item: list,
+        link: 'https://dealstreetasia.com/',
+    };
+}
diff --git a/lib/routes/dealstreetasia/namespace.ts b/lib/routes/dealstreetasia/namespace.ts
new file mode 100644
index 00000000000000..62f500273335d9
--- /dev/null
+++ b/lib/routes/dealstreetasia/namespace.ts
@@ -0,0 +1,6 @@
+import type { Namespace } from '@/types';
+
+export const namespace: Namespace = {
+    name: 'DealStreetAsia',
+    url: 'dealstreetasia.com',
+};
diff --git a/lib/routes/dealstreetasia/section.ts b/lib/routes/dealstreetasia/section.ts
new file mode 100644
index 00000000000000..d8fc0d7dbca592
--- /dev/null
+++ b/lib/routes/dealstreetasia/section.ts
@@ -0,0 +1,74 @@
+import { Route } from '@/types';
+// import cache from '@/utils/cache';
+import ofetch from '@/utils/ofetch'; // Unified request library used
+import { load } from 'cheerio'; // An HTML parser with an API similar to jQuery
+// import puppeteer from '@/utils/puppeteer';
+// import { parseDate } from '@/utils/parse-date';
+
+/** Route metadata for the DealStreetAsia per-section feed. */
+export const route: Route = {
+    path: '/section/:section',
+    categories: ['traditional-media'],
+    example: '/dealstreetasia/section/private-equity',
+    parameters: { section: 'target section' },
+    radar: [
+        {
+            source: ['dealstreetasia.com/'],
+        },
+    ],
+    name: 'Section',
+    maintainers: ['jack2game'],
+    handler,
+    url: 'dealstreetasia.com/',
+};
+
+/** Route handler: builds the feed for the `section` path parameter. */
+async function handler(ctx) {
+    const section = ctx.req.param('section');
+    const items = await fetchPage(section);
+
+    return items;
+}
+
+/**
+ * Fetches a section page and builds the feed from the JSON embedded in its
+ * `#__NEXT_DATA__` element.
+ *
+ * @param section - Section slug as it appears in the site URL, e.g. `private-equity`.
+ * @returns Feed object with `title`, `language`, `link` and the mapped `item` list.
+ * @throws If the embedded JSON cannot be parsed or lacks
+ *     `props.pageProps.sectionData.stories.nodes`.
+ */
+async function fetchPage(section: string) {
+    const baseUrl = 'https://dealstreetasia.com'; // Define base URL
+    // The slug comes from the request path; percent-encode it so it cannot
+    // change the URL beyond its own path segment.
+    const sectionUrl = `${baseUrl}/section/${encodeURIComponent(section)}/`;
+
+    const response = await ofetch(sectionUrl);
+    const $ = load(response);
+
+    const jsonData = JSON.parse($('#__NEXT_DATA__').text());
+    const sectionData = jsonData.props.pageProps.sectionData;
+    const headingText = sectionData.name;
+
+    const items = sectionData.stories.nodes;
+
+    const feedItems = items.map((item) => ({
+        title: item.title || 'No Title',
+        link: item.uri ? `https://www.dealstreetasia.com${item.uri}` : '',
+        description: item.excerpt || '', // Default to empty string if undefined
+        pubDate: item.post_date ? new Date(item.post_date).toUTCString() : '',
+        // Stories without section data get an empty category list.
+        category: item.sections?.nodes?.map((node) => node.name) ?? [],
+        // Optional chaining up to `.replace`: an image node may lack a URL.
+        image: item.featuredImage?.node?.mediaItemUrl?.replace(/\?.*$/, ''), // Use .replace to sanitize the image URL
+    }));
+
+    return {
+        title: 'Deal Street Asia - ' + headingText,
+        language: 'en',
+        item: feedItems,
+        link: sectionUrl,
+    };
+}