Skip to content

Commit

Permalink
feat(route): add route for dealstreetasia (#16909)
Browse files Browse the repository at this point in the history
* create namespace

* add section route

* add home

* better sanitise

* puppeteer

* networkidle2 & more allowed resources

* Revert "networkidle2 & more allowed resources"

This reverts commit a974e93.

* revert puppeteer

* revert full text

* author placeholder

* home new method

* home remove extensive comments

* new home

* new section & home

* more sanitize

* simplify description

* resolve comments

* better OR logic

* fix: remove duplicate name

---------
  • Loading branch information
jack2game authored Sep 30, 2024
1 parent 0f81550 commit 6bd881c
Show file tree
Hide file tree
Showing 3 changed files with 135 additions and 0 deletions.
72 changes: 72 additions & 0 deletions lib/routes/dealstreetasia/home.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import { Route } from '@/types';
// import cache from '@/utils/cache';
import ofetch from '@/utils/ofetch'; // Unified request library used
import { load } from 'cheerio'; // An HTML parser with an API similar to jQuery
// import puppeteer from '@/utils/puppeteer';
// import { parseDate } from '@/utils/parse-date';

/**
 * Route metadata for the Deal Street Asia home feed.
 *
 * The route takes no parameters; `handler` produces the feed.
 */
export const route: Route = {
    path: '/home',
    categories: ['traditional-media'],
    example: '/dealstreetasia/home',
    radar: [{ source: ['dealstreetasia.com/'] }],
    name: 'Home',
    maintainers: ['jack2game'],
    handler,
    url: 'dealstreetasia.com/',
};

/**
 * Route handler for the home feed.
 *
 * @returns The feed object resolved by `fetchPage`.
 */
async function handler() {
    return await fetchPage();
}

/**
 * Builds the home feed from the JSON payload embedded in the Deal Street Asia front page.
 *
 * Downloads the front page, parses the JSON text of the `#__NEXT_DATA__` element and
 * merges the story lists found under `props.pageProps` into a single item list.
 *
 * @returns Feed object with `title`, `language`, `item` and `link`.
 * @throws Error when the page contains no `#__NEXT_DATA__` payload. Errors raised by the
 *         request or by `JSON.parse` propagate unchanged.
 */
async function fetchPage() {
    const baseUrl = 'https://dealstreetasia.com'; // Define base URL

    const response = await ofetch(`${baseUrl}/`);
    const $ = load(response);

    const rawNextData = $('#__NEXT_DATA__').text();
    if (!rawNextData) {
        // Fail with a descriptive message instead of the SyntaxError JSON.parse('') would raise.
        throw new Error('dealstreetasia: #__NEXT_DATA__ payload not found on the home page');
    }
    const pageProps = JSON.parse(rawNextData)?.props?.pageProps ?? {};

    // Story lists read from pageProps, in the order they are merged into the feed.
    const sectionKeys = [
        'topStories',
        'privateEquity',
        'ventureCapital',
        'unicorns',
        'interviews',
        'deals',
        'analysis',
        'ipos',
        'opinion',
        'policyAndRegulations',
        'people',
        'earningsAndResults',
        'theLpView',
        'dvNewsletters',
        'reports',
    ];

    // Formats a raw date value as a UTC string; empty string when absent or unparsable,
    // so the literal text "Invalid Date" never ends up in the feed.
    const toUtcString = (value: string | number | undefined): string => {
        if (!value) {
            return '';
        }
        const date = new Date(value);
        return Number.isNaN(date.getTime()) ? '' : date.toUTCString();
    };

    const seenLinks = new Set<string>();
    const list = sectionKeys
        // A list that is missing (or not an array) is skipped instead of aborting the whole feed.
        .flatMap((key) => (Array.isArray(pageProps[key]) ? pageProps[key] : []))
        .map((item) => ({
            title: item.post_title || item.title || 'No Title',
            link: item.post_url || item.link || '',
            description: item.post_excerpt || item.excerpt || '',
            pubDate: toUtcString(item.post_date || item.date),
            category: item.category_link ? item.category_link.replaceAll(/(<([^>]+)>)/gi, '') : '', // Clean HTML if category_link exists
            image: item.image_url ? item.image_url.replace(/\?.*$/, '') : '', // Remove query parameters if image_url exists
        }))
        // The same story may be listed in more than one block; keep the first occurrence.
        // Items without a link cannot be compared and are always kept.
        .filter((item) => {
            if (!item.link) {
                return true;
            }
            if (seenLinks.has(item.link)) {
                return false;
            }
            seenLinks.add(item.link);
            return true;
        });

    return {
        title: 'Deal Street Asia',
        language: 'en',
        item: list,
        link: 'https://dealstreetasia.com/',
    };
}
6 changes: 6 additions & 0 deletions lib/routes/dealstreetasia/namespace.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import type { Namespace } from '@/types';

/**
 * Namespace metadata shared by the routes in this directory:
 * the display name and the site host.
 */
export const namespace: Namespace = {
    name: 'DealStreetAsia',
    url: 'dealstreetasia.com',
};
57 changes: 57 additions & 0 deletions lib/routes/dealstreetasia/section.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import { Route } from '@/types';
// import cache from '@/utils/cache';
import ofetch from '@/utils/ofetch'; // Unified request library used
import { load } from 'cheerio'; // An HTML parser with an API similar to jQuery
// import puppeteer from '@/utils/puppeteer';
// import { parseDate } from '@/utils/parse-date';

/**
 * Route metadata for a single Deal Street Asia section feed.
 *
 * The `:section` path parameter is passed to `handler`, which uses it to build
 * the `/section/<section>/` page URL on the site.
 */
export const route: Route = {
    path: '/section/:section',
    categories: ['traditional-media'],
    example: '/dealstreetasia/section/private-equity',
    parameters: { section: 'target section' },
    radar: [
        {
            // Match section pages (not the site root) so the rule can capture `:section`.
            source: ['dealstreetasia.com/section/:section'],
            target: '/section/:section',
        },
    ],
    name: 'Section',
    maintainers: ['jack2game'],
    handler,
    url: 'dealstreetasia.com/',
};

/**
 * Route handler for a section feed.
 *
 * @param ctx Request context; the `section` route parameter is read via `ctx.req.param`.
 * @returns The feed object resolved by `fetchPage` for that section.
 */
async function handler(ctx) {
    return await fetchPage(ctx.req.param('section'));
}

/**
 * Builds the feed for one section from the JSON payload embedded in its page.
 *
 * Downloads `/section/<section>/`, parses the JSON text of the `#__NEXT_DATA__` element
 * and maps `props.pageProps.sectionData.stories.nodes` to feed items.
 *
 * @param section Section slug as it appears in the site URL (e.g. `private-equity`).
 * @returns Feed object with `title`, `language`, `item` and `link`.
 * @throws Error when the page has no `#__NEXT_DATA__` payload or the payload has no
 *         section data. Errors raised by the request or by `JSON.parse` propagate unchanged.
 */
async function fetchPage(section: string) {
    const baseUrl = 'https://dealstreetasia.com'; // Define base URL

    const response = await ofetch(`${baseUrl}/section/${section}/`);
    const $ = load(response);

    const rawNextData = $('#__NEXT_DATA__').text();
    if (!rawNextData) {
        // Fail with a descriptive message instead of the SyntaxError JSON.parse('') would raise.
        throw new Error(`dealstreetasia: #__NEXT_DATA__ payload not found for section "${section}"`);
    }

    const sectionData = JSON.parse(rawNextData)?.props?.pageProps?.sectionData;
    if (!sectionData) {
        throw new Error(`dealstreetasia: no section data found for section "${section}"`);
    }

    // Fall back to the requested slug so the title never ends in "undefined".
    const headingText = sectionData.name || section;

    // Formats a raw date value as a UTC string; empty string when absent or unparsable,
    // so the literal text "Invalid Date" never ends up in the feed.
    const toUtcString = (value: string | number | undefined): string => {
        if (!value) {
            return '';
        }
        const date = new Date(value);
        return Number.isNaN(date.getTime()) ? '' : date.toUTCString();
    };

    const stories = sectionData.stories?.nodes ?? [];

    const feedItems = stories.map((item) => ({
        title: item.title || 'No Title',
        // Item links keep the `www.` host used by the original implementation.
        link: item.uri ? `https://www.dealstreetasia.com${item.uri}` : '',
        description: item.excerpt || '', // Default to empty string if undefined
        // NOTE(review): `date` is accepted as a fallback to mirror the home route — confirm which field the payload carries.
        pubDate: toUtcString(item.post_date || item.date),
        // A story without section nodes gets an empty category list instead of throwing.
        category: (item.sections?.nodes ?? []).map((node) => node.name),
        // Strip the query string; stays undefined when the story has no featured image URL.
        image: item.featuredImage?.node?.mediaItemUrl?.replace(/\?.*$/, ''),
    }));

    return {
        title: 'Deal Street Asia - ' + headingText,
        language: 'en',
        item: feedItems,
        link: `${baseUrl}/section/${section}/`,
    };
}

0 comments on commit 6bd881c

Please sign in to comment.