Skip to content

Commit

Permalink
fix(route): deeplearning (#16868)
Browse files (browse the repository at this point in the history)
* fix(route): deeplearning

* fix: url cleaning
  • Loading branch information
TonyRL authored Sep 23, 2024
1 parent 6c24f14 commit f378dab
Showing 1 changed file with 30 additions and 18 deletions.
48 changes: 30 additions & 18 deletions lib/routes/deeplearning/thebatch.ts
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
import { Route } from '@/types';
import cache from '@/utils/cache';
import got from '@/utils/got';
import ofetch from '@/utils/ofetch';
import * as cheerio from 'cheerio';
import { parseDate } from '@/utils/parse-date';

export const route: Route = {
path: '/thebatch',
Expand All @@ -27,33 +29,43 @@ export const route: Route = {
};

async function handler() {
const page = await got({
method: 'get',
url: `https://www.deeplearning.ai/the-batch/`,
});
const nextJs = page.data.match(/<script id="__NEXT_DATA__" type="application\/json">(.*)<\/script>/)[1];
const baseUrl = 'https://www.deeplearning.ai';
const link = `${baseUrl}/the-batch/`;
const page = await ofetch(link);
const $ = cheerio.load(page);
const nextJs = $('script#__NEXT_DATA__').text();
const nextBuildId = JSON.parse(nextJs).buildId;

const listing = await got({
method: 'get',
url: `https://www.deeplearning.ai/_next/data/${nextBuildId}/the-batch.json`,
});
const listing = await ofetch(`${baseUrl}/_next/data/${nextBuildId}/the-batch.json`);

const items = listing.data.pageProps.posts.map((item) => ({
const items = listing.pageProps.posts.map((item) => ({
title: item.title,
link: `https://www.deeplearning.ai/the-batch/${item.slug}`,
jsonUrl: `https://www.deeplearning.ai/_next/data/${nextBuildId}/the-batch/${item.slug}.json`,
pubDate: new Date(item.published_at).toUTCString(),
link: `${link}${item.slug}`,
jsonUrl: `${baseUrl}/_next/data/${nextBuildId}/the-batch/${item.slug}.json`,
pubDate: parseDate(item.published_at),
}));

return {
title: `The Batch - a new weekly newsletter from deeplearning.ai`,
link: `https://www.deeplearning.ai/the-batch/`,
title: 'The Batch - a new weekly newsletter from deeplearning.ai',
link,
item: await Promise.all(
items.map((item) =>
cache.tryGet(item.link, async () => {
const resp = await got({ method: 'get', url: item.jsonUrl });
item.description = resp.data.pageProps.cmsData.post.html;
const resp = await ofetch(item.jsonUrl);
const $ = cheerio.load(resp.pageProps.cmsData.post.html);

$('a').each((_, ele) => {
if (ele.attribs.href?.includes('utm_campaign')) {
const url = new URL(ele.attribs.href);
url.searchParams.delete('utm_campaign');
url.searchParams.delete('utm_source');
url.searchParams.delete('utm_medium');
url.searchParams.delete('_hsenc');
ele.attribs.href = url.href;
}
});

item.description = $.html();
return item;
})
)
Expand Down

0 comments on commit f378dab

Please sign in to comment.