Skip to content

Commit

Permalink
fix(route): seekingalpha (#15173)
Browse files Browse the repository at this point in the history
* fix(route): seekingalpha

* feat: obtain article from api

* fix: `length` is undefined
  • Loading branch information
TonyRL authored Apr 9, 2024
1 parent ade6b21 commit fe50eca
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 25 deletions.
85 changes: 60 additions & 25 deletions lib/routes/seekingalpha/index.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import { Route } from '@/types';
import cache from '@/utils/cache';
import got from '@/utils/got';
import { load } from 'cheerio';
import ofetch from '@/utils/ofetch';
import { art } from '@/utils/render';
import { parseDate } from '@/utils/parse-date';
import dayjs from 'dayjs';
import { getCurrentPath } from '@/utils/helpers';
import path from 'node:path';
const __dirname = getCurrentPath(import.meta.url);

const baseUrl = 'https://seekingalpha.com';

export const route: Route = {
Expand All @@ -12,12 +15,7 @@ export const route: Route = {
example: '/seekingalpha/TSM/transcripts',
parameters: { symbol: 'Stock symbol', category: 'Category, see below, `news` by default' },
features: {
requireConfig: false,
requirePuppeteer: false,
antiCrawler: false,
supportBT: false,
supportPodcast: false,
supportScihub: false,
antiCrawler: true,
},
radar: [
{
Expand All @@ -33,39 +31,76 @@ export const route: Route = {
| analysis | news | transcripts | press-releases | related-analysis |`,
};

const getMachineCookie = () =>
cache.tryGet('seekingalpha:machine_cookie', async () => {
const response = await ofetch.raw(baseUrl);
return response.headers.getSetCookie().map((c) => c.split(';')[0]);
});

const apiParams = {
article: {
slug: '/articles',
include: 'author,primaryTickers,secondaryTickers,otherTags,presentations,presentations.slides,author.authorResearch,author.userBioTags,co_authors,promotedService,sentiments',
},
news: {
slug: '/news',
include: 'author,primaryTickers,secondaryTickers,otherTags',
},
pr: {
slug: '/press_releases',
include: 'acquireService,primaryTickers',
},
};

async function handler(ctx) {
const { category = 'news', symbol } = ctx.req.param();
const pageUrl = `${baseUrl}/symbol/${symbol.toUpperCase()}/${category === 'transcripts' ? `earnings/${category}` : category}`;

const response = await got(`${baseUrl}/api/v3/symbols/${symbol.toUpperCase()}/${category}`, {
searchParams: {
cacheBuster: category === 'news' ? dayjs().format('YYYY-MM-DD') : undefined,
const machineCookie = await getMachineCookie();
const response = await ofetch(`${baseUrl}/api/v3/symbols/${symbol.toUpperCase()}/${category}`, {
headers: {
cookie: machineCookie.join('; '),
},
query: {
'filter[since]': 0,
'filter[until]': 0,
id: symbol.toLowerCase(),
include: 'author,primaryTickers,secondaryTickers,sentiments',
'page[size]': ctx.req.query('limit') ? Number(ctx.req.query('limit')) : category === 'news' ? 40 : 20,
'page[size]': ctx.req.query('limit') ? Number.parseInt(ctx.req.query('limit'), 10) : category === 'news' ? 40 : 20,
'page[number]': 1,
},
});

const list = response.data.data?.map((item) => ({
const list = response.data?.map((item) => ({
title: item.attributes.title,
link: new URL(item.links.self, baseUrl).href,
pubDate: parseDate(item.attributes.publishOn),
author: response.data.included.find((i) => i.id === item.relationships.author.data.id).attributes.nick,
author: response.included.find((i) => i.id === item.relationships.author.data.id).attributes.nick,
id: item.id,
articleType: item.links.self.split('/')[1],
}));

const items = list
? await Promise.all(
list.map((item) =>
cache.tryGet(item.link, async () => {
const response = await got(item.link);
const $ = load(response.data);

const summary = $('[data-test-id=article-summary-title]').length ? $('[data-test-id=article-summary-title]').html() + $('[data-test-id=article-summary-title]').next().html() : '';
const response = await ofetch(`${baseUrl}/api/v3${apiParams[item.articleType].slug}/${item.id}`, {
headers: {
cookie: machineCookie.join('; '),
},
query: {
include: apiParams[item.articleType].include,
},
});

item.category = $('div[data-test-id=themes-list] a')
.toArray()
.map((c) => $(c).text());
item.description = summary + $('div.paywall-full-content').html();
item.category = response.included.filter((i) => i.type === 'tag').map((i) => (i.attributes.company ? `${i.attributes.company} (${i.attributes.name})` : i.attributes.name));
item.description =
(response.data.attributes.summary?.length
? art(path.join(__dirname, 'templates/summary.art'), {
summary: response.data.attributes.summary,
})
: '') + response.data.attributes.content;
item.updated = parseDate(response.data.attributes.lastModified);

return item;
})
Expand All @@ -74,8 +109,8 @@ async function handler(ctx) {
: [];

return {
title: response.data.meta.page.title,
description: response.data.meta.page.description,
title: response.meta.page.title,
description: response.meta.page.description,
link: pageUrl,
image: 'https://seekingalpha.com/samw/static/images/favicon.svg',
item: items,
Expand Down
4 changes: 4 additions & 0 deletions lib/routes/seekingalpha/templates/summary.art
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<h2>Summary</h2>
<ul>
{{ each summary s }}<li>{{ s }}</li>{{ /each }}
</ul>

0 comments on commit fe50eca

Please sign in to comment.