Skip to content

Commit

Permalink
fix(route/phoronix): adapt to the new web path (#13946)
Browse files Browse the repository at this point in the history
Signed-off-by: Rongrong <i@rong.moe>
  • Loading branch information
Rongronggg9 authored Dec 3, 2023
1 parent faa249f commit c68fba6
Show file tree
Hide file tree
Showing 5 changed files with 183 additions and 54 deletions.
225 changes: 177 additions & 48 deletions lib/v2/phoronix/index.js
Original file line number Diff line number Diff line change
@@ -1,38 +1,185 @@
const parser = require('@/utils/rss-parser');
const cheerio = require('cheerio');
const got = require('@/utils/got');
const dayjs = require('dayjs');
dayjs.extend(require('dayjs/plugin/utc'));
dayjs.extend(require('dayjs/plugin/timezone'));

const redirectCacheKey = 'phoronix:redirect';
const webArticlesCacheKey = 'phoronix:web-articles';
const articleCacheKey = 'phoronix:articles';

const baseUrl = 'https://www.phoronix.com';
const rssUrl = `${baseUrl}/rss.php`;

module.exports = async (ctx) => {
const { page, queryOrItem } = ctx.params;
const rssUrl = new URL('/rss.php', baseUrl);
rssUrl.searchParams.set('page', page);
/**
 * Parse the RSS feed at `rssUrl` and reshape it into the feed object this route works with.
 *
 * @returns {Promise<object>} Feed metadata from the parsed RSS, its entries under `item`,
 *   plus fixed icon/image/logo URLs and a fixed keyword list.
 */
const feedFetch = async () => {
    const { title, link, description, items, language } = await parser.parseURL(rssUrl);

    // The same picture serves as both `icon` and `image`.
    const siteIcon = 'https://www.phoronix.com/android-chrome-192x192.png';

    // Copied from the web page metadata
    const keywords = [
        'Linux Hardware Reviews',
        'Linux hardware benchmarks',
        'Linux Hardware',
        'Linux benchmarking',
        'Desktop Linux',
        'GNU/Linux benchmarks',
        'Open Source AMD',
        'Linux How To',
        'X.Org drivers',
        'Ubuntu hardware',
        'Phoronix Test Suite',
    ];

    return {
        title,
        link,
        description,
        item: items,
        language,
        icon: siteIcon,
        image: siteIcon,
        logo: 'https://www.phoronix.com/phxcms7-css/phoronix.png',
        category: keywords,
    };
};

/**
 * Build a feed object from an already-fetched listing page.
 *
 * @param {object} response - HTTP response; only `body` (the HTML) and `url` are read.
 * @returns {object} Feed metadata taken from the page's <title> and meta tags. `item` is a
 *   de-duplicated list of `{ link }` objects for every anchor under `#main` whose href starts
 *   with `/review/` or `/news/`, made absolute with `baseUrl`. `category` is the keywords meta
 *   content split on ", ", or undefined when the page has no such tag.
 */
const webFetchCb = (response) => {
    const $ = cheerio.load(response.body);

    // Collect every href first; the Set drops links that appear more than once on the page.
    const hrefs = $('#main a')
        .toArray()
        .map((e) => e.attribs.href);

    return {
        title: $('title').text(),
        link: response.url,
        description: $('meta[name="Description"]').attr('content'),
        item: [...new Set(hrefs)]
            // Anchors without an href yield undefined, hence the truthiness check.
            .filter((link) => link && (link.startsWith('/review/') || link.startsWith('/news/')))
            .map((link) => ({ link: `${baseUrl}${link}` })),
        language: 'en-us',
        icon: 'https://www.phoronix.com/android-chrome-192x192.png',
        image: 'https://www.phoronix.com/android-chrome-192x192.png',
        logo: 'https://www.phoronix.com/phxcms7-css/phoronix.png',
        // `.attr()` returns undefined when the meta tag is absent; without the optional call
        // a page lacking keywords would throw and take the whole feed down with it.
        category: $('meta[name="keywords"]').attr('content')?.split(', '),
    };
};

/**
 * Fetch a listing page and convert it with `webFetchCb`, going through `ctx.cache.tryGet`
 * under a key derived from the URL.
 *
 * @param {object} ctx - Request context; only `ctx.cache.tryGet` is used here.
 * @param {string} url - Page URL to fetch.
 * @returns {Promise<object|string>} The feed object, or the sentinel string '404' when the
 *   request fails with an HTTPError whose status code is 404. Any other error is rethrown.
 */
const webFetch = (ctx, url) => {
    const cacheKey = `${webArticlesCacheKey}:${url}`;

    return ctx.cache.tryGet(cacheKey, async () => {
        try {
            const response = await got(url);
            return webFetchCb(response);
        } catch (error) {
            const isNotFound = error.name === 'HTTPError' && error.response.statusCode === 404;
            if (!isNotFound) {
                throw error;
            }
            // A missing page is an expected outcome, so it is reported as a value.
            return '404';
        }
    });
};

/**
 * Resolve a legacy `/scan.php?page=...` URL and return the feed built from the page it leads to.
 *
 * @param {object} ctx - Request context; `ctx.cache.tryGet` and `ctx.cache.set` are used.
 * @param {string} page - Value for the `page` query parameter.
 * @param {string} [queryOrItem] - Sent as `item` when `page` is 'category', otherwise as `q`.
 * @returns {Promise<object|string>} Result of `webFetchCb` on the fetched response, or
 *   whatever `webFetch` returns for the cached target URL.
 */
const legacyFetch = async (ctx, page, queryOrItem) => {
const legacyUrl = new URL('/scan.php', baseUrl);
legacyUrl.searchParams.set('page', page);
if (queryOrItem) {
if (page === 'category') {
// NOTE(review): the `rssUrl` lines in this function sit directly beside equivalent
// `legacyUrl` lines; in this diff listing they look like pre-change lines - confirm.
rssUrl.searchParams.set('item', queryOrItem);
legacyUrl.searchParams.set('item', queryOrItem);
} else {
rssUrl.searchParams.set('q', queryOrItem);
legacyUrl.searchParams.set('q', queryOrItem);
}
}

// NOTE(review): this outer `feed` is never read below (the `feed` inside `if (response)`
// is a separate block-scoped binding) - presumably also a pre-change line; confirm.
const feed = await parser.parseURL(rssUrl.toString());
// `response` is assigned only inside the tryGet callback, so it stays undefined whenever
// that callback is not run (presumably a cache hit - confirm against the cache helper).
let response;
// The value stored under the redirect key is `response.url` for the legacy URL.
const webUrl = await ctx.cache.tryGet(`${redirectCacheKey}:${legacyUrl.toString()}`, async () => {
response = await got(legacyUrl.toString());
return response.url;
});
if (response) {
// The page was fetched just now: parse it directly and store the result under the same
// key format `webFetch` uses, rather than requesting the page a second time.
const feed = webFetchCb(response);
ctx.cache.set(`${webArticlesCacheKey}:${webUrl}`, feed);
return feed;
}
return await webFetch(ctx, webUrl);
};

const items = await Promise.all(
feed.items.map((item) =>
ctx.cache.tryGet(item.link, async () => {
/**
 * Fetch the listing at `/<category>` or `/<category>/<topic>` and, if `webFetch` reports
 * '404', retry through `legacyFetch` with the same arguments.
 *
 * @param {object} ctx - Request context, passed through to `webFetch` / `legacyFetch`.
 * @param {string} category - First path segment.
 * @param {string} [topic] - Optional second path segment.
 * @returns {Promise<object|string>} Whatever the successful fetcher returned.
 */
const tryFetch = async (ctx, category, topic) => {
    const path = topic ? `${category}/${topic}` : category;
    const webFeed = await webFetch(ctx, `${baseUrl}/${path}`);
    if (webFeed !== '404') {
        return webFeed;
    }
    return legacyFetch(ctx, category, topic);
};

module.exports = async (ctx) => {
const { category, topic } = ctx.params;
let feed;
switch (category) {
case 'category':
case 'news_topic':
feed = await legacyFetch(ctx, category, topic);
break;
case 'rss':
feed = await feedFetch();
break;
default:
feed = category ? await tryFetch(ctx, category, topic) : await feedFetch();
break;
}

feed.item = await Promise.all(
feed.item.map((item) =>
ctx.cache.tryGet(`${articleCacheKey}:${item.link}`, async () => {
const response = await got(item.link);
const html = response.body;
const $ = cheerio.load(html);
const content = $('.content');

// Author
const authorSelector = $('.author > a');
// thel last 2 are the category and comments
// the last 2 are the category and comments
const author = authorSelector
.slice(0, authorSelector.length - 2)
.toArray()
.map((e) => $(e).text());
const category = [];
if (item.link.includes('/news/')) {
category.push('News');
} else if (item.link.includes('/review/')) {
category.push('Review');
}
const categorySelector = authorSelector.eq(-2);
if (categorySelector.length) {
category.push(categorySelector.text());
}
let pubDate;
if (!item.pubDate) {
// the text next to the category is the date
let pubDateReadable = categorySelector.length && categorySelector[0].nextSibling?.nodeValue;
if (pubDateReadable) {
pubDateReadable = pubDateReadable.replace(/on|at|\./g, '').trim();
if (/\d{4}$/.test(pubDateReadable)) {
// Only date, no time
// Michael Larabel lives in Indiana, USA, so we assume TZ=America/Indiana/Indianapolis
// https://www.phoronix.com/review/phoronix_office_2014
// Here we use the trick to take daylight saving into account.
pubDate = dayjs
// If we don't append "UTC" at the end,
// dayjs.utc() may still parse the date in the platform (local) timezone.
// E.g., if the platform timezone is UTC+8, then:
// > dayjs.utc('2 Dec 2023').toString()
// 'Fri, 01 Dec 2023 16:00:00 GMT'
// > dayjs.utc('2 Dec 2023 UTC').toString()
// 'Sat, 02 Dec 2023 00:00:00 GMT'
// Append "UTC" at the end to explicitly prohibit the weird behavior.
.utc(`${pubDateReadable} 08:00 UTC`)
.tz('America/Indiana/Indianapolis', true);
} else {
// date, time, and timezone (including daylight saving)
pubDate = dayjs(pubDateReadable);
}
if (!pubDate.isValid()) {
pubDate = pubDateReadable;
}
}
}

// Maybe it's paginated
const links = $('.pagination > a')
Expand All @@ -55,53 +202,35 @@ module.exports = async (ctx) => {
content.append(pages);
}

// Summary
const summary = $('.content > p:nth-child(1)');

// High res images
content.find('img').each((_, img) => {
if (img.attribs.src.endsWith('_med')) {
img.attribs.src = img.attribs.src.replace('_med', '_show');
const images = content.find('img');
// Remove topic image
const topicImage = images.first();
if (topicImage.attr('src')?.startsWith('/assets/categories/')) {
const topicImageContainer = topicImage.parent();
if (!topicImageContainer.text().trim()) {
topicImageContainer.remove();
} else {
topicImage.remove();
}
}
// High-res images
images.each((_, img) => {
img.attribs.src = img.attribs.src.replace(/_med$/, '');
});

return {
title: item.title,
id: item.guid,
pubDate: item.pubDate,
title: item.title || $('article h1').text(),
pubDate: item.pubDate || pubDate,
author: author.join(', '),
link: item.link,
summary: summary.html(),
summary: $('meta[name="twitter:description"]').attr('content'),
description: content.html(),
icon: 'https://www.phoronix.com/android-chrome-192x192.png',
logo: 'https://www.phoronix.com/phxcms7-css/phoronix.png',
image: $('meta[name="twitter:image"]').attr('content'),
category: item.category || category,
};
})
)
);

ctx.state.data = {
title: feed.title,
link: feed.link,
description: feed.description,
item: items,
language: feed.language,
icon: 'https://www.phoronix.com/android-chrome-192x192.png',
image: 'https://www.phoronix.com/android-chrome-192x192.png',
logo: 'https://www.phoronix.com/phxcms7-css/phoronix.png',
// Copied from thier web page metadata
category: [
'Linux Hardware Reviews',
'Linux hardware benchmarks',
'Linux Hardware',
'Linux benchmarking',
'Desktop Linux',
'GNU/Linux benchmarks',
'Open Source AMD',
'Linux How To',
'X.Org drivers',
'Ubuntu hardware',
'Phoronix Test Suite',
],
};
ctx.state.data = feed;
};
2 changes: 1 addition & 1 deletion lib/v2/phoronix/maintainer.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
module.exports = {
'/:page/:queryOrItem?': ['oppliate'],
'/:category?/:topic?': ['oppliate', 'Rongronggg9'],
};
6 changes: 3 additions & 3 deletions lib/v2/phoronix/radar.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ module.exports = {
_name: 'Phoronix',
'.': [
{
title: '新闻与评测',
title: 'News & Reviews',
docs: 'https://docs.rsshub.app/routes/new-media#phoronix',
source: ['/*'],
target: '/phoronix/news',
source: ['/:category?/:topic?'],
target: '/phoronix/:category?/:topic?',
},
],
},
Expand Down
2 changes: 1 addition & 1 deletion lib/v2/phoronix/router.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
module.exports = (router) => {
router.get('/:page/:queryOrItem?', require('./index'));
router.get('/:category?/:topic?', require('./index'));
};
2 changes: 1 addition & 1 deletion website/docs/routes/new-media.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -1567,7 +1567,7 @@ This route provides a flexible plan with full text content to subscribe specific

### News & Reviews {#phoronix-news-reviews}

<Route author="oppliate" example="/phoronix/news_topic/Intel" path="/phoronix/:page/:queryOrItem?" paramsDesc={['Page name', 'For `category` it corresponds to `item`, for other pages it\'s `q`. You may find available parameters from their navigator links. E.g. to subscribe to the category page `https://www.phoronix.com/scan.php?page=category&item=Computers`, fill in the path `/phoronix/category/Computers`']} radar="1"/>
<Route author="oppliate Rongronggg9" example="/phoronix/linux/KDE" path="/phoronix/:category?/:topic?" paramsDesc={['Category', 'Topic. You may find available parameters from their navigator links. E.g. to subscribe to `https://www.phoronix.com/reviews/Operating+Systems`, fill in the path `/phoronix/reviews/Operating+Systems`']} radar="1"/>

## PMCAFF {#pmcaff}

Expand Down

0 comments on commit c68fba6

Please sign in to comment.