From cf7ce2470b1381cef2efc811e9c338a6ff73e866 Mon Sep 17 00:00:00 2001 From: Tsuyumi <40047364+SnowAgar25@users.noreply.github.com> Date: Wed, 6 Nov 2024 03:56:50 +0800 Subject: [PATCH] =?UTF-8?q?feat(route/pixiv):=20add=20R18=20novels=20suppo?= =?UTF-8?q?rt=20and=20full=20content=20toggle=20for=E2=80=A6=20(#17391)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(route/pixiv): add R18 novels support and full content toggle for user novels * fix: information & image placeholders * refactor: split novels fetching into SFW/NSFW modules and improve type definitions * feat: add info for sfw * feat: add radar * refactor: use jsdom instead of regex * feat: add limit support for nsfw novels * docs: rename radar title * revert: part of #17440 Object.entries(options.searchParams) returns `[]` * fix: clean up * feat: early exit when no SFW novels found * refactor: combine novel parsing logic into utils * docs: restore pixiv doc link * feat: cache novel content * refactor: cleanup * refactor: full content function --------- --- lib/routes/pixiv/api/get-illust-detail.ts | 22 ++ lib/routes/pixiv/api/get-novels-nsfw.ts | 247 ++++++++++++++++++++++ lib/routes/pixiv/api/get-novels-sfw.ts | 201 ++++++++++++++++++ lib/routes/pixiv/novels.ts | 110 ++++++---- lib/routes/pixiv/utils.ts | 119 +++++++++++ 5 files changed, 654 insertions(+), 45 deletions(-) create mode 100644 lib/routes/pixiv/api/get-illust-detail.ts create mode 100644 lib/routes/pixiv/api/get-novels-nsfw.ts create mode 100644 lib/routes/pixiv/api/get-novels-sfw.ts diff --git a/lib/routes/pixiv/api/get-illust-detail.ts b/lib/routes/pixiv/api/get-illust-detail.ts new file mode 100644 index 00000000000000..64f524f1f82941 --- /dev/null +++ b/lib/routes/pixiv/api/get-illust-detail.ts @@ -0,0 +1,22 @@ +import got from '../pixiv-got'; +import { maskHeader } from '../constants'; +import queryString from 'query-string'; + +/** + * 获取插画详细信息 + * @param {string} illust_id 插画作品 id + * @param {string} token pixiv oauth token + * @returns {Promise>} + */ +export default function getIllustDetail(illust_id: string, token: string) { + return got('https://app-api.pixiv.net/v1/illust/detail', { + headers: { + ...maskHeader, + Authorization: 'Bearer ' + token, + }, + searchParams: queryString.stringify({ + illust_id, + filter: 'for_ios', + }), + }); +} diff --git a/lib/routes/pixiv/api/get-novels-nsfw.ts b/lib/routes/pixiv/api/get-novels-nsfw.ts new file mode 100644 index 00000000000000..bc6f125707e8f6 --- /dev/null +++ b/lib/routes/pixiv/api/get-novels-nsfw.ts @@ -0,0 +1,247 @@ +import got from '../pixiv-got'; +import { maskHeader } from '../constants'; +import queryString from 'query-string'; +import { config } from '@/config'; +import { JSDOM, VirtualConsole } from 'jsdom'; + +import pixivUtils from '../utils'; +import ConfigNotFoundError from '@/errors/types/config-not-found'; +import cache from '@/utils/cache'; +import { parseDate } from 'tough-cookie'; +import { getToken } from '../token'; + +interface nsfwNovelWork { + id: string; + title: string; + caption: string; + restrict: number; + x_restrict: number; + is_original: boolean; + image_urls: { + square_medium: string; + medium: string; + large: string; + }; + create_date: string; + tags: Array<{ + name: string; + translated_name: string | null; + added_by_uploaded_user: boolean; + }>; + page_count: number; + text_length: number; + user: { + id: number; + name: string; + account: string; + profile_image_urls: { + medium: string; + }; + is_followed: boolean; + is_access_blocking_user: boolean; + }; + series?: { + id?: number; + title?: string; + }; + total_bookmarks: number; + total_view: number; + total_comments: number; +} + +interface nsfwNovelsResponse { + data: { + user: { + id: number; + name: string; + account: string; + profile_image_urls: { + medium: string; + }; + is_followed: boolean; + is_access_blocking_user: boolean; + }; + novels: nsfwNovelWork[]; + }; +} + +interface nsfwNovelDetail { + id: string; + title: string; + seriesId: string | null; + seriesTitle: string | null; + seriesIsWatched: boolean | null; + userId: string; + coverUrl: string; + tags: string[]; + caption: string; + cdate: string; + rating: { + like: number; + bookmark: number; + view: number; + }; + text: string; + marker: null; + illusts: string[]; + images: { + [key: string]: { + novelImageId: string; + sl: string; + urls: { + '240mw': string; + '480mw': string; + '1200x1200': string; + '128x128': string; + original: string; + }; + }; + }; + seriesNavigation: { + nextNovel: null; + prevNovel: { + id: number; + viewable: boolean; + contentOrder: string; + title: string; + coverUrl: string; + viewableMessage: null; + } | null; + } | null; + glossaryItems: string[]; + replaceableItemIds: string[]; + aiType: number; + isOriginal: boolean; +} + +function getNovels(user_id: string, token: string): Promise { + return got('https://app-api.pixiv.net/v1/user/novels', { + headers: { + ...maskHeader, + Authorization: 'Bearer ' + token, + }, + searchParams: queryString.stringify({ + user_id, + filter: 'for_ios', + }), + }); +} + +async function getNovelFullContent(novel_id: string, token: string): Promise { + return (await cache.tryGet(`https://app-api.pixiv.net/webview/v2/novel:${novel_id}`, async () => { + // https://github.com/mikf/gallery-dl/blob/main/gallery_dl/extractor/pixiv.py + // https://github.com/mikf/gallery-dl/commit/db507e30c7431d4ed7e23c153a044ce1751c2847 + const response = await got('https://app-api.pixiv.net/webview/v2/novel', { + headers: { + ...maskHeader, + Authorization: 'Bearer ' + token, + }, + searchParams: queryString.stringify({ + id: novel_id, + viewer_version: '20221031_ai', + }), + }); + + const virtualConsole = new VirtualConsole().on('error', () => void 0); + + const { window } = new JSDOM(response.data, { + runScripts: 'dangerously', + virtualConsole, + }); + + const novelDetail = window.pixiv?.novel as nsfwNovelDetail; + + window.close(); + + if (!novelDetail) { + throw new Error('No novel data found'); + } + + return novelDetail; + })) as nsfwNovelDetail; +} + +function convertPixivProtocolExtended(caption: string): string { + const protocolMap = new Map([ + [/pixiv:\/\/novels\/(\d+)/g, 'https://www.pixiv.net/novel/show.php?id=$1'], + [/pixiv:\/\/illusts\/(\d+)/g, 'https://www.pixiv.net/artworks/$1'], + [/pixiv:\/\/users\/(\d+)/g, 'https://www.pixiv.net/users/$1'], + [/pixiv:\/\/novel\/series\/(\d+)/g, 'https://www.pixiv.net/novel/series/$1'], + ]); + + let convertedText = caption; + + for (const [pattern, replacement] of protocolMap) { + convertedText = convertedText.replace(pattern, replacement); + } + + return convertedText; +} + +export async function getR18Novels(id: string, fullContent: boolean, limit: number = 100) { + if (!config.pixiv || !config.pixiv.refreshToken) { + throw new ConfigNotFoundError( + '該用戶爲 R18 創作者,需要 PIXIV_REFRESHTOKEN。This user is an R18 creator, PIXIV_REFRESHTOKEN is required - pixiv RSS is disabled due to the lack of relevant config' + ); + } + + const token = await getToken(cache.tryGet); + if (!token) { + throw new ConfigNotFoundError('pixiv not login'); + } + + const response = await getNovels(id, token); + const novels = limit ? response.data.novels.slice(0, limit) : response.data.novels; + const username = novels[0].user.name; + + const items = await Promise.all( + novels.map(async (novel) => { + const baseItem = { + title: novel.series?.title ? `${novel.series.title} - ${novel.title}` : novel.title, + description: ` + +

${convertPixivProtocolExtended(novel.caption) || ''}

+

+ 字數:${novel.text_length}
+ 閱覽數:${novel.total_view}
+ 收藏數:${novel.total_bookmarks}
+ 評論數:${novel.total_comments}
+

`, + author: novel.user.name, + pubDate: parseDate(novel.create_date), + link: `https://www.pixiv.net/novel/show.php?id=${novel.id}`, + category: novel.tags.map((t) => t.name), + }; + + if (!fullContent) { + return baseItem; + } + + try { + const novelDetail = await getNovelFullContent(novel.id, token); + const images = Object.fromEntries( + Object.entries(novelDetail.images) + .filter(([, image]) => image?.urls?.original) + .map(([id, image]) => [id, image.urls.original.replace('https://i.pximg.net', config.pixiv.imgProxy || '')]) + ); + + const content = await pixivUtils.parseNovelContent(novelDetail.text, images, token); + + return { + ...baseItem, + description: `${baseItem.description}
${content}`, + }; + } catch { + return baseItem; + } + }) + ); + + return { + title: `${username}'s novels - pixiv`, + description: `${username} 的 pixiv 最新小说`, + image: pixivUtils.getProxiedImageUrl(novels[0].user.profile_image_urls.medium), + link: `https://www.pixiv.net/users/${id}/novels`, + item: items, + }; +} diff --git a/lib/routes/pixiv/api/get-novels-sfw.ts b/lib/routes/pixiv/api/get-novels-sfw.ts new file mode 100644 index 00000000000000..7bb74c8970a02e --- /dev/null +++ b/lib/routes/pixiv/api/get-novels-sfw.ts @@ -0,0 +1,201 @@ +import got from '@/utils/got'; +import cache from '@/utils/cache'; +import pixivUtils from '../utils'; +import { parseDate } from '@/utils/parse-date'; + +const baseUrl = 'https://www.pixiv.net'; +interface sfwNovelWork { + id: string; + title: string; + genre: string; + xRestrict: number; + restrict: number; + url: string; + tags: string[]; + userId: string; + userName: string; + profileImageUrl: string; + textCount: number; + wordCount: number; + readingTime: number; + useWordCount: boolean; + description: string; + isBookmarkable: boolean; + bookmarkData: null; + bookmarkCount: number; + isOriginal: boolean; + marker: null; + titleCaptionTranslation: { + workTitle: null; + workCaption: null; + }; + createDate: string; + updateDate: string; + isMasked: boolean; + aiType: number; + seriesId: string; + seriesTitle: string; + isUnlisted: boolean; +} + +interface sfwNovelsResponse { + data: { + error: boolean; + message: string; + body: { + works: Record; + extraData: { + meta: { + title: string; + description: string; + canonical: string; + ogp: { + description: string; + image: string; + title: string; + type: string; + }; + twitter: { + description: string; + image: string; + title: string; + card: string; + }; + alternateLanguages: { + ja: string; + en: string; + }; + descriptionHeader: string; + }; + }; + }; + }; +} + +interface sfwNovelDetail { + body: { + content: string; + textEmbeddedImages: Record< + string, + { + novelImageId: string; + sl: string; + urls: { + original: string; + '1200x1200': string; + '480mw': string; + '240mw': string; + '128x128': string; + }; + } + >; + }; +} + +async function getNovelFullContent(novel_id: string): Promise<{ content: string; images: Record }> { + const url = `${baseUrl}/ajax/novel/${novel_id}`; + return (await cache.tryGet(url, async () => { + const response = await got(url, { + headers: { + referer: `${baseUrl}/novel/show.php?id=${novel_id}`, + }, + }); + + const novelDetail = response.data as sfwNovelDetail; + + if (!novelDetail) { + throw new Error('No novel data found'); + } + + const images: Record = {}; + + if (novelDetail.body.textEmbeddedImages) { + for (const [id, image] of Object.entries(novelDetail.body.textEmbeddedImages)) { + images[id] = pixivUtils.getProxiedImageUrl(image.urls.original); + } + } + + return { + content: novelDetail.body.content, + images, + }; + })) as { content: string; images: Record }; +} + +export async function getNonR18Novels(id: string, fullContent: boolean, limit: number = 100) { + const url = `${baseUrl}/users/${id}/novels`; + const { data: allData } = await got(`${baseUrl}/ajax/user/${id}/profile/all`, { + headers: { + referer: url, + }, + }); + + const novels = Object.keys(allData.body.novels) + .sort((a, b) => Number(b) - Number(a)) + .slice(0, Number.parseInt(String(limit), 10)); + + if (novels.length === 0) { + throw new Error('No novels found, fallback to R18 API'); + // Throw error early to avoid unnecessary API requests + // Since hasPixivAuth() check failed earlier and R18 API requires authentication, this will result in ConfigNotFoundError + } + + const searchParams = new URLSearchParams(); + for (const novel of novels) { + searchParams.append('ids[]', novel); + } + + const { data } = (await got(`${baseUrl}/ajax/user/${id}/profile/novels`, { + headers: { + referer: url, + }, + searchParams, + })) as sfwNovelsResponse; + + const items = await Promise.all( + Object.values(data.body.works).map(async (item) => { + const baseItem = { + title: item.title, + description: ` + +

${item.description}

+

+ 字數:${item.textCount}
+ 閱讀時間:${item.readingTime} 分鐘
+ 收藏數:${item.bookmarkCount}
+

+ `, + link: `${baseUrl}/novel/show.php?id=${item.id}`, + author: item.userName, + pubDate: parseDate(item.createDate), + updated: parseDate(item.updateDate), + category: item.tags, + }; + + if (!fullContent) { + return baseItem; + } + + try { + const { content: initialContent, images } = await getNovelFullContent(item.id); + + const content = await pixivUtils.parseNovelContent(initialContent, images); + + return { + ...baseItem, + description: `${baseItem.description}
${content}`, + }; + } catch { + return baseItem; + } + }) + ); + + return { + title: data.body.extraData.meta.title, + description: data.body.extraData.meta.ogp.description, + image: pixivUtils.getProxiedImageUrl(Object.values(data.body.works)[0].profileImageUrl), + link: url, + item: items, + }; +} diff --git a/lib/routes/pixiv/novels.ts b/lib/routes/pixiv/novels.ts index 9d9ccdf5b9d3a3..4bdc76e286e11f 100644 --- a/lib/routes/pixiv/novels.ts +++ b/lib/routes/pixiv/novels.ts @@ -1,15 +1,36 @@ -import { Route } from '@/types'; -import got from '@/utils/got'; -import { parseDate } from '@/utils/parse-date'; -const baseUrl = 'https://www.pixiv.net'; +import { Data, Route, ViewType } from '@/types'; +import { fallback, queryToBoolean } from '@/utils/readable-social'; +import { getR18Novels } from './api/get-novels-nsfw'; +import { getNonR18Novels } from './api/get-novels-sfw'; +import { config } from '@/config'; export const route: Route = { - path: '/user/novels/:id', + path: '/user/novels/:id/:full_content?', categories: ['social-media'], + view: ViewType.Articles, example: '/pixiv/user/novels/27104704', - parameters: { id: "User id, available in user's homepage URL" }, + parameters: { + id: "User id, available in user's homepage URL", + full_content: { + description: 'Enable or disable the display of full content. ', + options: [ + { value: 'true', label: 'true' }, + { value: 'false', label: 'false' }, + ], + default: 'false', + }, + }, features: { - requireConfig: false, + requireConfig: [ + { + name: 'PIXIV_REFRESHTOKEN', + optional: true, + description: ` +Pixiv 登錄後的 refresh_token,用於獲取 R18 小說 +refresh_token after Pixiv login, required for accessing R18 novels +[https://docs.rsshub.app/deploy/config#pixiv](https://docs.rsshub.app/deploy/config#pixiv)`, + }, + ], requirePuppeteer: false, antiCrawler: false, supportBT: false, @@ -18,54 +39,53 @@ export const route: Route = { }, radar: [ { + title: 'User Novels (簡介 Basic info)', source: ['www.pixiv.net/users/:id/novels'], + target: '/user/novels/:id', + }, + { + title: 'User Novels (全文 Full text)', + source: ['www.pixiv.net/users/:id/novels'], + target: '/user/novels/:id/true', }, ], name: 'User Novels', - maintainers: ['TonyRL'], + maintainers: ['TonyRL', 'SnowAgar25'], handler, + description: ` +| 小說類型 Novel Type | full_content | PIXIV_REFRESHTOKEN | 返回內容 Content | +|-------------------|--------------|-------------------|-----------------| +| Non R18 | false | 不需要 Not Required | 簡介 Basic info | +| Non R18 | true | 不需要 Not Required | 全文 Full text | +| R18 | false | 需要 Required | 簡介 Basic info | +| R18 | true | 需要 Required | 全文 Full text | + +Default value for \`full_content\` is \`false\` if not specified. + +Example: +- \`/pixiv/user/novels/79603797\` → 簡介 Basic info +- \`/pixiv/user/novels/79603797/true\` → 全文 Full text`, }; -async function handler(ctx) { +const hasPixivAuth = () => Boolean(config.pixiv && config.pixiv.refreshToken); + +async function handler(ctx): Promise { const id = ctx.req.param('id'); - const { limit = 100 } = ctx.req.query(); - const url = `${baseUrl}/users/${id}/novels`; - const { data: allData } = await got(`${baseUrl}/ajax/user/${id}/profile/all`, { - headers: { - referer: url, - }, - }); + const fullContent = fallback(undefined, queryToBoolean(ctx.req.param('full_content')), false); - const novels = Object.keys(allData.body.novels) - .sort((a, b) => b - a) - .slice(0, Number.parseInt(limit, 10)); - const searchParams = new URLSearchParams(); - for (const novel of novels) { - searchParams.append('ids[]', novel); - } + const { limit } = ctx.req.query(); - const { data } = await got(`${baseUrl}/ajax/user/${id}/profile/novels`, { - headers: { - referer: url, - }, - searchParams, - }); + // Use R18 API first if auth exists + if (hasPixivAuth()) { + return await getR18Novels(id, fullContent, limit); + } - const items = Object.values(data.body.works).map((item) => ({ - title: item.seriesTitle || item.title, - description: item.description || item.title, - link: `${baseUrl}/novel/series/${item.id}`, - author: item.userName, - pubDate: parseDate(item.createDate), - updated: parseDate(item.updateDate), - category: item.tags, - })); + // Attempt non-R18 API when Pixiv auth is missing + const nonR18Result = await getNonR18Novels(id, fullContent, limit).catch(() => null); + if (nonR18Result) { + return nonR18Result; + } - return { - title: data.body.extraData.meta.title, - description: data.body.extraData.meta.ogp.description, - image: Object.values(data.body.works)[0].profileImageUrl, - link: url, - item: items, - }; + // Fallback to R18 API as last resort + return await getR18Novels(id, fullContent, limit); } diff --git a/lib/routes/pixiv/utils.ts b/lib/routes/pixiv/utils.ts index 824f742e3a95da..9bd7dab5a00f13 100644 --- a/lib/routes/pixiv/utils.ts +++ b/lib/routes/pixiv/utils.ts @@ -1,4 +1,6 @@ import { config } from '@/config'; +import { load } from 'cheerio'; +import getIllustDetail from './api/get-illust-detail'; export default { getImgs(illust) { @@ -14,4 +16,121 @@ export default { } return images; }, + getProxiedImageUrl(originalUrl: string): string { + return originalUrl.replace('https://i.pximg.net', config.pixiv.imgProxy || ''); + }, + // docs: https://www.pixiv.help/hc/ja/articles/235584168-小説作品の本文内に使える特殊タグとは + async parseNovelContent(content: string, images: Record, token?: string): Promise { + try { + // 如果有 token,處理 pixiv 圖片引用 + // If token exists, process pixiv image references + if (token) { + const imageMatches = [...content.matchAll(/\[pixivimage:(\d+)(?:-(\d+))?\]/g)]; + const imageIdToUrl = new Map(); + + // 批量獲取圖片資訊 + // Batch fetch image information + await Promise.all( + imageMatches.map(async ([, illustId, pageNum]) => { + if (!illustId) { + return; + } + + try { + const illust = (await getIllustDetail(illustId, token)).data.illust; + const pixivimages = this.getImgs(illust).map((img) => img.match(/src="([^"]+)"/)?.[1] || ''); + + const imageUrl = pixivimages[Number(pageNum) || 0]; + if (imageUrl) { + imageIdToUrl.set(pageNum ? `${illustId}-${pageNum}` : illustId, imageUrl); + } + } catch (error) { + // 記錄錯誤但不中斷處理 + // Log error but don't interrupt processing + logger.warn(`Failed to fetch illust detail for ID ${illustId}: ${error instanceof Error ? error.message : String(error)}`); + } + }) + ); + + // 替換 pixiv 圖片引用為 img 標籤 + // Replace pixiv image references with img tags + content = content.replaceAll(/\[pixivimage:(\d+)(?:-(\d+))?\]/g, (match, illustId, pageNum) => { + const key = pageNum ? `${illustId}-${pageNum}` : illustId; + const imageUrl = imageIdToUrl.get(key); + return imageUrl ? `pixiv illustration ${illustId}${pageNum ? ` page ${pageNum}` : ''}` : match; + }); + } else { + /* + * 處理 get-novels-sfw 的情況 + * 當沒有 PIXIV_REFRESHTOKEN 時,將 [pixivimage:(\d+)] 格式轉換為 artwork 連結 + * 因無法獲取 Pixiv 作品詳情,改為提供直接連結到原始作品頁面 + * + * Handle get-novels-sfw case + * When PIXIV_REFRESHTOKEN is not available, convert [pixivimage:(\d+)] format to artwork link + * Provide direct link to original artwork page since artwork details cannot be retrieved + */ + content = content.replaceAll(/\[pixivimage:(\d+)(?:-(\d+))?\]/g, (_, illustId) => `Pixiv Artwork #${illustId}`); + } + + // 處理作者上傳的圖片 + // Process author uploaded images + content = content.replaceAll(/\[uploadedimage:(\d+)\]/g, (match, imageId) => { + if (images[imageId]) { + return `novel illustration ${imageId}`; + } + return match; + }); + + // 基本格式處理 + // Basic formatting + content = content + // 換行轉換為 HTML 換行 + // Convert newlines to HTML breaks + .replaceAll('\n', '
') + // 連續換行轉換為段落 + // Convert consecutive breaks to paragraphs + .replaceAll(/(
){2,}/g, '

') + // ruby 標籤(為日文漢字標註讀音) + // ruby tags (for Japanese kanji readings) + .replaceAll(/\[\[rb:(.*?)>(.*?)\]\]/g, '$1$2') + // 外部連結 + // external links + .replaceAll(/\[\[jumpuri:(.*?)>(.*?)\]\]/g, '$1') + // 頁面跳轉,但由於 [newpage] 使用 hr 分隔,沒有頁數,沒必要跳轉,所以只顯示文字 + // Page jumps, but since [newpage] uses hr separators, without the page numbers, jumping isn't needed, so just display text + .replaceAll(/\[jump:(\d+)\]/g, 'Jump to page $1') + // 章節標題 + // chapter titles + .replaceAll(/\[chapter:(.*?)\]/g, '

$1

') + // 分頁符 + // page breaks + .replaceAll('[newpage]', '
'); + + // 使用 cheerio 進行 HTML 清理和優化 + // Use cheerio for HTML cleanup and optimization + const $content = load(`

${content}

`); + + // 處理嵌套段落:移除多餘的嵌套 + // Handle nested paragraphs: remove unnecessary nesting + $content('p p').each((_, elem) => { + const $elem = $content(elem); + $elem.replaceWith($elem.html() || ''); + }); + + // 處理段落中的標題:確保正確的 HTML 結構 + // Handle headings in paragraphs: ensure correct HTML structure + $content('p h2').each((_, elem) => { + const $elem = $content(elem); + const $parent = $elem.parent('p'); + const html = $elem.prop('outerHTML'); + if ($parent.length && html) { + $parent.replaceWith(`

${html}

`); + } + }); + + return $content.html() || ''; + } catch (error) { + throw new Error(`Error parsing novel content: ${error instanceof Error ? error.message : String(error)}`); + } + }, };