Skip to content

Commit

Permalink
Fix for FB video replay (#212)
Browse files Browse the repository at this point in the history
- New fuzzy matching rules for FB, matching and rewriting embedded DASH (again)
- New ruleset: range-as-query args, configured per domain, which allow lookup of range requests that are expressed via query args
- APIs: add hasRangeAsQuery() to detect whether a URL matches a rule for ranges embedded in the query args, and removeRangeAsQuery() to return the URL with those range query args removed
- Part of fix for [Bug]: web-recorder can not detect facebook video webrecorder/archiveweb.page#272
  • Loading branch information
ikreymer authored Nov 20, 2024
1 parent b77d6aa commit 9308ec3
Show file tree
Hide file tree
Showing 8 changed files with 123 additions and 23 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
"path-parser": "^6.1.0",
"process": "^0.11.10",
"stream-browserify": "^3.0.0",
"warcio": "^2.4.0"
"warcio": "^2.4.2"
},
"devDependencies": {
"@swc-node/register": "^1.10.9",
Expand Down
2 changes: 1 addition & 1 deletion src/collection.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { Rewriter } from "./rewrite/index";
import { Rewriter } from "./rewrite";

import {
getTS,
Expand Down
2 changes: 2 additions & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ export {
Rewriter,
} from "./rewrite";

export { removeRangeAsQuery, hasRangeAsQuery } from "./rewrite/dsruleset";

export { ArchiveRequest } from "./request";

export { ArchiveResponse } from "./response";
Expand Down
33 changes: 17 additions & 16 deletions src/response.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { BaseAsyncIterReader, AsyncIterReader, LimitReader } from "warcio";
import { BaseAsyncIterReader, AsyncIterReader } from "warcio";
import {
isNullBodyStatus,
decodeLatin1,
Expand Down Expand Up @@ -300,21 +300,8 @@ class ArchiveResponse {
const start = Number(bytes[1]);
const end = Number(bytes[2]) || length - 1;

if (this.buffer) {
this.buffer = this.buffer.slice(start, end + 1);
} else if (this.reader) {
// [TODO]
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
if (!(this.reader instanceof LimitReader) || !this.reader.setLimitSkip) {
return false;
}
if (start !== 0 || end !== length - 1) {
this.reader.setLimitSkip(end - start + 1, start);
}
//TODO
// } else if (this.reader.setRangeAll) {
// this.reader.setRangeAll(length);
// }
if (!this.setRawRange(start, end)) {
return false;
}

this.headers.set("Content-Range", `bytes ${start}-${end}/${length}`);
Expand All @@ -326,6 +313,20 @@ class ArchiveResponse {
return true;
}

/**
 * Restrict the response body to the inclusive byte range [start, end].
 *
 * - If the body is held in `this.buffer`, the buffer is replaced by the
 *   corresponding slice.
 * - Otherwise, if `this.reader` exposes a `setLimitSkip` method, it is called
 *   with the range length and the start offset (presumably "limit" and
 *   "skip" -- confirm against the reader implementation).
 *
 * No headers are modified here.
 *
 * @param start offset of the first byte to keep
 * @param end offset of the last byte to keep (inclusive)
 * @returns true if the range was applied, false if neither a buffer nor a
 *   reader with `setLimitSkip` is available
 */
setRawRange(start: number, end: number) {
  // In-memory body: slice it directly (slice end is exclusive, hence +1).
  if (this.buffer) {
    this.buffer = this.buffer.slice(start, end + 1);
    return true;
  }

  // Streaming body: only readers that provide setLimitSkip can be narrowed.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const limitable = this.reader as any;
  if (!limitable?.setLimitSkip) {
    return false;
  }

  const rangeLength = end - start + 1;
  limitable.setLimitSkip(rangeLength, start);
  return true;
}

makeResponse(coHeaders = false, overwriteDisposition = false) {
let body: Uint8Array | ReadableStream | null = null;
if (!isNullBodyStatus(this.status)) {
Expand Down
65 changes: 61 additions & 4 deletions src/rewrite/dsruleset.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { rewriteDASH } from "./rewriteVideo";
import { type RxRewriter, type Rule } from "./rxrewriter";

//import unescapeJs from "unescape-js";
Expand Down Expand Up @@ -49,10 +50,10 @@ export const DEFAULT_RULES: Rules[] = [
{
contains: ["facebook.com/", "fbsbx.com/"],
rxRules: [
//[/"dash_prefetch_experimental.*"playlist".*?(?=["][,]["]dash)/, ruleRewriteFBDash],
[/"dash_/, ruleReplace('"__nodash__')],
[/_dash"/, ruleReplace('__nodash__"')],
[/_dash_/, ruleReplace("__nodash__")],
[/"dash_manifests.*?,"failure_reason":null}]/, ruleRewriteFBDash],
//[/"dash_/, ruleReplace('"__nodash__')],
//[/_dash"/, ruleReplace('__nodash__"')],
//[/_dash_/, ruleReplace("__nodash__")],
[/"playlist/, ruleReplace('"__playlist__')],
[
/"debugNoBatching\s?":(?:false|0)/,
Expand Down Expand Up @@ -124,6 +125,62 @@ export const HTML_ONLY_RULES: Rules[] = [
...DEFAULT_RULES,
];

/**
 * Range-as-query rules: for URLs matching `contains`, the byte range of the
 * request is carried in the query string rather than in a `Range` header.
 * `start` and `end` name the query args holding the first and last byte
 * offsets of the range.
 */
const RANGE_RULES = [
  {
    // The "." before "net" is escaped so that only a literal "fbcdn.net"
    // matches (an unescaped "." would also accept e.g. "fbcdnxnet").
    contains: /video.*fbcdn\.net/,
    start: "bytestart",
    end: "byteend",
  },
];

/**
 * Look up the range-as-query rule that applies to `url`.
 *
 * @param url URL to test against each rule's `contains` pattern
 * @returns the names of the query args holding the range start and end for
 *   the first matching rule, or null if `url` is empty or no rule matches.
 *   Note: this only checks that a rule applies to the URL, not that the
 *   query args are actually present.
 */
export function hasRangeAsQuery(url: string) {
  if (!url) {
    return null;
  }

  const matched = RANGE_RULES.find((rule) => url.match(rule.contains));
  if (!matched) {
    return null;
  }

  return { start: matched.start, end: matched.end };
}

/**
 * Strip the range-as-query args from `url`.
 *
 * @param url URL that may carry its byte range in the query string
 * @returns the URL (as serialized by `URL.href`) with both range args
 *   removed, or null when no range rule applies to the URL, when either of
 *   the two range args is missing, or when the URL cannot be parsed
 */
export function removeRangeAsQuery(url: string) {
  const rangeArgs = hasRangeAsQuery(url);
  if (!rangeArgs) {
    return null;
  }

  const { start: startArg, end: endArg } = rangeArgs;

  try {
    const target = new URL(url);
    const query = target.searchParams;

    // Only rewrite when BOTH ends of the range are present.
    const hasBoth = query.has(startArg) && query.has(endArg);
    if (!hasBoth) {
      return null;
    }

    query.delete(startArg);
    query.delete(endArg);
    return target.href;
  } catch (_e) {
    // Unparseable URL: nothing to remove.
    return null;
  }
}

/**
 * Rewrite a DASH manifest that is embedded, as a JSON-escaped string, inside
 * the matched text.
 *
 * The manifest is located between the escaped XML prolog (`\u003C?xml`) and
 * the escaped closing tag (`\u003C\/MPD>`). It is unescaped, passed through
 * `rewriteDASH`, re-escaped (with `<` written as `\u003C`) and spliced back
 * into the same position.
 *
 * @param text the matched text that may contain an embedded manifest
 * @param opts options forwarded unchanged to `rewriteDASH`
 * @returns the text with the manifest rewritten, or `text` unchanged when no
 *   complete manifest is found or the embedded string cannot be decoded
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export function ruleRewriteFBDash(text: string, opts: Record<string, any>) {
  const xmlStartMarker = "\\u003C?xml";
  const mpdEndMarker = "\\u003C\\/MPD>";

  const start = text.indexOf(xmlStartMarker);
  if (start < 0) {
    // No embedded manifest: leave the text untouched rather than slicing
    // with a negative offset.
    return text;
  }

  const endMarkerPos = text.indexOf(mpdEndMarker, start);
  if (endMarkerPos < 0) {
    // Manifest is not terminated within the match: nothing safe to rewrite.
    return text;
  }
  const end = endMarkerPos + mpdEndMarker.length;

  let rwtext: string;
  try {
    // The slice is the inside of a JSON string literal; wrapping it in
    // quotes lets JSON.parse undo the \uXXXX and \/ escapes.
    rwtext = JSON.parse('"' + text.slice(start, end) + '"');
  } catch (_e) {
    // Not a valid JSON string body (e.g. stray quote): skip the rewrite.
    return text;
  }

  let rw = rewriteDASH(rwtext, opts);

  // Re-escape as a JSON string body (drop the surrounding quotes) and keep
  // "<" escaped, as it was in the original text.
  rw = JSON.stringify(rw).replace(/</g, "\\u003C").slice(1, -1);

  return text.slice(0, start) + rw + text.slice(end);
}

// ===========================================================================
function ruleReplace(str: string) {
return (x: string) => str.replace("{0}", x);
Expand Down
26 changes: 25 additions & 1 deletion src/rewrite/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@ import { decodeResponse } from "./decoder";

import { rewriteDASH, rewriteHLS } from "./rewriteVideo";

import { DomainSpecificRuleSet, HTML_ONLY_RULES } from "./dsruleset";
import {
DomainSpecificRuleSet,
hasRangeAsQuery,
HTML_ONLY_RULES,
} from "./dsruleset";

import { RxRewriter } from "./rxrewriter";
import { JSRewriter } from "./jsrewriter";
Expand Down Expand Up @@ -322,6 +326,26 @@ export class Rewriter {
this.isCharsetUTF8 = true;
}
response.setText(text, this.isCharsetUTF8);
} else {
// check range-as-query
const result = hasRangeAsQuery(request.url);
if (result) {
const url = new URL(request.url);
const start = parseInt(url.searchParams.get(result.start) || "");
const end = parseInt(url.searchParams.get(result.end) || "");
if (!isNaN(start) && !isNaN(end)) {
const existingLen = Number(response.headers.get("Content-Length"));
const newLen = end - start + 1;
if (
existingLen !== newLen &&
(isNaN(existingLen) || existingLen > newLen) &&
response.setRawRange(start, end)
) {
console.log("setting range", start, end, newLen);
response.headers.set("Content-Length", String(newLen));
}
}
}
}

return response;
Expand Down
2 changes: 2 additions & 0 deletions test/rewriteVideo.ts
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ const test4 = ytplayer.config.args.dash = "0"; ytplayer.config.args.dashmpd = ""
t.is(result, expected, result);
});

/*
test("FB rewrite JS", async (t) => {
const content = `\
<script>
Expand All @@ -228,6 +229,7 @@ const test1 = {"__nodash__url": "foo", {"some__nodash__": "a", "data__nodash__fo
});
t.is(result, expected);
});
*/

test("Twitter rewrite json", async (t) => {
const content = {
Expand Down
14 changes: 14 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -3764,6 +3764,20 @@ warcio@^2.4.0:
uuid-random "^1.3.2"
yargs "^17.7.2"

warcio@^2.4.2:
version "2.4.2"
resolved "https://registry.yarnpkg.com/warcio/-/warcio-2.4.2.tgz#782d8dcb0769f271b0ae96521fb4969e2570e9b3"
integrity sha512-QYbZ3EGYtnAIrzL7Bajo7ak87pipilpkIfaFIzFQWUX4wuXNuKqnfQy/EAoi2tEIl3VJgsWcL+wjjk4+15MKbQ==
dependencies:
"@types/pako" "^1.0.7"
"@types/stream-buffers" "^3.0.7"
base32-encode "^2.0.0"
hash-wasm "^4.9.0"
pako "^1.0.11"
tempy "^3.1.0"
uuid-random "^1.3.2"
yargs "^17.7.2"

watchpack@^2.4.0:
version "2.4.0"
resolved "https://registry.yarnpkg.com/watchpack/-/watchpack-2.4.0.tgz#fa33032374962c78113f93c7f2fb4c54c9862a5d"
Expand Down

0 comments on commit 9308ec3

Please sign in to comment.