Skip to content

Commit

Permalink
fix text streaming for multiwacz (#126)
Browse files: browse the repository at this point in the history
support /textIndex for multiwacz collections, stream text for all wacz files
bump to 2.16.3
  • Loading branch information
ikreymer authored Jun 28, 2023
1 parent 989082a commit 7b2627e
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 19 deletions.
2 changes: 1 addition & 1 deletion dist/sw.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@webrecorder/wabac",
"version": "2.16.2",
"version": "2.16.3",
"main": "index.js",
"type": "module",
"license": "AGPL-3.0-or-later",
Expand Down
60 changes: 44 additions & 16 deletions src/wacz/multiwacz.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import { getSurt } from "warcio";
import { LiveProxy } from "../liveproxy.js";

import { INDEX_CDX, INDEX_IDX, INDEX_NOT_LOADED, NO_LOAD_WACZ, WACZFile, WACZ_LEAF } from "./waczfile.js";
import { WACZImporter } from "./waczimporter.js";
import { EXTRA_PAGES_JSON, WACZImporter } from "./waczimporter.js";
import { createLoader } from "../blockloaders.js";

const MAX_BLOCKS = 3;
Expand Down Expand Up @@ -35,7 +35,7 @@ export class MultiWACZ extends OnDemandPayloadArchiveDB// implements WACZLoadSou
this.externalSource = null;
this.fuzzyUrlRules = [];

this.textIndex = config && config.metadata && config.metadata.textIndex;
this.textIndex = (config && config.metadata && config.metadata.textIndex) || EXTRA_PAGES_JSON;

if (config.extraConfig) {
this.initConfig(config.extraConfig);
Expand Down Expand Up @@ -696,26 +696,54 @@ export class MultiWACZ extends OnDemandPayloadArchiveDB// implements WACZLoadSou
return new Response("", {headers});
}

// just look at first wacz for now
const waczname = keys[0];
if (keys.length === 1) {
const waczname = keys[0];

let result;
let result;

try {
result = await this.loadFileFromNamedWACZ(waczname, this.textIndex, {unzip: true});
} catch (e) {
return new Response("", {headers});
}
try {
result = await this.loadFileFromNamedWACZ(waczname, this.textIndex, {unzip: true});
} catch (e) {
return new Response("", {headers});
}

const {reader} = result;
const {reader} = result;

const size = this.waczfiles[waczname].getSizeOf(this.textIndex);
const size = this.waczfiles[waczname].getSizeOf(this.textIndex);

if (size > 0) {
headers["Content-Length"] = "" + size;
}
if (size > 0) {
headers["Content-Length"] = "" + size;
}

return new Response(reader.getReadableStream(), {headers});
} else {

const readers = [];

return new Response(reader.getReadableStream(), {headers});
for (const waczname of keys) {
try {
const { reader } = await this.loadFileFromNamedWACZ(waczname, this.textIndex, {unzip: true});
if (reader) {
readers.push(reader);
}
} catch (e) {
continue;
}
}

const rs = new ReadableStream({
async pull(controller) {
for (const reader of readers) {
for await (const chunk of reader) {
controller.enqueue(chunk);
}
}
controller.close();
}
});

return new Response(rs, {headers});
}
}

async getResource(request, prefix, event, {pageId} = {}) {
Expand Down
2 changes: 1 addition & 1 deletion src/wacz/waczimporter.js
Original file line number Diff line number Diff line change
Expand Up @@ -229,4 +229,4 @@ export class WACZImporter

return pageListInfo;
}
}
}

0 comments on commit 7b2627e

Please sign in to comment.