Skip to content

Commit

Permalink
add multiValueHeader(name, value) helper for creating a multi-value h…
Browse files Browse the repository at this point in the history
…eader, also validates that the name is allowed

fix utils exports
add test for multiValueHeader() and WARC-Protocol
bump to 2.4.1
  • Loading branch information
ikreymer committed Nov 19, 2024
1 parent ddc5507 commit f372dcb
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 24 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "warcio",
"version": "2.4.0",
"version": "2.4.1",
"keywords": [
"WARC",
"web archiving"
Expand Down
20 changes: 1 addition & 19 deletions src/lib/statusandheaders.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { concatChunks, HeadersMultiMap, splitChunk } from "./utils";
import { concatChunks, HeadersMultiMap, latin1ToUTF, splitChunk, UTFToLatin1 } from "./utils";
import { type AsyncIterReader } from "./readers";

export const CRLF = new Uint8Array([13, 10]);
Expand Down Expand Up @@ -220,24 +220,6 @@ function splitRemainder(str: string, sep: string, limit: number) {
return newParts;
}

// ===========================================================================
/**
 * Re-express a string as its UTF-8 bytes, one character per byte.
 *
 * The input is encoded with `TextEncoder`, and every resulting byte is
 * turned into a single character whose char code equals that byte value.
 *
 * @param value - the string to convert
 * @returns a string containing one character (code 0-255) per encoded byte
 */
function UTFToLatin1(value: string) {
  const bytes = new TextEncoder().encode(value);

  const chars: string[] = [];
  for (const byte of bytes) {
    chars.push(String.fromCharCode(byte));
  }
  return chars.join("");
}

// ===========================================================================
/**
 * Interpret a byte-per-character string as encoded bytes and decode it.
 *
 * Each UTF-16 code unit of the input contributes exactly one byte (its low
 * 8 bits); the resulting byte sequence is decoded with a default
 * `TextDecoder`.
 *
 * @param str - string whose char codes are treated as byte values
 * @returns the decoded text
 */
function latin1ToUTF(str: string) {
  // split("") yields one entry per UTF-16 code unit, matching index-based access
  const bytes = Uint8Array.from(str.split(""), (unit) => unit.charCodeAt(0) & 0xff);
  return new TextDecoder().decode(bytes);
}

// ===========================================================================
export async function indexOfDoubleCRLF(
buffer: Uint8Array,
Expand Down
25 changes: 25 additions & 0 deletions src/lib/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,24 @@ export function splitChunk(
return [chunk.slice(0, inx), chunk.slice(inx)];
}

// ===========================================================================
/**
 * Convert a string into a "byte string": the text is encoded with
 * `TextEncoder`, then each encoded byte becomes one character whose char
 * code is that byte's value.
 *
 * @param value - the string to convert
 * @returns a string with one character (code 0-255) per encoded byte
 */
export function UTFToLatin1(value: string) {
  const utf8Bytes = new TextEncoder().encode(value);
  return Array.from(utf8Bytes, (byte) => String.fromCharCode(byte)).join("");
}

// ===========================================================================
/**
 * Decode a "byte string" back into text.
 *
 * A byte array the same length as the input is filled with the low 8 bits
 * of each UTF-16 code unit, and that array is decoded with a default
 * `TextDecoder`.
 *
 * @param str - string whose char codes are treated as byte values
 * @returns the decoded text
 */
export function latin1ToUTF(str: string) {
  const bytes = new Uint8Array(str.length).map(
    (_unused, pos) => str.charCodeAt(pos) & 0xff,
  );
  return new TextDecoder().decode(bytes);
}

// ===========================================================================
// headers multi map
const MULTI_VALUE_ALLOWED = ["set-cookie", "warc-concurrent-to", "warc-protocol"];
Expand All @@ -297,6 +315,13 @@ const MULTI_VALUE_ALLOWED = ["set-cookie", "warc-concurrent-to", "warc-protocol"
// in theory, collision still possible with arbitrary cookie value
const JOIN_MARKER = ",,,";

/**
 * Build the single-string form of a header that carries several values.
 *
 * The header name is compared case-insensitively against
 * `MULTI_VALUE_ALLOWED`; when it is listed, the values are joined with
 * `JOIN_MARKER`.
 *
 * @param name - header name to validate (any letter case)
 * @param value - the individual header values, in order
 * @returns the values joined by `JOIN_MARKER`
 * @throws Error if `name` is not one of the allowed multi-value headers
 */
export function multiValueHeader(name: string, value: string[]) {
  const normalizedName = name.toLowerCase();
  const isAllowed = MULTI_VALUE_ALLOWED.includes(normalizedName);

  if (isAllowed) {
    return value.join(JOIN_MARKER);
  }

  throw new Error("not a valid multi value header");
}

export class HeadersMultiMap extends Map<string, string> {
constructor(headersInit?: HeadersInit) {
// if an array of array, parse that and add individually here
Expand Down
14 changes: 10 additions & 4 deletions test/testSerializer.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {
WARCSerializer as BaseWARCSerializer,
} from "../src/lib";
import { WARCSerializer } from "../src/node/warcserializer";
import { multiValueHeader } from "../src/lib/utils";

const decoder = new TextDecoder("utf-8");
const encoder = new TextEncoder();
Expand Down Expand Up @@ -429,12 +430,13 @@ set-cookie: greeting=hello, name=world\r\n\
}
});

test("create request record with cookie array, keep headers case", async () => {
const url = "http://example.com/";
test("create request record with protocol + cookie array, keep headers case", async () => {
const url = "https://example.com/";
const date = "2000-01-01T00:00:00Z";
const type = "request";
const warcHeaders = {
"WARC-Record-ID": "<urn:uuid:12345678-feb0-11e6-8f83-68a86d1772ce>",
"WARC-Protocol": multiValueHeader("WARC-Protocol", ["h2", "tls/1.0"])
};
const httpHeaders: [string, string][] = [
["Set-Cookie", "greeting=hello"],
Expand Down Expand Up @@ -463,7 +465,9 @@ set-cookie: greeting=hello, name=world\r\n\
"\
WARC/1.0\r\n\
WARC-Record-ID: <urn:uuid:12345678-feb0-11e6-8f83-68a86d1772ce>\r\n\
WARC-Target-URI: http://example.com/\r\n\
WARC-Protocol: h2\r\n\
WARC-Protocol: tls/1.0\r\n\
WARC-Target-URI: https://example.com/\r\n\
WARC-Date: 2000-01-01T00:00:00Z\r\n\
WARC-Type: request\r\n\
Content-Type: application/http; msgtype=request\r\n\
Expand All @@ -484,7 +488,9 @@ Set-Cookie: name=world\r\n\
"\
WARC/1.0\r\n\
WARC-Record-ID: <urn:uuid:12345678-feb0-11e6-8f83-68a86d1772ce>\r\n\
WARC-Target-URI: http://example.com/\r\n\
WARC-Protocol: h2\r\n\
WARC-Protocol: tls/1.0\r\n\
WARC-Target-URI: https://example.com/\r\n\
WARC-Date: 2000-01-01T00:00:00Z\r\n\
WARC-Type: request\r\n\
Content-Type: application/http; msgtype=request\r\n\
Expand Down

0 comments on commit f372dcb

Please sign in to comment.