From 8efb4c6f387d0993da35f501558b8f68908e1636 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Mon, 28 Oct 2024 15:14:39 -0400 Subject: [PATCH] Parse full headers --- cdxj_indexer/postquery.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/cdxj_indexer/postquery.py b/cdxj_indexer/postquery.py index 360f28b..2071af9 100644 --- a/cdxj_indexer/postquery.py +++ b/cdxj_indexer/postquery.py @@ -23,16 +23,16 @@ def append_method_query_from_req_resp(req, resp): url = req.rec_headers.get_header("WARC-Target-URI") method = req.http_headers.protocol - return append_method_query(method, content_type, len_, stream, url) + return append_method_query(method, content_type, len_, stream, url, req.http_headers) # ============================================================================ -def append_method_query(method, content_type, len_, stream, url): +def append_method_query(method, content_type, len_, stream, url, request_headers): # if method == 'GET': # return '', '' if method == "POST" or method == "PUT": - query = query_extract(content_type, len_, stream, url) + query = query_extract(content_type, len_, stream, url, request_headers) else: query = "" @@ -49,7 +49,7 @@ def append_method_query(method, content_type, len_, stream, url): # ============================================================================ -def query_extract(mime, length, stream, url): +def query_extract(mime, length, stream, url, request_headers): """ Extract a url-encoded form POST/PUT from stream content length, return None @@ -95,19 +95,19 @@ def handle_binary(query_data): query = handle_binary(query_data) elif mime.startswith("multipart/"): - env = { - "REQUEST_METHOD": "POST", - "CONTENT_TYPE": mime, - "CONTENT_LENGTH": len(query_data), - } + # env = { + # "REQUEST_METHOD": "POST", + # "CONTENT_TYPE": mime, + # "CONTENT_LENGTH": len(query_data), + # } - args = dict(fp=BytesIO(query_data), environ=env, keep_blank_values=True) + # args = dict(fp=BytesIO(query_data), environ=env, keep_blank_values=True) try: - _, params = parse_options_header(mime) + _, params = parse_options_header(request_headers) boundary = params["boundary"] parser = MultipartParser(stream, boundary, charset="utf8") - except ValueError: + except KeyError: # Content-Type multipart/form-data may lack "boundary" info query = handle_binary(query_data) else: