From 5a2e4eed74e20d8b2122c1b2b766d6daf59b21f0 Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Sun, 22 Dec 2024 19:44:19 -0500 Subject: [PATCH] add more logging --- include/ada/url_pattern.h | 4 +-- include/ada/url_pattern_helpers-inl.h | 6 +++- include/ada/url_pattern_helpers.h | 2 +- src/parser.cpp | 33 +++++++++++++++++++++ src/url_pattern.cpp | 41 +++++++++++++++++++++------ src/url_pattern_helpers.cpp | 38 +++++++++++++++++++++++++ 6 files changed, 112 insertions(+), 12 deletions(-) diff --git a/include/ada/url_pattern.h b/include/ada/url_pattern.h index 0f27aa154..687c32f94 100644 --- a/include/ada/url_pattern.h +++ b/include/ada/url_pattern.h @@ -148,7 +148,7 @@ struct url_pattern_compile_component_options { explicit url_pattern_compile_component_options( std::optional new_delimiter = std::nullopt, std::optional new_prefix = std::nullopt) - : delimiter(new_delimiter), prefix(new_prefix){} + : delimiter(new_delimiter), prefix(new_prefix) {} std::string_view get_delimiter() const ada_warn_unused; std::string_view get_prefix() const ada_warn_unused; @@ -191,7 +191,7 @@ class url_pattern_component { flags(std::move(new_flags)), regexp(std::move(new_regexp)), group_name_list(std::move(new_group_name_list)), - has_regexp_groups_(new_has_regexp_groups){} + has_regexp_groups_(new_has_regexp_groups) {} // @see https://urlpattern.spec.whatwg.org/#compile-a-component template diff --git a/include/ada/url_pattern_helpers-inl.h b/include/ada/url_pattern_helpers-inl.h index 359d65d9b..8a4ba12e8 100644 --- a/include/ada/url_pattern_helpers-inl.h +++ b/include/ada/url_pattern_helpers-inl.h @@ -402,11 +402,15 @@ template std::optional url_pattern_parser::maybe_add_part_from_the_pending_fixed_value() { // If parser’s pending fixed value is the empty string, then return. - if (pending_fixed_value.empty()) return std::nullopt; + if (pending_fixed_value.empty()) { + ada_log("pending_fixed_value is empty"); + return std::nullopt; + } // Let encoded value be the result of running parser’s encoding callback given // parser’s pending fixed value. auto encoded_value = encoding_callback(pending_fixed_value); if (!encoded_value) { + ada_log("failed to encode pending_fixed_value: ", pending_fixed_value); return encoded_value.error(); } // Set parser’s pending fixed value to the empty string. diff --git a/include/ada/url_pattern_helpers.h b/include/ada/url_pattern_helpers.h index 0f83881af..c47e37e56 100644 --- a/include/ada/url_pattern_helpers.h +++ b/include/ada/url_pattern_helpers.h @@ -133,7 +133,7 @@ class Tokenizer { struct constructor_string_parser { explicit constructor_string_parser(std::string_view new_input, std::vector& new_token_list) - : input(new_input), token_list(new_token_list){} + : input(new_input), token_list(new_token_list) {} // @see https://urlpattern.spec.whatwg.org/#rewind void rewind(); diff --git a/src/parser.cpp b/src/parser.cpp index 51e974ea7..cdca218bd 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -912,6 +912,7 @@ tl::expected parse_url_pattern_impl( auto parse_result = url_pattern_helpers::constructor_string_parser::parse( std::get(input)); if (!parse_result) { + ada_log("constructor_string_parser::parse failed"); return tl::unexpected(parse_result.error()); } init = *parse_result; @@ -919,6 +920,7 @@ tl::expected parse_url_pattern_impl( // If baseURL is null and init["protocol"] does not exist, then throw a // TypeError. if (!base_url && !init.protocol) { + ada_log("base url is null and protocol is not set"); return tl::unexpected(url_pattern_errors::type_error); } @@ -931,6 +933,7 @@ tl::expected parse_url_pattern_impl( ADA_ASSERT_TRUE(std::holds_alternative(input)); // If baseURL is not null, then throw a TypeError. if (base_url) { + ada_log("base url is not null"); return tl::unexpected(url_pattern_errors::type_error); } // Optimization: Avoid copy by moving the input value. @@ -944,6 +947,7 @@ tl::expected parse_url_pattern_impl( init, "pattern", std::nullopt, std::nullopt, std::nullopt, std::nullopt, std::nullopt, std::nullopt, std::nullopt, std::nullopt); if (!processed_init) { + ada_log("url_pattern_init::process failed for init and 'pattern'"); return tl::unexpected(processed_init.error()); } @@ -961,6 +965,15 @@ tl::expected parse_url_pattern_impl( if (!processed_init->search) processed_init->search = "*"; if (!processed_init->hash) processed_init->hash = "*"; + ada_log("-- processed_init->protocol: ", processed_init->protocol.value()); + ada_log("-- processed_init->username: ", processed_init->username.value()); + ada_log("-- processed_init->password: ", processed_init->password.value()); + ada_log("-- processed_init->hostname: ", processed_init->hostname.value()); + ada_log("-- processed_init->port: ", processed_init->port.value()); + ada_log("-- processed_init->pathname: ", processed_init->pathname.value()); + ada_log("-- processed_init->search: ", processed_init->search.value()); + ada_log("-- processed_init->hash: ", processed_init->hash.value()); + // If processedInit["protocol"] is a special scheme and processedInit["port"] // is a string which represents its corresponding default port in radix-10 // using ASCII digits then set processedInit["port"] to the empty string. @@ -982,6 +995,8 @@ tl::expected parse_url_pattern_impl( url_pattern_helpers::canonicalize_protocol, url_pattern_compile_component_options::DEFAULT); if (!protocol_component) { + ada_log("url_pattern_component::compile failed for protocol ", + processed_init->protocol.value()); return tl::unexpected(protocol_component.error()); } url_pattern_.protocol_component = std::move(*protocol_component); @@ -994,6 +1009,8 @@ tl::expected parse_url_pattern_impl( url_pattern_helpers::canonicalize_username, url_pattern_compile_component_options::DEFAULT); if (!username_component) { + ada_log("url_pattern_component::compile failed for username ", + processed_init->username.value()); return tl::unexpected(username_component.error()); } url_pattern_.username_component = std::move(*username_component); @@ -1006,6 +1023,8 @@ tl::expected parse_url_pattern_impl( url_pattern_helpers::canonicalize_password, url_pattern_compile_component_options::DEFAULT); if (!password_component) { + ada_log("url_pattern_component::compile failed for password ", + processed_init->password.value()); return tl::unexpected(password_component.error()); } url_pattern_.password_component = std::move(*password_component); @@ -1022,6 +1041,8 @@ tl::expected parse_url_pattern_impl( url_pattern_helpers::canonicalize_hostname, url_pattern_compile_component_options::DEFAULT); if (!hostname_component) { + ada_log("url_pattern_component::compile failed for ipv6 hostname ", + processed_init->hostname.value()); return tl::unexpected(hostname_component.error()); } url_pattern_.hostname_component = std::move(*hostname_component); @@ -1034,6 +1055,8 @@ tl::expected parse_url_pattern_impl( url_pattern_helpers::canonicalize_hostname, url_pattern_compile_component_options::HOSTNAME); if (!hostname_component) { + ada_log("url_pattern_component::compile failed for hostname ", + processed_init->hostname.value()); return tl::unexpected(hostname_component.error()); } url_pattern_.hostname_component = std::move(*hostname_component); @@ -1045,6 +1068,8 @@ tl::expected parse_url_pattern_impl( processed_init->port.value(), url_pattern_helpers::canonicalize_port, url_pattern_compile_component_options::DEFAULT); if (!port_component) { + ada_log("url_pattern_component::compile failed for port ", + processed_init->port.value()); return tl::unexpected(port_component.error()); } url_pattern_.port_component = std::move(*port_component); @@ -1075,6 +1100,8 @@ tl::expected parse_url_pattern_impl( processed_init->pathname.value(), url_pattern_helpers::canonicalize_pathname, path_compile_options); if (!pathname_component) { + ada_log("url_pattern_component::compile failed for pathname ", + processed_init->pathname.value()); return tl::unexpected(pathname_component.error()); } url_pattern_.pathname_component = std::move(*pathname_component); @@ -1086,6 +1113,8 @@ tl::expected parse_url_pattern_impl( processed_init->pathname.value(), url_pattern_helpers::canonicalize_opaque_pathname, compile_options); if (!pathname_component) { + ada_log("url_pattern_component::compile failed for opaque pathname ", + processed_init->pathname.value()); return tl::unexpected(pathname_component.error()); } url_pattern_.pathname_component = std::move(*pathname_component); @@ -1097,6 +1126,8 @@ tl::expected parse_url_pattern_impl( processed_init->search.value(), url_pattern_helpers::canonicalize_search, compile_options); if (!search_component) { + ada_log("url_pattern_component::compile failed for search ", + processed_init->search.value()); return tl::unexpected(search_component.error()); } url_pattern_.search_component = std::move(*search_component); @@ -1107,6 +1138,8 @@ tl::expected parse_url_pattern_impl( processed_init->hash.value(), url_pattern_helpers::canonicalize_hash, compile_options); if (!hash_component) { + ada_log("url_pattern_component::compile failed for hash ", + processed_init->hash.value()); return tl::unexpected(hash_component.error()); } url_pattern_.hash_component = std::move(*hash_component); diff --git a/src/url_pattern.cpp b/src/url_pattern.cpp index 6a7719d97..581d7f5f5 100644 --- a/src/url_pattern.cpp +++ b/src/url_pattern.cpp @@ -492,12 +492,14 @@ template tl::expected url_pattern_component::compile(std::string_view input, F encoding_callback, url_pattern_compile_component_options& options) { + ada_log("url_pattern_component::compile input: ", input); // Let part list be the result of running parse a pattern string given input, // options, and encoding callback. auto part_list = url_pattern_helpers::parse_pattern_string(input, options, encoding_callback); if (!part_list) { + ada_log("parse_pattern_string failed"); return tl::unexpected(part_list.error()); } @@ -507,6 +509,8 @@ url_pattern_component::compile(std::string_view input, F encoding_callback, url_pattern_helpers::generate_regular_expression_and_name_list(*part_list, options); + ada_log("regular expression string: ", regular_expression_string); + // Let flags be an empty string. // If options’s ignore case is true then set flags to "vi". // Otherwise set flags to "v" @@ -527,6 +531,8 @@ url_pattern_component::compile(std::string_view input, F encoding_callback, const auto has_regexp = [](const auto& part) { return part.is_regexp(); }; const bool has_regexp_groups = std::ranges::any_of(*part_list, has_regexp); + ada_log("has regexp groups: ", has_regexp_groups); + // Return a new component whose pattern string is pattern string, regular // expression is regular expression, group name list is name list, and has // regexp groups is has regexp groups. @@ -718,24 +724,26 @@ std::string generate_segment_wildcard_regexp( // Append "]+?" to the end of result. result.append("]+?"); // Return result. + ada_log("generate_segment_wildcard_regexp result: ", result); return result; } bool protocol_component_matches_special_scheme( ada::url_pattern_component& component) { auto regex = component.get_regexp(); + ada_log("protocol_component_matches_special_scheme regex: ", regex); try { std::regex rx(regex.data(), regex.size()); std::cmatch cmatch; return std::regex_match("http", cmatch, rx) || - std::regex_match("https", cmatch, rx) || - std::regex_match("ws", cmatch, rx) || - std::regex_match("wss", cmatch, rx) || - std::regex_match("ftp", cmatch, rx); + std::regex_match("https", cmatch, rx) || + std::regex_match("ws", cmatch, rx) || + std::regex_match("wss", cmatch, rx) || + std::regex_match("ftp", cmatch, rx); } catch (...) { // You probably want to log this error. ada_log("Error while matching protocol component with special scheme"); - ada_log("Regex Input: ", input); + ada_log("Regex Input: ", regex); return false; } } @@ -866,7 +874,10 @@ url_pattern::match(url_pattern_input&& input, url = parsed_url.value(); // Set protocol to url’s scheme. - protocol = url.get_protocol(); + // IMPORTANT: Not documented on the URLPattern spec, but protocol suffix ':' + // is removed. Similar work was done on workerd: + // https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2038 + protocol = url.get_protocol().substr(0, url.get_protocol().size() - 1); // Set username to url’s username. username = url.get_username(); // Set password to url’s password. @@ -880,9 +891,23 @@ url_pattern::match(url_pattern_input&& input, // Set pathname to the result of URL path serializing url. pathname = url.get_pathname(); // Set search to url’s query or the empty string if the value is null. - search = url.get_search(); + // IMPORTANT: Not documented on the URLPattern spec, but search prefix '?' + // is removed. Similar work was done on workerd: + // https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2232 + if (url.has_search()) { + search = url.get_search().substr(1); + } else { + search = ""; + } // Set hash to url’s fragment or the empty string if the value is null. - hash = url.get_hash(); + // IMPORTANT: Not documented on the URLPattern spec, but hash prefix '#' is + // removed. Similar work was done on workerd: + // https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2242 + if (url.has_hash()) { + hash = url.get_hash().substr(1); + } else { + hash = ""; + } } // TODO: Make this function pluggable using a parameter. diff --git a/src/url_pattern_helpers.cpp b/src/url_pattern_helpers.cpp index 1886bdb30..3eeed2002 100644 --- a/src/url_pattern_helpers.cpp +++ b/src/url_pattern_helpers.cpp @@ -495,6 +495,7 @@ constructor_string_parser::parse(std::string_view input) { tl::expected, url_pattern_errors> tokenize( std::string_view input, token_policy policy) { + ada_log("tokenize input: ", input); // Let tokenizer be a new tokenizer. // Set tokenizer’s input to input. // Set tokenizer’s policy to policy. @@ -505,11 +506,15 @@ tl::expected, url_pattern_errors> tokenize( // index. tokenizer.seek_and_get_next_code_point(tokenizer.index); + ada_log("tokenizer.code_point: ", tokenizer.code_point); + ada_log("tokenizer.index: ", tokenizer.index); + // If tokenizer’s code point is U+002A (*): if (tokenizer.code_point == '*') { // Run add a token with default position and length given tokenizer and // "asterisk". tokenizer.add_token_with_defaults(token_type::ASTERISK); + ada_log("add ASTERISK token"); // Continue. continue; } @@ -519,6 +524,7 @@ tl::expected, url_pattern_errors> tokenize( // Run add a token with default position and length given tokenizer and // "other-modifier". tokenizer.add_token_with_defaults(token_type::OTHER_MODIFIER); + ada_log("add OTHER_MODIFIER token"); // Continue. continue; } @@ -532,6 +538,7 @@ tl::expected, url_pattern_errors> tokenize( // index, and tokenizer’s index. if (auto error = tokenizer.process_tokenizing_error( tokenizer.next_index, tokenizer.index)) { + ada_log("process_tokenizing_error failed"); return tl::unexpected(*error); } continue; @@ -545,6 +552,8 @@ tl::expected, url_pattern_errors> tokenize( // tokenizer’s next index, and escaped index. tokenizer.add_token(token_type::ESCAPED_CHAR, tokenizer.next_index, escaped_index); + ada_log("add ESCAPED_CHAR token on next_index ", tokenizer.next_index, + " with escaped index ", escaped_index); // Continue. continue; } @@ -554,6 +563,7 @@ tl::expected, url_pattern_errors> tokenize( // Run add a token with default position and length given tokenizer and // "open". tokenizer.add_token_with_defaults(token_type::OPEN); + ada_log("add OPEN token"); continue; } @@ -562,6 +572,7 @@ tl::expected, url_pattern_errors> tokenize( // Run add a token with default position and length given tokenizer and // "close". tokenizer.add_token_with_defaults(token_type::CLOSE); + ada_log("add CLOSE token"); continue; } @@ -583,6 +594,8 @@ tl::expected, url_pattern_errors> tokenize( // point given tokenizer’s code point and first code point. auto valid_code_point = idna::valid_name_code_point( std::string_view{&tokenizer.code_point, 1}, first_code_point); + ada_log("tokenizer.code_point: ", tokenizer.code_point, + " is_valid_name_code_point: ", valid_code_point); // If valid code point is false break. if (!valid_code_point) break; // Set name position to tokenizer’s next index. @@ -595,6 +608,7 @@ tl::expected, url_pattern_errors> tokenize( // tokenizer’s index. if (auto error = tokenizer.process_tokenizing_error(name_start, tokenizer.index)) { + ada_log("process_tokenizing_error failed"); return tl::unexpected(*error); } // Continue @@ -604,6 +618,8 @@ tl::expected, url_pattern_errors> tokenize( // Run add a token with default length given tokenizer, "name", name // position, and name start. tokenizer.add_token(token_type::NAME, name_position, name_start); + ada_log("add NAME token on name_position ", name_position, + " with name_start ", name_start); continue; } @@ -633,6 +649,7 @@ tl::expected, url_pattern_errors> tokenize( // tokenizer’s index. if (auto process_error = tokenizer.process_tokenizing_error( regexp_start, tokenizer.index)) { + ada_log("process_tokenizing_error failed"); return tl::unexpected(*process_error); } // Set error to true. @@ -647,6 +664,7 @@ tl::expected, url_pattern_errors> tokenize( // tokenizer’s index. if (auto process_error = tokenizer.process_tokenizing_error( regexp_start, tokenizer.index)) { + ada_log("process_tokenizing_error failed"); return tl::unexpected(*process_error); } // Set error to true; @@ -662,6 +680,7 @@ tl::expected, url_pattern_errors> tokenize( // tokenizer’s index. if (auto process_error = tokenizer.process_tokenizing_error( regexp_start, tokenizer.index)) { + ada_log("process_tokenizing_error failed"); return tl::unexpected(*process_error); } // Set error to true. @@ -678,6 +697,7 @@ tl::expected, url_pattern_errors> tokenize( if (auto process_error = tokenizer.process_tokenizing_error( regexp_start, tokenizer.index); process_error.has_value()) { + ada_log("process_tokenizing_error failed"); return tl::unexpected(*process_error); } // Set error to true. @@ -711,6 +731,7 @@ tl::expected, url_pattern_errors> tokenize( // tokenizer’s index. if (auto process_error = tokenizer.process_tokenizing_error( regexp_start, tokenizer.index)) { + ada_log("process_tokenizing_error failed"); return tl::unexpected(*process_error); } // Set error to true. @@ -727,6 +748,7 @@ tl::expected, url_pattern_errors> tokenize( // tokenizer’s index. if (auto process_error = tokenizer.process_tokenizing_error( regexp_start, tokenizer.index)) { + ada_log("process_tokenizing_error failed"); return tl::unexpected(*process_error); } // Set error to true. @@ -748,6 +770,7 @@ tl::expected, url_pattern_errors> tokenize( // tokenizer’s index. if (auto process_error = tokenizer.process_tokenizing_error( regexp_start, tokenizer.index)) { + ada_log("process_tokenizing_error failed"); return tl::unexpected(*process_error); } continue; @@ -760,6 +783,7 @@ tl::expected, url_pattern_errors> tokenize( // tokenizer’s index. if (auto process_error = tokenizer.process_tokenizing_error( regexp_start, tokenizer.index)) { + ada_log("process_tokenizing_error failed"); return tl::unexpected(*process_error); } continue; @@ -768,6 +792,9 @@ tl::expected, url_pattern_errors> tokenize( // start, and regexp length. tokenizer.add_token(token_type::REGEXP, regexp_position, regexp_start, regexp_length); + ada_log("add REGEXP token on regexp_position ", regexp_position, + " with regexp_start ", regexp_start, " and regexp_length ", + regexp_length); continue; } // Run add a token with default position and length given tokenizer and @@ -777,6 +804,9 @@ tl::expected, url_pattern_errors> tokenize( // Run add a token with default length given tokenizer, "end", tokenizer’s // index, and tokenizer’s index. tokenizer.add_token(token_type::END, tokenizer.index, tokenizer.index); + ada_log("add token END"); + + ada_log("tokenizer.token_list size is: ", tokenizer.token_list.size()); // Return tokenizer’s token list. return std::move(tokenizer.token_list); } @@ -889,6 +919,7 @@ parse_pattern_string(std::string_view input, // "strict". auto tokenize_result = tokenize(input, token_policy::STRICT); if (!tokenize_result) { + ada_log("parse_pattern_string tokenize failed"); return tl::unexpected(tokenize_result.error()); } parser.tokens = std::move(*tokenize_result); @@ -920,6 +951,7 @@ parse_pattern_string(std::string_view input, } // Run maybe add a part from the pending fixed value given parser. if (auto error = parser.maybe_add_part_from_the_pending_fixed_value()) { + ada_log("maybe_add_part_from_the_pending_fixed_value failed"); return tl::unexpected(*error); } // Let modifier token be the result of running try to consume a modifier @@ -930,6 +962,7 @@ parse_pattern_string(std::string_view input, if (auto error = parser.add_part(prefix, name_token, regexp_or_wildcard_token, {}, modifier_token)) { + ada_log("parser.add_part failed"); return tl::unexpected(*error); } // Continue. @@ -967,6 +1000,7 @@ parse_pattern_string(std::string_view input, auto suffix_ = parser.consume_text(); // Run consume a required token given parser and "close". if (!parser.consume_required_token(token_type::CLOSE)) { + ada_log("parser.consume_required_token failed"); return tl::unexpected(url_pattern_errors::type_error); } // Set modifier token to the result of running try to consume a modifier @@ -977,6 +1011,7 @@ parse_pattern_string(std::string_view input, if (auto error = parser.add_part(prefix_, name_token, regexp_or_wildcard_token, suffix_, modifier_token)) { + ada_log("parser.add_part failed on line 984"); return tl::unexpected(*error); } // Continue. @@ -984,13 +1019,16 @@ parse_pattern_string(std::string_view input, } // Run maybe add a part from the pending fixed value given parser. if (auto error = parser.maybe_add_part_from_the_pending_fixed_value()) { + ada_log("maybe_add_part_from_the_pending_fixed_value failed on line 992"); return tl::unexpected(*error); } // Run consume a required token given parser and "end". if (!parser.consume_required_token(token_type::END)) { + ada_log("parser.consume_required_token failed"); return tl::unexpected(url_pattern_errors::type_error); } } + ada_log("parser.parts size is: ", parser.parts.size()); // Return parser’s part list. return parser.parts; }