Skip to content

Commit

Permalink
add more logging
Browse files Browse the repository at this point in the history
  • Loading branch information
anonrig committed Dec 23, 2024
1 parent 776f670 commit 5a2e4ee
Show file tree
Hide file tree
Showing 6 changed files with 112 additions and 12 deletions.
4 changes: 2 additions & 2 deletions include/ada/url_pattern.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ struct url_pattern_compile_component_options {
explicit url_pattern_compile_component_options(
std::optional<char> new_delimiter = std::nullopt,
std::optional<char> new_prefix = std::nullopt)
: delimiter(new_delimiter), prefix(new_prefix){}
: delimiter(new_delimiter), prefix(new_prefix) {}

std::string_view get_delimiter() const ada_warn_unused;
std::string_view get_prefix() const ada_warn_unused;
Expand Down Expand Up @@ -191,7 +191,7 @@ class url_pattern_component {
flags(std::move(new_flags)),
regexp(std::move(new_regexp)),
group_name_list(std::move(new_group_name_list)),
has_regexp_groups_(new_has_regexp_groups){}
has_regexp_groups_(new_has_regexp_groups) {}

// @see https://urlpattern.spec.whatwg.org/#compile-a-component
template <url_pattern_encoding_callback F>
Expand Down
6 changes: 5 additions & 1 deletion include/ada/url_pattern_helpers-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -402,11 +402,15 @@ template <url_pattern_encoding_callback F>
std::optional<url_pattern_errors>
url_pattern_parser<F>::maybe_add_part_from_the_pending_fixed_value() {
// If parser’s pending fixed value is the empty string, then return.
if (pending_fixed_value.empty()) return std::nullopt;
if (pending_fixed_value.empty()) {
ada_log("pending_fixed_value is empty");
return std::nullopt;
}
// Let encoded value be the result of running parser’s encoding callback given
// parser’s pending fixed value.
auto encoded_value = encoding_callback(pending_fixed_value);
if (!encoded_value) {
ada_log("failed to encode pending_fixed_value: ", pending_fixed_value);
return encoded_value.error();
}
// Set parser’s pending fixed value to the empty string.
Expand Down
2 changes: 1 addition & 1 deletion include/ada/url_pattern_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ class Tokenizer {
struct constructor_string_parser {
explicit constructor_string_parser(std::string_view new_input,
std::vector<Token>& new_token_list)
: input(new_input), token_list(new_token_list){}
: input(new_input), token_list(new_token_list) {}

// @see https://urlpattern.spec.whatwg.org/#rewind
void rewind();
Expand Down
33 changes: 33 additions & 0 deletions src/parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -912,13 +912,15 @@ tl::expected<url_pattern, url_pattern_errors> parse_url_pattern_impl(
auto parse_result = url_pattern_helpers::constructor_string_parser::parse(
std::get<std::string_view>(input));
if (!parse_result) {
ada_log("constructor_string_parser::parse failed");
return tl::unexpected(parse_result.error());
}
init = *parse_result;

// If baseURL is null and init["protocol"] does not exist, then throw a
// TypeError.
if (!base_url && !init.protocol) {
ada_log("base url is null and protocol is not set");
return tl::unexpected(url_pattern_errors::type_error);
}

Expand All @@ -931,6 +933,7 @@ tl::expected<url_pattern, url_pattern_errors> parse_url_pattern_impl(
ADA_ASSERT_TRUE(std::holds_alternative<url_pattern_init>(input));
// If baseURL is not null, then throw a TypeError.
if (base_url) {
ada_log("base url is not null");
return tl::unexpected(url_pattern_errors::type_error);
}
// Optimization: Avoid copy by moving the input value.
Expand All @@ -944,6 +947,7 @@ tl::expected<url_pattern, url_pattern_errors> parse_url_pattern_impl(
init, "pattern", std::nullopt, std::nullopt, std::nullopt, std::nullopt,
std::nullopt, std::nullopt, std::nullopt, std::nullopt);
if (!processed_init) {
ada_log("url_pattern_init::process failed for init and 'pattern'");
return tl::unexpected(processed_init.error());
}

Expand All @@ -961,6 +965,15 @@ tl::expected<url_pattern, url_pattern_errors> parse_url_pattern_impl(
if (!processed_init->search) processed_init->search = "*";
if (!processed_init->hash) processed_init->hash = "*";

ada_log("-- processed_init->protocol: ", processed_init->protocol.value());
ada_log("-- processed_init->username: ", processed_init->username.value());
ada_log("-- processed_init->password: ", processed_init->password.value());
ada_log("-- processed_init->hostname: ", processed_init->hostname.value());
ada_log("-- processed_init->port: ", processed_init->port.value());
ada_log("-- processed_init->pathname: ", processed_init->pathname.value());
ada_log("-- processed_init->search: ", processed_init->search.value());
ada_log("-- processed_init->hash: ", processed_init->hash.value());

// If processedInit["protocol"] is a special scheme and processedInit["port"]
// is a string which represents its corresponding default port in radix-10
// using ASCII digits then set processedInit["port"] to the empty string.
Expand All @@ -982,6 +995,8 @@ tl::expected<url_pattern, url_pattern_errors> parse_url_pattern_impl(
url_pattern_helpers::canonicalize_protocol,
url_pattern_compile_component_options::DEFAULT);
if (!protocol_component) {
ada_log("url_pattern_component::compile failed for protocol ",
processed_init->protocol.value());
return tl::unexpected(protocol_component.error());
}
url_pattern_.protocol_component = std::move(*protocol_component);
Expand All @@ -994,6 +1009,8 @@ tl::expected<url_pattern, url_pattern_errors> parse_url_pattern_impl(
url_pattern_helpers::canonicalize_username,
url_pattern_compile_component_options::DEFAULT);
if (!username_component) {
ada_log("url_pattern_component::compile failed for username ",
processed_init->username.value());
return tl::unexpected(username_component.error());
}
url_pattern_.username_component = std::move(*username_component);
Expand All @@ -1006,6 +1023,8 @@ tl::expected<url_pattern, url_pattern_errors> parse_url_pattern_impl(
url_pattern_helpers::canonicalize_password,
url_pattern_compile_component_options::DEFAULT);
if (!password_component) {
ada_log("url_pattern_component::compile failed for password ",
processed_init->password.value());
return tl::unexpected(password_component.error());
}
url_pattern_.password_component = std::move(*password_component);
Expand All @@ -1022,6 +1041,8 @@ tl::expected<url_pattern, url_pattern_errors> parse_url_pattern_impl(
url_pattern_helpers::canonicalize_hostname,
url_pattern_compile_component_options::DEFAULT);
if (!hostname_component) {
ada_log("url_pattern_component::compile failed for ipv6 hostname ",
processed_init->hostname.value());
return tl::unexpected(hostname_component.error());
}
url_pattern_.hostname_component = std::move(*hostname_component);
Expand All @@ -1034,6 +1055,8 @@ tl::expected<url_pattern, url_pattern_errors> parse_url_pattern_impl(
url_pattern_helpers::canonicalize_hostname,
url_pattern_compile_component_options::HOSTNAME);
if (!hostname_component) {
ada_log("url_pattern_component::compile failed for hostname ",
processed_init->hostname.value());
return tl::unexpected(hostname_component.error());
}
url_pattern_.hostname_component = std::move(*hostname_component);
Expand All @@ -1045,6 +1068,8 @@ tl::expected<url_pattern, url_pattern_errors> parse_url_pattern_impl(
processed_init->port.value(), url_pattern_helpers::canonicalize_port,
url_pattern_compile_component_options::DEFAULT);
if (!port_component) {
ada_log("url_pattern_component::compile failed for port ",
processed_init->port.value());
return tl::unexpected(port_component.error());
}
url_pattern_.port_component = std::move(*port_component);
Expand Down Expand Up @@ -1075,6 +1100,8 @@ tl::expected<url_pattern, url_pattern_errors> parse_url_pattern_impl(
processed_init->pathname.value(),
url_pattern_helpers::canonicalize_pathname, path_compile_options);
if (!pathname_component) {
ada_log("url_pattern_component::compile failed for pathname ",
processed_init->pathname.value());
return tl::unexpected(pathname_component.error());
}
url_pattern_.pathname_component = std::move(*pathname_component);
Expand All @@ -1086,6 +1113,8 @@ tl::expected<url_pattern, url_pattern_errors> parse_url_pattern_impl(
processed_init->pathname.value(),
url_pattern_helpers::canonicalize_opaque_pathname, compile_options);
if (!pathname_component) {
ada_log("url_pattern_component::compile failed for opaque pathname ",
processed_init->pathname.value());
return tl::unexpected(pathname_component.error());
}
url_pattern_.pathname_component = std::move(*pathname_component);
Expand All @@ -1097,6 +1126,8 @@ tl::expected<url_pattern, url_pattern_errors> parse_url_pattern_impl(
processed_init->search.value(), url_pattern_helpers::canonicalize_search,
compile_options);
if (!search_component) {
ada_log("url_pattern_component::compile failed for search ",
processed_init->search.value());
return tl::unexpected(search_component.error());
}
url_pattern_.search_component = std::move(*search_component);
Expand All @@ -1107,6 +1138,8 @@ tl::expected<url_pattern, url_pattern_errors> parse_url_pattern_impl(
processed_init->hash.value(), url_pattern_helpers::canonicalize_hash,
compile_options);
if (!hash_component) {
ada_log("url_pattern_component::compile failed for hash ",
processed_init->hash.value());
return tl::unexpected(hash_component.error());
}
url_pattern_.hash_component = std::move(*hash_component);
Expand Down
41 changes: 33 additions & 8 deletions src/url_pattern.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -492,12 +492,14 @@ template <url_pattern_encoding_callback F>
tl::expected<url_pattern_component, url_pattern_errors>
url_pattern_component::compile(std::string_view input, F encoding_callback,
url_pattern_compile_component_options& options) {
ada_log("url_pattern_component::compile input: ", input);
// Let part list be the result of running parse a pattern string given input,
// options, and encoding callback.
auto part_list = url_pattern_helpers::parse_pattern_string(input, options,
encoding_callback);

if (!part_list) {
ada_log("parse_pattern_string failed");
return tl::unexpected(part_list.error());
}

Expand All @@ -507,6 +509,8 @@ url_pattern_component::compile(std::string_view input, F encoding_callback,
url_pattern_helpers::generate_regular_expression_and_name_list(*part_list,
options);

ada_log("regular expression string: ", regular_expression_string);

// Let flags be an empty string.
// If options’s ignore case is true then set flags to "vi".
// Otherwise set flags to "v"
Expand All @@ -527,6 +531,8 @@ url_pattern_component::compile(std::string_view input, F encoding_callback,
const auto has_regexp = [](const auto& part) { return part.is_regexp(); };
const bool has_regexp_groups = std::ranges::any_of(*part_list, has_regexp);

ada_log("has regexp groups: ", has_regexp_groups);

// Return a new component whose pattern string is pattern string, regular
// expression is regular expression, group name list is name list, and has
// regexp groups is has regexp groups.
Expand Down Expand Up @@ -718,24 +724,26 @@ std::string generate_segment_wildcard_regexp(
// Append "]+?" to the end of result.
result.append("]+?");
// Return result.
ada_log("generate_segment_wildcard_regexp result: ", result);
return result;
}

// Reports whether the regular expression held by `component` fully matches
// at least one of the scheme names "http", "https", "ws", "wss" or "ftp".
//
// @param component the compiled protocol component whose regexp is tested.
// @return true if any of the listed schemes matches; false if none match or
//         if building/evaluating the std::regex throws.
//
// NOTE(review): the WHATWG URL Standard also lists "file" as a special
// scheme; confirm whether leaving it out here is intentional.
bool protocol_component_matches_special_scheme(
    ada::url_pattern_component& component) {
  const auto& regex = component.get_regexp();
  ada_log("protocol_component_matches_special_scheme regex: ", regex);
  try {
    // std::regex construction may throw for patterns the ECMAScript grammar
    // of std::regex does not accept, hence the enclosing try block.
    const std::regex rx(regex.data(), regex.size());
    static constexpr const char* special_schemes[] = {"http", "https", "ws",
                                                      "wss", "ftp"};
    for (const char* scheme : special_schemes) {
      if (std::regex_match(scheme, rx)) {
        return true;
      }
    }
    return false;
  } catch (...) {
    // Best effort: a regexp that cannot be compiled or evaluated is treated
    // as "does not match a special scheme" after logging the offending input.
    ada_log("Error while matching protocol component with special scheme");
    ada_log("Regex Input: ", regex);
    return false;
  }
}
Expand Down Expand Up @@ -866,7 +874,10 @@ url_pattern::match(url_pattern_input&& input,
url = parsed_url.value();

// Set protocol to url’s scheme.
protocol = url.get_protocol();
// IMPORTANT: Not documented on the URLPattern spec, but protocol suffix ':'
// is removed. Similar work was done on workerd:
// https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2038
protocol = url.get_protocol().substr(0, url.get_protocol().size() - 1);
// Set username to url’s username.
username = url.get_username();
// Set password to url’s password.
Expand All @@ -880,9 +891,23 @@ url_pattern::match(url_pattern_input&& input,
// Set pathname to the result of URL path serializing url.
pathname = url.get_pathname();
// Set search to url’s query or the empty string if the value is null.
search = url.get_search();
// IMPORTANT: Not documented on the URLPattern spec, but search prefix '?'
// is removed. Similar work was done on workerd:
// https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2232
if (url.has_search()) {
search = url.get_search().substr(1);
} else {
search = "";
}
// Set hash to url’s fragment or the empty string if the value is null.
hash = url.get_hash();
// IMPORTANT: Not documented on the URLPattern spec, but hash prefix '#' is
// removed. Similar work was done on workerd:
// https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2242
if (url.has_hash()) {
hash = url.get_hash().substr(1);
} else {
hash = "";
}
}

// TODO: Make this function pluggable using a parameter.
Expand Down
Loading

0 comments on commit 5a2e4ee

Please sign in to comment.