From 75da92a54e9dc2258d9f79448824233e2b023948 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Wed, 18 Oct 2023 20:26:43 -0400 Subject: [PATCH] verifies and fix issue 50235 in node. --- include/ada/url_aggregator-inl.h | 29 ++++++++++++++++++++--------- tests/basic_tests.cpp | 12 ++++++++++++ 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/include/ada/url_aggregator-inl.h b/include/ada/url_aggregator-inl.h index a6b185698..fd29d753b 100644 --- a/include/ada/url_aggregator-inl.h +++ b/include/ada/url_aggregator-inl.h @@ -817,20 +817,31 @@ inline bool url_aggregator::has_port() const noexcept { // is greater than 1, and url's path[0] is the empty string, then append // U+002F (/) followed by U+002E (.) to output. ada_log("url_aggregator::has_dash_dot"); - // Performance: instead of doing this potentially expensive check, we could - // just have a boolean value in the structure. #if ADA_DEVELOPMENT_CHECKS - if (components.pathname_start + 1 < buffer.size() && - components.pathname_start == components.host_end + 2) { - ADA_ASSERT_TRUE(buffer[components.host_end] == '/'); - ADA_ASSERT_TRUE(buffer[components.host_end + 1] == '.'); + // If pathname_start and host_end are exactly two characters apart, then we + // either have a one-digit port such as http://test.com:5?param=1 or else we + // have a /.: sequence such as "non-spec:/.//". We test that this is the case. + if (components.pathname_start == components.host_end + 2) { + ADA_ASSERT_TRUE((buffer[components.host_end] == '/' && + buffer[components.host_end + 1] == '.') || + (buffer[components.host_end] == ':' && + checkers::is_digit(buffer[components.host_end + 1]))); + } + if (&&components.pathname_start == components.host_end + 2 && + buffer[components.host_end] == '/' && + buffer[components.host_end + 1] == '.') { + ADA_ASSERT_TRUE(components.pathname_start + 1 < buffer.size()); ADA_ASSERT_TRUE(buffer[components.pathname_start] == '/'); ADA_ASSERT_TRUE(buffer[components.pathname_start + 1] == '/'); } #endif - return !has_opaque_path && - components.pathname_start == components.host_end + 2 && - components.pathname_start + 1 < buffer.size(); + // Performance: it should be uncommon for components.pathname_start == + // components.host_end + 2 to be true. So we put this check first in the + // sequence. Most times, we do not have an opaque path. Checking for '/.' is + // more expensive, but should be uncommon. + return components.pathname_start == components.host_end + 2 && + !has_opaque_path && buffer[components.host_end] == '/' && + buffer[components.host_end + 1] == '.'; } [[nodiscard]] inline std::string_view url_aggregator::get_href() diff --git a/tests/basic_tests.cpp b/tests/basic_tests.cpp index 6fe5e5c13..49d7315fc 100644 --- a/tests/basic_tests.cpp +++ b/tests/basic_tests.cpp @@ -387,3 +387,15 @@ TYPED_TEST(basic_tests, nodejs_49650) { ASSERT_EQ(out->get_href(), "http://foo/"); SUCCEED(); } + +// https://github.com/nodejs/node/issues/50235 +TYPED_TEST(basic_tests, nodejs_50235) { + auto out = ada::parse("http://test.com:5/?param=1"); + ASSERT_TRUE(out); + ASSERT_TRUE(out->set_pathname("path")); + std::cout << "====================" << std::endl; + std::cout << out->get_href() << std::endl; + std::cout << "====================" << std::endl; + ASSERT_EQ(out->get_href(), "http://test.com:5/path?param=1"); + SUCCEED(); +}