diff --git a/ada.cpp b/ada.cpp index 1759de2..a7be606 100644 --- a/ada.cpp +++ b/ada.cpp @@ -1,4 +1,4 @@ -/* auto-generated on 2023-05-19 00:02:33 -0400. Do not edit! */ +/* auto-generated on 2024-03-07 13:23:39 -0500. Do not edit! */ /* begin file src/ada.cpp */ #include "ada.h" /* begin file src/checkers.cpp */ @@ -7,62 +7,79 @@ namespace ada::checkers { ada_really_inline ada_constexpr bool is_ipv4(std::string_view view) noexcept { - size_t last_dot = view.rfind('.'); - if (last_dot == view.size() - 1) { + // The string is not empty and does not contain upper case ASCII characters. + // + // Optimization. To be considered as a possible ipv4, the string must end + // with 'x' or a lowercase hex character. + // Most of the time, this will be false so this simple check will save a lot + // of effort. + char last_char = view.back(); + // If the address ends with a dot, we need to prune it (special case). + if (last_char == '.') { view.remove_suffix(1); - last_dot = view.rfind('.'); + if (view.empty()) { + return false; + } + last_char = view.back(); } - std::string_view number = - (last_dot == std::string_view::npos) ? view : view.substr(last_dot + 1); - if (number.empty()) { + bool possible_ipv4 = (last_char >= '0' && last_char <= '9') || + (last_char >= 'a' && last_char <= 'f') || + last_char == 'x'; + if (!possible_ipv4) { return false; } + // From the last character, find the last dot. + size_t last_dot = view.rfind('.'); + if (last_dot != std::string_view::npos) { + // We have at least one dot. + view = view.substr(last_dot + 1); + } /** Optimization opportunity: we have basically identified the last number of the ipv4 if we return true here. We might as well parse it and have at least one number parsed when we get to parse_ipv4. */ - if (std::all_of(number.begin(), number.end(), ada::checkers::is_digit)) { + if (std::all_of(view.begin(), view.end(), ada::checkers::is_digit)) { + return true; + } + // It could be hex (0x), but not if there is a single character. + if (view.size() == 1) { + return false; + } + // It must start with 0x. + if (!std::equal(view.begin(), view.begin() + 2, "0x")) { + return false; + } + // We must allow "0x". + if (view.size() == 2) { return true; } - return (checkers::has_hex_prefix(number) && - std::all_of(number.begin() + 2, number.end(), - ada::unicode::is_lowercase_hex)); + // We have 0x followed by some characters, we need to check that they are + // hexadecimals. + return std::all_of(view.begin() + 2, view.end(), + ada::unicode::is_lowercase_hex); } // for use with path_signature, we include all characters that need percent // encoding. -static constexpr uint8_t path_signature_table[256] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; -static_assert(path_signature_table[uint8_t('?')] == 1); -static_assert(path_signature_table[uint8_t('`')] == 1); -static_assert(path_signature_table[uint8_t('{')] == 1); -static_assert(path_signature_table[uint8_t('}')] == 1); -// -static_assert(path_signature_table[uint8_t(' ')] == 1); -static_assert(path_signature_table[uint8_t('?')] == 1); -static_assert(path_signature_table[uint8_t('"')] == 1); -static_assert(path_signature_table[uint8_t('#')] == 1); -static_assert(path_signature_table[uint8_t('<')] == 1); -static_assert(path_signature_table[uint8_t('>')] == 1); -static_assert(path_signature_table[uint8_t('\\')] == 2); -static_assert(path_signature_table[uint8_t('.')] == 4); -static_assert(path_signature_table[uint8_t('%')] == 8); - -// -static_assert(path_signature_table[0] == 1); -static_assert(path_signature_table[31] == 1); -static_assert(path_signature_table[127] == 1); -static_assert(path_signature_table[128] == 1); -static_assert(path_signature_table[255] == 1); +static constexpr std::array path_signature_table = + []() constexpr { + std::array result{}; + for (size_t i = 0; i < 256; i++) { + if (i <= 0x20 || i == 0x22 || i == 0x23 || i == 0x3c || i == 0x3e || + i == 0x3f || i == 0x60 || i == 0x7b || i == 0x7b || i == 0x7d || + i > 0x7e) { + result[i] = 1; + } else if (i == 0x25) { + result[i] = 8; + } else if (i == 0x2e) { + result[i] = 4; + } else if (i == 0x5c) { + result[i] = 2; + } else { + result[i] = 0; + } + } + return result; + }(); ada_really_inline constexpr uint8_t path_signature( std::string_view input) noexcept { @@ -116,10 +133,11 @@ ada_really_inline constexpr bool verify_dns_length( ADA_PUSH_DISABLE_ALL_WARNINGS /* begin file src/ada_idna.cpp */ -/* auto-generated on 2023-05-07 19:12:14 -0400. Do not edit! */ +/* auto-generated on 2023-09-19 15:58:51 -0400. Do not edit! */ /* begin file src/idna.cpp */ /* begin file src/unicode_transcoding.cpp */ +#include #include #include @@ -226,38 +244,22 @@ size_t utf8_length_from_utf32(const char32_t* buf, size_t len) { // We are not BOM aware. const uint32_t* p = reinterpret_cast(buf); size_t counter{0}; - for (size_t i = 0; i < len; i++) { - /** ASCII **/ - if (p[i] <= 0x7F) { - counter++; - } - /** two-byte **/ - else if (p[i] <= 0x7FF) { - counter += 2; - } - /** three-byte **/ - else if (p[i] <= 0xFFFF) { - counter += 3; - } - /** four-bytes **/ - else { - counter += 4; - } + for (size_t i = 0; i != len; ++i) { + ++counter; // ASCII + counter += static_cast(p[i] > 0x7F); // two-byte + counter += static_cast(p[i] > 0x7FF); // three-byte + counter += static_cast(p[i] > 0xFFFF); // four-bytes } return counter; } size_t utf32_length_from_utf8(const char* buf, size_t len) { const int8_t* p = reinterpret_cast(buf); - size_t counter{0}; - for (size_t i = 0; i < len; i++) { + return std::count_if(p, std::next(p, len), [](int8_t c) { // -65 is 0b10111111, anything larger in two-complement's // should start a new code point. - if (p[i] > -65) { - counter++; - } - } - return counter; + return c > -65; + }); } size_t utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) { @@ -2829,8 +2831,6 @@ std::u32string map(std::u32string_view input) { break; case 2: return error; // disallowed - break; - // case 3 : default: // We have a mapping @@ -7750,7 +7750,7 @@ const char32_t composition_data[1883] = { namespace ada::idna { // See -// https://github.composition_count/uni-algo/uni-algo/blob/c612968c5ed3ace39bde4c894c24286c5f2c7fe2/include/uni_algo/impl/impl_norm.h#L467 +// https://github.com/uni-algo/uni-algo/blob/c612968c5ed3ace39bde4c894c24286c5f2c7fe2/include/uni_algo/impl/impl_norm.h#L467 constexpr char32_t hangul_sbase = 0xAC00; constexpr char32_t hangul_tbase = 0x11A7; constexpr char32_t hangul_vbase = 0x1161; @@ -9313,7 +9313,7 @@ bool is_label_valid(const std::u32string_view label) { // - For Nontransitional Processing, each value must be either valid or // deviation. - // If CheckJoiners, the label must satisify the ContextJ rules from Appendix + // If CheckJoiners, the label must satisfy the ContextJ rules from Appendix // A, in The Unicode Code Points and Internationalized Domain Names for // Applications (IDNA) [IDNA2008]. constexpr static uint32_t virama[] = { @@ -9522,19 +9522,20 @@ bool is_label_valid(const std::u32string_view label) { namespace ada::idna { -bool constexpr begins_with(std::u32string_view view, - std::u32string_view prefix) { +bool begins_with(std::u32string_view view, std::u32string_view prefix) { if (view.size() < prefix.size()) { return false; } - return view.substr(0, prefix.size()) == prefix; + // constexpr as of C++20 + return std::equal(prefix.begin(), prefix.end(), view.begin()); } -bool constexpr begins_with(std::string_view view, std::string_view prefix) { +bool begins_with(std::string_view view, std::string_view prefix) { if (view.size() < prefix.size()) { return false; } - return view.substr(0, prefix.size()) == prefix; + // constexpr as of C++20 + return std::equal(prefix.begin(), prefix.end(), view.begin()); } bool constexpr is_ascii(std::u32string_view view) { @@ -9794,10 +9795,11 @@ ADA_POP_DISABLE_WARNINGS namespace ada::unicode { +constexpr uint64_t broadcast(uint8_t v) noexcept { + return 0x101010101010101ull * v; +} + constexpr bool to_lower_ascii(char* input, size_t length) noexcept { - auto broadcast = [](uint8_t v) -> uint64_t { - return 0x101010101010101ull * v; - }; uint64_t broadcast_80 = broadcast(0x80); uint64_t broadcast_Ap = broadcast(128 - 'A'); uint64_t broadcast_Zp = broadcast(128 - 'Z' - 1); @@ -9825,6 +9827,17 @@ constexpr bool to_lower_ascii(char* input, size_t length) noexcept { #if ADA_NEON ada_really_inline bool has_tabs_or_newline( std::string_view user_input) noexcept { + // first check for short strings in which case we do it naively. + if (user_input.size() < 16) { // slow path + for (size_t i = 0; i < user_input.size(); i++) { + if (user_input[i] == '\r' || user_input[i] == '\n' || + user_input[i] == '\t') { + return true; + } + } + return false; + } + // fast path for long strings (expected to be common) size_t i = 0; const uint8x16_t mask1 = vmovq_n_u8('\r'); const uint8x16_t mask2 = vmovq_n_u8('\n'); @@ -9837,9 +9850,8 @@ ada_really_inline bool has_tabs_or_newline( vceqq_u8(word, mask3)); } if (i < user_input.size()) { - uint8_t buffer[16]{}; - memcpy(buffer, user_input.data() + i, user_input.size() - i); - uint8x16_t word = vld1q_u8((const uint8_t*)user_input.data() + i); + uint8x16_t word = + vld1q_u8((const uint8_t*)user_input.data() + user_input.length() - 16); running = vorrq_u8(vorrq_u8(running, vorrq_u8(vceqq_u8(word, mask1), vceqq_u8(word, mask2))), vceqq_u8(word, mask3)); @@ -9849,6 +9861,17 @@ ada_really_inline bool has_tabs_or_newline( #elif ADA_SSE2 ada_really_inline bool has_tabs_or_newline( std::string_view user_input) noexcept { + // first check for short strings in which case we do it naively. + if (user_input.size() < 16) { // slow path + for (size_t i = 0; i < user_input.size(); i++) { + if (user_input[i] == '\r' || user_input[i] == '\n' || + user_input[i] == '\t') { + return true; + } + } + return false; + } + // fast path for long strings (expected to be common) size_t i = 0; const __m128i mask1 = _mm_set1_epi8('\r'); const __m128i mask2 = _mm_set1_epi8('\n'); @@ -9862,9 +9885,8 @@ ada_really_inline bool has_tabs_or_newline( _mm_cmpeq_epi8(word, mask3)); } if (i < user_input.size()) { - uint8_t buffer[16]{}; - memcpy(buffer, user_input.data() + i, user_input.size() - i); - __m128i word = _mm_loadu_si128((const __m128i*)buffer); + __m128i word = _mm_loadu_si128( + (const __m128i*)(user_input.data() + user_input.length() - 16)); running = _mm_or_si128( _mm_or_si128(running, _mm_or_si128(_mm_cmpeq_epi8(word, mask1), _mm_cmpeq_epi8(word, mask2))), @@ -9878,9 +9900,6 @@ ada_really_inline bool has_tabs_or_newline( auto has_zero_byte = [](uint64_t v) { return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080); }; - auto broadcast = [](uint8_t v) -> uint64_t { - return 0x101010101010101ull * v; - }; size_t i = 0; uint64_t mask1 = broadcast('\r'); uint64_t mask2 = broadcast('\n'); @@ -9910,56 +9929,36 @@ ada_really_inline bool has_tabs_or_newline( // U+0020 SPACE, U+0023 (#), U+002F (/), U+003A (:), U+003C (<), U+003E (>), // U+003F (?), U+0040 (@), U+005B ([), U+005C (\), U+005D (]), U+005E (^), or // U+007C (|). -constexpr static bool is_forbidden_host_code_point_table[] = { - 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; -static_assert(sizeof(is_forbidden_host_code_point_table) == 256); +constexpr static std::array is_forbidden_host_code_point_table = + []() constexpr { + std::array result{}; + for (uint8_t c : {'\0', '\x09', '\x0a', '\x0d', ' ', '#', '/', ':', '<', + '>', '?', '@', '[', '\\', ']', '^', '|'}) { + result[c] = true; + } + return result; + }(); ada_really_inline constexpr bool is_forbidden_host_code_point( const char c) noexcept { return is_forbidden_host_code_point_table[uint8_t(c)]; } -static_assert(unicode::is_forbidden_host_code_point('\0')); -static_assert(unicode::is_forbidden_host_code_point('\t')); -static_assert(unicode::is_forbidden_host_code_point('\n')); -static_assert(unicode::is_forbidden_host_code_point('\r')); -static_assert(unicode::is_forbidden_host_code_point(' ')); -static_assert(unicode::is_forbidden_host_code_point('#')); -static_assert(unicode::is_forbidden_host_code_point('/')); -static_assert(unicode::is_forbidden_host_code_point(':')); -static_assert(unicode::is_forbidden_host_code_point('?')); -static_assert(unicode::is_forbidden_host_code_point('@')); -static_assert(unicode::is_forbidden_host_code_point('[')); -static_assert(unicode::is_forbidden_host_code_point('?')); -static_assert(unicode::is_forbidden_host_code_point('<')); -static_assert(unicode::is_forbidden_host_code_point('>')); -static_assert(unicode::is_forbidden_host_code_point('\\')); -static_assert(unicode::is_forbidden_host_code_point(']')); -static_assert(unicode::is_forbidden_host_code_point('^')); -static_assert(unicode::is_forbidden_host_code_point('|')); - -constexpr static uint8_t is_forbidden_domain_code_point_table[] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; +constexpr static std::array is_forbidden_domain_code_point_table = + []() constexpr { + std::array result{}; + for (uint8_t c : {'\0', '\x09', '\x0a', '\x0d', ' ', '#', '/', ':', '<', + '>', '?', '@', '[', '\\', ']', '^', '|', '%'}) { + result[c] = true; + } + for (uint8_t c = 0; c <= 32; c++) { + result[c] = true; + } + for (size_t c = 127; c < 255; c++) { + result[c] = true; + } + return result; + }(); static_assert(sizeof(is_forbidden_domain_code_point_table) == 256); @@ -9984,22 +9983,24 @@ ada_really_inline constexpr bool contains_forbidden_domain_code_point( return accumulator; } -constexpr static uint8_t is_forbidden_domain_code_point_table_or_upper[] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - -static_assert(sizeof(is_forbidden_domain_code_point_table_or_upper) == 256); -static_assert(is_forbidden_domain_code_point_table_or_upper[uint8_t('A')] == 2); -static_assert(is_forbidden_domain_code_point_table_or_upper[uint8_t('Z')] == 2); +constexpr static std::array + is_forbidden_domain_code_point_table_or_upper = []() constexpr { + std::array result{}; + for (uint8_t c : {'\0', '\x09', '\x0a', '\x0d', ' ', '#', '/', ':', '<', + '>', '?', '@', '[', '\\', ']', '^', '|', '%'}) { + result[c] = 1; + } + for (uint8_t c = 'A'; c <= 'Z'; c++) { + result[c] = 2; + } + for (uint8_t c = 0; c <= 32; c++) { + result[c] = 1; + } + for (size_t c = 127; c < 255; c++) { + result[c] = 1; + } + return result; + }(); ada_really_inline constexpr uint8_t contains_forbidden_domain_code_point_or_upper(const char* input, @@ -10023,41 +10024,22 @@ contains_forbidden_domain_code_point_or_upper(const char* input, return accumulator; } -static_assert(unicode::is_forbidden_domain_code_point('%')); -static_assert(unicode::is_forbidden_domain_code_point('\x7f')); -static_assert(unicode::is_forbidden_domain_code_point('\0')); -static_assert(unicode::is_forbidden_domain_code_point('\t')); -static_assert(unicode::is_forbidden_domain_code_point('\n')); -static_assert(unicode::is_forbidden_domain_code_point('\r')); -static_assert(unicode::is_forbidden_domain_code_point(' ')); -static_assert(unicode::is_forbidden_domain_code_point('#')); -static_assert(unicode::is_forbidden_domain_code_point('/')); -static_assert(unicode::is_forbidden_domain_code_point(':')); -static_assert(unicode::is_forbidden_domain_code_point('?')); -static_assert(unicode::is_forbidden_domain_code_point('@')); -static_assert(unicode::is_forbidden_domain_code_point('[')); -static_assert(unicode::is_forbidden_domain_code_point('?')); -static_assert(unicode::is_forbidden_domain_code_point('<')); -static_assert(unicode::is_forbidden_domain_code_point('>')); -static_assert(unicode::is_forbidden_domain_code_point('\\')); -static_assert(unicode::is_forbidden_domain_code_point(']')); -static_assert(unicode::is_forbidden_domain_code_point('^')); -static_assert(unicode::is_forbidden_domain_code_point('|')); - -constexpr static bool is_alnum_plus_table[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - -static_assert(sizeof(is_alnum_plus_table) == 256); +// std::isalnum(c) || c == '+' || c == '-' || c == '.') is true for +constexpr static std::array is_alnum_plus_table = []() constexpr { + std::array result{}; + for (size_t c = 0; c < 256; c++) { + if (c >= '0' && c <= '9') { + result[c] = true; + } else if (c >= 'a' && c <= 'z') { + result[c] = true; + } else if (c >= 'A' && c <= 'Z') { + result[c] = true; + } else if (c == '+' || c == '-' || c == '.') { + result[c] = true; + } + } + return result; +}(); ada_really_inline constexpr bool is_alnum_plus(const char c) noexcept { return is_alnum_plus_table[uint8_t(c)]; @@ -10065,13 +10047,6 @@ ada_really_inline constexpr bool is_alnum_plus(const char c) noexcept { // following under most compilers: return // return (std::isalnum(c) || c == '+' || c == '-' || c == '.'); } -static_assert(unicode::is_alnum_plus('+')); -static_assert(unicode::is_alnum_plus('-')); -static_assert(unicode::is_alnum_plus('.')); -static_assert(unicode::is_alnum_plus('0')); -static_assert(unicode::is_alnum_plus('1')); -static_assert(unicode::is_alnum_plus('a')); -static_assert(unicode::is_alnum_plus('b')); ada_really_inline constexpr bool is_ascii_hex_digit(const char c) noexcept { return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || @@ -10148,13 +10123,12 @@ ada_really_inline constexpr bool is_lowercase_hex(const char c) noexcept { return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'); } +constexpr static char hex_to_binary_table[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 10, 11, + 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 11, 12, 13, 14, 15}; unsigned constexpr convert_hex_to_binary(const char c) noexcept { - // this code can be optimized. - if (c <= '9') { - return c - '0'; - } - char del = c >= 'a' ? 'a' : 'A'; - return 10 + (c - del); + return hex_to_binary_table[c - '0']; } std::string percent_decode(const std::string_view input, size_t first_percent) { @@ -10163,8 +10137,9 @@ std::string percent_decode(const std::string_view input, size_t first_percent) { if (first_percent == std::string_view::npos) { return std::string(input); } - std::string dest(input.substr(0, first_percent)); + std::string dest; dest.reserve(input.length()); + dest.append(input.substr(0, first_percent)); const char* pointer = input.data() + first_percent; const char* end = input.data() + input.size(); // Optimization opportunity: if the following code gets @@ -10201,9 +10176,10 @@ std::string percent_encode(const std::string_view input, return std::string(input); } - std::string result(input.substr(0, std::distance(input.begin(), pointer))); + std::string result; result.reserve(input.length()); // in the worst case, percent encoding might // produce 3 characters. + result.append(input.substr(0, std::distance(input.begin(), pointer))); for (; pointer != input.end(); pointer++) { if (character_sets::bit_at(character_set, *pointer)) { @@ -10542,8 +10518,8 @@ ada_really_inline bool shorten_path(std::string& path, ada::scheme::type type) noexcept { size_t first_delimiter = path.find_first_of('/', 1); - // Let path be url’s path. - // If url’s scheme is "file", path’s size is 1, and path[0] is a normalized + // Let path be url's path. + // If url's scheme is "file", path's size is 1, and path[0] is a normalized // Windows drive letter, then return. if (type == ada::scheme::type::FILE && first_delimiter == std::string_view::npos && !path.empty()) { @@ -10553,7 +10529,7 @@ ada_really_inline bool shorten_path(std::string& path, } } - // Remove path’s last item, if any. + // Remove path's last item, if any. size_t last_delimiter = path.rfind('/'); if (last_delimiter != std::string::npos) { path.erase(last_delimiter); @@ -10567,8 +10543,8 @@ ada_really_inline bool shorten_path(std::string_view& path, ada::scheme::type type) noexcept { size_t first_delimiter = path.find_first_of('/', 1); - // Let path be url’s path. - // If url’s scheme is "file", path’s size is 1, and path[0] is a normalized + // Let path be url's path. + // If url's scheme is "file", path's size is 1, and path[0] is a normalized // Windows drive letter, then return. if (type == ada::scheme::type::FILE && first_delimiter == std::string_view::npos && !path.empty()) { @@ -10578,7 +10554,7 @@ ada_really_inline bool shorten_path(std::string_view& path, } } - // Remove path’s last item, if any. + // Remove path's last item, if any. if (!path.empty()) { size_t slash_loc = path.rfind('/'); if (slash_loc != std::string_view::npos) { @@ -10604,7 +10580,7 @@ ada_really_inline void remove_ascii_tab_or_newline( ada_really_inline std::string_view substring(std::string_view input, size_t pos) noexcept { ADA_ASSERT_TRUE(pos <= input.size()); - // The following is safer but uneeded if we have the above line: + // The following is safer but unneeded if we have the above line: // return pos > input.size() ? std::string_view() : input.substr(pos); return input.substr(pos); } @@ -10614,155 +10590,302 @@ ada_really_inline void resize(std::string_view& input, size_t pos) noexcept { input.remove_suffix(input.size() - pos); } -// Reverse the byte order. -ada_really_inline uint64_t swap_bytes(uint64_t val) noexcept { - // performance: this often compiles to a single instruction (e.g., bswap) - return ((((val)&0xff00000000000000ull) >> 56) | - (((val)&0x00ff000000000000ull) >> 40) | - (((val)&0x0000ff0000000000ull) >> 24) | - (((val)&0x000000ff00000000ull) >> 8) | - (((val)&0x00000000ff000000ull) << 8) | - (((val)&0x0000000000ff0000ull) << 24) | - (((val)&0x000000000000ff00ull) << 40) | - (((val)&0x00000000000000ffull) << 56)); -} - -ada_really_inline uint64_t swap_bytes_if_big_endian(uint64_t val) noexcept { - // performance: under little-endian systems (most systems), this function - // is free (just returns the input). -#if ADA_IS_BIG_ENDIAN - return swap_bytes(val); -#else - return val; // unchanged (trivial) -#endif +// computes the number of trailing zeroes +// this is a private inline function only defined in this source file. +ada_really_inline int trailing_zeroes(uint32_t input_num) noexcept { +#ifdef ADA_REGULAR_VISUAL_STUDIO + unsigned long ret; + // Search the mask data from least significant bit (LSB) + // to the most significant bit (MSB) for a set bit (1). + _BitScanForward(&ret, input_num); + return (int)ret; +#else // ADA_REGULAR_VISUAL_STUDIO + return __builtin_ctzl(input_num); +#endif // ADA_REGULAR_VISUAL_STUDIO } // starting at index location, this finds the next location of a character // :, /, \\, ? or [. If none is found, view.size() is returned. // For use within get_host_delimiter_location. +#if ADA_NEON +// The ada_make_uint8x16_t macro is necessary because Visual Studio does not +// support direct initialization of uint8x16_t. See +// https://developercommunity.visualstudio.com/t/error-C2078:-too-many-initializers-whe/402911?q=backend+neon +#ifndef ada_make_uint8x16_t +#define ada_make_uint8x16_t(x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, \ + x13, x14, x15, x16) \ + ([=]() { \ + static uint8_t array[16] = {x1, x2, x3, x4, x5, x6, x7, x8, \ + x9, x10, x11, x12, x13, x14, x15, x16}; \ + return vld1q_u8(array); \ + }()) +#endif + ada_really_inline size_t find_next_host_delimiter_special( std::string_view view, size_t location) noexcept { - // performance: if you plan to call find_next_host_delimiter more than once, - // you *really* want find_next_host_delimiter to be inlined, because - // otherwise, the constants may get reloaded each time (bad). - auto has_zero_byte = [](uint64_t v) { - return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080); - }; - auto index_of_first_set_byte = [](uint64_t v) { - return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1; - }; - auto broadcast = [](uint8_t v) -> uint64_t { - return 0x101010101010101ull * v; + // first check for short strings in which case we do it naively. + if (view.size() - location < 16) { // slow path + for (size_t i = location; i < view.size(); i++) { + if (view[i] == ':' || view[i] == '/' || view[i] == '\\' || + view[i] == '?' || view[i] == '[') { + return i; + } + } + return size_t(view.size()); + } + auto to_bitmask = [](uint8x16_t input) -> uint16_t { + uint8x16_t bit_mask = + ada_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01, + 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); + uint8x16_t minput = vandq_u8(input, bit_mask); + uint8x16_t tmp = vpaddq_u8(minput, minput); + tmp = vpaddq_u8(tmp, tmp); + tmp = vpaddq_u8(tmp, tmp); + return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); }; + + // fast path for long strings (expected to be common) size_t i = location; - uint64_t mask1 = broadcast(':'); - uint64_t mask2 = broadcast('/'); - uint64_t mask3 = broadcast('\\'); - uint64_t mask4 = broadcast('?'); - uint64_t mask5 = broadcast('['); - // This loop will get autovectorized under many optimizing compilers, - // so you get actually SIMD! - for (; i + 7 < view.size(); i += 8) { - uint64_t word{}; - // performance: the next memcpy translates into a single CPU instruction. - memcpy(&word, view.data() + i, sizeof(word)); - // performance: on little-endian systems (most systems), this next line is - // free. - word = swap_bytes_if_big_endian(word); - uint64_t xor1 = word ^ mask1; - uint64_t xor2 = word ^ mask2; - uint64_t xor3 = word ^ mask3; - uint64_t xor4 = word ^ mask4; - uint64_t xor5 = word ^ mask5; - uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | - has_zero_byte(xor3) | has_zero_byte(xor4) | - has_zero_byte(xor5); - if (is_match) { - return size_t(i + index_of_first_set_byte(is_match)); + uint8x16_t low_mask = + ada_make_uint8x16_t(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x04, 0x04, 0x00, 0x00, 0x03); + uint8x16_t high_mask = + ada_make_uint8x16_t(0x00, 0x00, 0x02, 0x01, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + uint8x16_t fmask = vmovq_n_u8(0xf); + uint8x16_t zero{0}; + for (; i + 15 < view.size(); i += 16) { + uint8x16_t word = vld1q_u8((const uint8_t*)view.data() + i); + uint8x16_t lowpart = vqtbl1q_u8(low_mask, vandq_u8(word, fmask)); + uint8x16_t highpart = vqtbl1q_u8(high_mask, vshrq_n_u8(word, 4)); + uint8x16_t classify = vandq_u8(lowpart, highpart); + if (vmaxvq_u8(classify) != 0) { + uint8x16_t is_zero = vceqq_u8(classify, zero); + uint16_t is_non_zero = ~to_bitmask(is_zero); + return i + trailing_zeroes(is_non_zero); } } + if (i < view.size()) { - uint64_t word{}; - // performance: the next memcpy translates into a function call, but - // that is difficult to avoid. Might be a bit expensive. - memcpy(&word, view.data() + i, view.size() - i); - word = swap_bytes_if_big_endian(word); - uint64_t xor1 = word ^ mask1; - uint64_t xor2 = word ^ mask2; - uint64_t xor3 = word ^ mask3; - uint64_t xor4 = word ^ mask4; - uint64_t xor5 = word ^ mask5; - uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | - has_zero_byte(xor3) | has_zero_byte(xor4) | - has_zero_byte(xor5); - if (is_match) { - return size_t(i + index_of_first_set_byte(is_match)); + uint8x16_t word = + vld1q_u8((const uint8_t*)view.data() + view.length() - 16); + uint8x16_t lowpart = vqtbl1q_u8(low_mask, vandq_u8(word, fmask)); + uint8x16_t highpart = vqtbl1q_u8(high_mask, vshrq_n_u8(word, 4)); + uint8x16_t classify = vandq_u8(lowpart, highpart); + if (vmaxvq_u8(classify) != 0) { + uint8x16_t is_zero = vceqq_u8(classify, zero); + uint16_t is_non_zero = ~to_bitmask(is_zero); + return view.length() - 16 + trailing_zeroes(is_non_zero); + } + } + return size_t(view.size()); +} +#elif ADA_SSE2 +ada_really_inline size_t find_next_host_delimiter_special( + std::string_view view, size_t location) noexcept { + // first check for short strings in which case we do it naively. + if (view.size() - location < 16) { // slow path + for (size_t i = location; i < view.size(); i++) { + if (view[i] == ':' || view[i] == '/' || view[i] == '\\' || + view[i] == '?' || view[i] == '[') { + return i; + } + } + return size_t(view.size()); + } + // fast path for long strings (expected to be common) + size_t i = location; + const __m128i mask1 = _mm_set1_epi8(':'); + const __m128i mask2 = _mm_set1_epi8('/'); + const __m128i mask3 = _mm_set1_epi8('\\'); + const __m128i mask4 = _mm_set1_epi8('?'); + const __m128i mask5 = _mm_set1_epi8('['); + + for (; i + 15 < view.size(); i += 16) { + __m128i word = _mm_loadu_si128((const __m128i*)(view.data() + i)); + __m128i m1 = _mm_cmpeq_epi8(word, mask1); + __m128i m2 = _mm_cmpeq_epi8(word, mask2); + __m128i m3 = _mm_cmpeq_epi8(word, mask3); + __m128i m4 = _mm_cmpeq_epi8(word, mask4); + __m128i m5 = _mm_cmpeq_epi8(word, mask5); + __m128i m = _mm_or_si128( + _mm_or_si128(_mm_or_si128(m1, m2), _mm_or_si128(m3, m4)), m5); + int mask = _mm_movemask_epi8(m); + if (mask != 0) { + return i + trailing_zeroes(mask); + } + } + if (i < view.size()) { + __m128i word = + _mm_loadu_si128((const __m128i*)(view.data() + view.length() - 16)); + __m128i m1 = _mm_cmpeq_epi8(word, mask1); + __m128i m2 = _mm_cmpeq_epi8(word, mask2); + __m128i m3 = _mm_cmpeq_epi8(word, mask3); + __m128i m4 = _mm_cmpeq_epi8(word, mask4); + __m128i m5 = _mm_cmpeq_epi8(word, mask5); + __m128i m = _mm_or_si128( + _mm_or_si128(_mm_or_si128(m1, m2), _mm_or_si128(m3, m4)), m5); + int mask = _mm_movemask_epi8(m); + if (mask != 0) { + return view.length() - 16 + trailing_zeroes(mask); + } + } + return size_t(view.length()); +} +#else +// : / [ \\ ? +static constexpr std::array special_host_delimiters = + []() constexpr { + std::array result{}; + for (int i : {':', '/', '[', '\\', '?'}) { + result[i] = 1; + } + return result; + }(); +// credit: @the-moisrex recommended a table-based approach +ada_really_inline size_t find_next_host_delimiter_special( + std::string_view view, size_t location) noexcept { + auto const str = view.substr(location); + for (auto pos = str.begin(); pos != str.end(); ++pos) { + if (special_host_delimiters[(uint8_t)*pos]) { + return pos - str.begin() + location; } } - return view.size(); + return size_t(view.size()); } +#endif // starting at index location, this finds the next location of a character // :, /, ? or [. If none is found, view.size() is returned. // For use within get_host_delimiter_location. +#if ADA_NEON ada_really_inline size_t find_next_host_delimiter(std::string_view view, size_t location) noexcept { - // performance: if you plan to call find_next_host_delimiter more than once, - // you *really* want find_next_host_delimiter to be inlined, because - // otherwise, the constants may get reloaded each time (bad). - auto has_zero_byte = [](uint64_t v) { - return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080); - }; - auto index_of_first_set_byte = [](uint64_t v) { - return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1; - }; - auto broadcast = [](uint8_t v) -> uint64_t { - return 0x101010101010101ull * v; + // first check for short strings in which case we do it naively. + if (view.size() - location < 16) { // slow path + for (size_t i = location; i < view.size(); i++) { + if (view[i] == ':' || view[i] == '/' || view[i] == '?' || + view[i] == '[') { + return i; + } + } + return size_t(view.size()); + } + auto to_bitmask = [](uint8x16_t input) -> uint16_t { + uint8x16_t bit_mask = + ada_make_uint8x16_t(0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01, + 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80); + uint8x16_t minput = vandq_u8(input, bit_mask); + uint8x16_t tmp = vpaddq_u8(minput, minput); + tmp = vpaddq_u8(tmp, tmp); + tmp = vpaddq_u8(tmp, tmp); + return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0); }; + + // fast path for long strings (expected to be common) size_t i = location; - uint64_t mask1 = broadcast(':'); - uint64_t mask2 = broadcast('/'); - uint64_t mask4 = broadcast('?'); - uint64_t mask5 = broadcast('['); - // This loop will get autovectorized under many optimizing compilers, - // so you get actually SIMD! - for (; i + 7 < view.size(); i += 8) { - uint64_t word{}; - // performance: the next memcpy translates into a single CPU instruction. - memcpy(&word, view.data() + i, sizeof(word)); - // performance: on little-endian systems (most systems), this next line is - // free. - word = swap_bytes_if_big_endian(word); - uint64_t xor1 = word ^ mask1; - uint64_t xor2 = word ^ mask2; - uint64_t xor4 = word ^ mask4; - uint64_t xor5 = word ^ mask5; - uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | - has_zero_byte(xor4) | has_zero_byte(xor5); - if (is_match) { - return size_t(i + index_of_first_set_byte(is_match)); + uint8x16_t low_mask = + ada_make_uint8x16_t(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x04, 0x00, 0x00, 0x00, 0x03); + uint8x16_t high_mask = + ada_make_uint8x16_t(0x00, 0x00, 0x02, 0x01, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + uint8x16_t fmask = vmovq_n_u8(0xf); + uint8x16_t zero{0}; + for (; i + 15 < view.size(); i += 16) { + uint8x16_t word = vld1q_u8((const uint8_t*)view.data() + i); + uint8x16_t lowpart = vqtbl1q_u8(low_mask, vandq_u8(word, fmask)); + uint8x16_t highpart = vqtbl1q_u8(high_mask, vshrq_n_u8(word, 4)); + uint8x16_t classify = vandq_u8(lowpart, highpart); + if (vmaxvq_u8(classify) != 0) { + uint8x16_t is_zero = vceqq_u8(classify, zero); + uint16_t is_non_zero = ~to_bitmask(is_zero); + return i + trailing_zeroes(is_non_zero); } } + if (i < view.size()) { - uint64_t word{}; - // performance: the next memcpy translates into a function call, but - // that is difficult to avoid. Might be a bit expensive. - memcpy(&word, view.data() + i, view.size() - i); - // performance: on little-endian systems (most systems), this next line is - // free. - word = swap_bytes_if_big_endian(word); - uint64_t xor1 = word ^ mask1; - uint64_t xor2 = word ^ mask2; - uint64_t xor4 = word ^ mask4; - uint64_t xor5 = word ^ mask5; - uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | - has_zero_byte(xor4) | has_zero_byte(xor5); - if (is_match) { - return size_t(i + index_of_first_set_byte(is_match)); + uint8x16_t word = + vld1q_u8((const uint8_t*)view.data() + view.length() - 16); + uint8x16_t lowpart = vqtbl1q_u8(low_mask, vandq_u8(word, fmask)); + uint8x16_t highpart = vqtbl1q_u8(high_mask, vshrq_n_u8(word, 4)); + uint8x16_t classify = vandq_u8(lowpart, highpart); + if (vmaxvq_u8(classify) != 0) { + uint8x16_t is_zero = vceqq_u8(classify, zero); + uint16_t is_non_zero = ~to_bitmask(is_zero); + return view.length() - 16 + trailing_zeroes(is_non_zero); } } - return view.size(); + return size_t(view.size()); } +#elif ADA_SSE2 +ada_really_inline size_t find_next_host_delimiter(std::string_view view, + size_t location) noexcept { + // first check for short strings in which case we do it naively. + if (view.size() - location < 16) { // slow path + for (size_t i = location; i < view.size(); i++) { + if (view[i] == ':' || view[i] == '/' || view[i] == '?' || + view[i] == '[') { + return i; + } + } + return size_t(view.size()); + } + // fast path for long strings (expected to be common) + size_t i = location; + const __m128i mask1 = _mm_set1_epi8(':'); + const __m128i mask2 = _mm_set1_epi8('/'); + const __m128i mask4 = _mm_set1_epi8('?'); + const __m128i mask5 = _mm_set1_epi8('['); + + for (; i + 15 < view.size(); i += 16) { + __m128i word = _mm_loadu_si128((const __m128i*)(view.data() + i)); + __m128i m1 = _mm_cmpeq_epi8(word, mask1); + __m128i m2 = _mm_cmpeq_epi8(word, mask2); + __m128i m4 = _mm_cmpeq_epi8(word, mask4); + __m128i m5 = _mm_cmpeq_epi8(word, mask5); + __m128i m = _mm_or_si128(_mm_or_si128(m1, m2), _mm_or_si128(m4, m5)); + int mask = _mm_movemask_epi8(m); + if (mask != 0) { + return i + trailing_zeroes(mask); + } + } + if (i < view.size()) { + __m128i word = + _mm_loadu_si128((const __m128i*)(view.data() + view.length() - 16)); + __m128i m1 = _mm_cmpeq_epi8(word, mask1); + __m128i m2 = _mm_cmpeq_epi8(word, mask2); + __m128i m4 = _mm_cmpeq_epi8(word, mask4); + __m128i m5 = _mm_cmpeq_epi8(word, mask5); + __m128i m = _mm_or_si128(_mm_or_si128(m1, m2), _mm_or_si128(m4, m5)); + int mask = _mm_movemask_epi8(m); + if (mask != 0) { + return view.length() - 16 + trailing_zeroes(mask); + } + } + return size_t(view.length()); +} +#else +// : / [ ? +static constexpr std::array host_delimiters = []() constexpr { + std::array result{}; + for (int i : {':', '/', '?', '['}) { + result[i] = 1; + } + return result; +}(); +// credit: @the-moisrex recommended a table-based approach +ada_really_inline size_t find_next_host_delimiter(std::string_view view, + size_t location) noexcept { + auto const str = view.substr(location); + for (auto pos = str.begin(); pos != str.end(); ++pos) { + if (host_delimiters[(uint8_t)*pos]) { + return pos - str.begin() + location; + } + } + return size_t(view.size()); +} +#endif ada_really_inline std::pair get_host_delimiter_location( const bool is_special, std::string_view& view) noexcept { @@ -10991,7 +11114,7 @@ ada_really_inline void parse_prepared_path(std::string_view input, } // Otherwise, if path_buffer is not a single-dot path segment, then: else if (!unicode::is_single_dot_path_segment(path_buffer)) { - // If url’s scheme is "file", url’s path is empty, and path_buffer is a + // If url's scheme is "file", url's path is empty, and path_buffer is a // Windows drive letter, then replace the second code point in // path_buffer with U+003A (:). if (type == ada::scheme::type::FILE && path.empty() && @@ -11002,7 +11125,7 @@ ada_really_inline void parse_prepared_path(std::string_view input, path_buffer.remove_prefix(2); path.append(path_buffer); } else { - // Append path_buffer to url’s path. + // Append path_buffer to url's path. path += '/'; path.append(path_buffer); } @@ -11037,101 +11160,47 @@ ada_really_inline void strip_trailing_spaces_from_opaque_path( url.update_base_pathname(path); } +// @ / \\ ? +static constexpr std::array authority_delimiter_special = + []() constexpr { + std::array result{}; + for (int i : {'@', '/', '\\', '?'}) { + result[i] = 1; + } + return result; + }(); +// credit: @the-moisrex recommended a table-based approach ada_really_inline size_t find_authority_delimiter_special(std::string_view view) noexcept { - auto has_zero_byte = [](uint64_t v) { - return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080); - }; - auto index_of_first_set_byte = [](uint64_t v) { - return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1; - }; - auto broadcast = [](uint8_t v) -> uint64_t { - return 0x101010101010101ull * v; - }; - size_t i = 0; - uint64_t mask1 = broadcast('@'); - uint64_t mask2 = broadcast('/'); - uint64_t mask3 = broadcast('?'); - uint64_t mask4 = broadcast('\\'); - - for (; i + 7 < view.size(); i += 8) { - uint64_t word{}; - memcpy(&word, view.data() + i, sizeof(word)); - word = swap_bytes_if_big_endian(word); - uint64_t xor1 = word ^ mask1; - uint64_t xor2 = word ^ mask2; - uint64_t xor3 = word ^ mask3; - uint64_t xor4 = word ^ mask4; - uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | - has_zero_byte(xor3) | has_zero_byte(xor4); - if (is_match) { - return size_t(i + index_of_first_set_byte(is_match)); + // performance note: we might be able to gain further performance + // with SIMD instrinsics. + for (auto pos = view.begin(); pos != view.end(); ++pos) { + if (authority_delimiter_special[(uint8_t)*pos]) { + return pos - view.begin(); } } - - if (i < view.size()) { - uint64_t word{}; - memcpy(&word, view.data() + i, view.size() - i); - word = swap_bytes_if_big_endian(word); - uint64_t xor1 = word ^ mask1; - uint64_t xor2 = word ^ mask2; - uint64_t xor3 = word ^ mask3; - uint64_t xor4 = word ^ mask4; - uint64_t is_match = has_zero_byte(xor1) | has_zero_byte(xor2) | - has_zero_byte(xor3) | has_zero_byte(xor4); - if (is_match) { - return size_t(i + index_of_first_set_byte(is_match)); - } - } - - return view.size(); + return size_t(view.size()); } +// @ / ? +static constexpr std::array authority_delimiter = []() constexpr { + std::array result{}; + for (int i : {'@', '/', '?'}) { + result[i] = 1; + } + return result; +}(); +// credit: @the-moisrex recommended a table-based approach ada_really_inline size_t find_authority_delimiter(std::string_view view) noexcept { - auto has_zero_byte = [](uint64_t v) { - return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080); - }; - auto index_of_first_set_byte = [](uint64_t v) { - return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1; - }; - auto broadcast = [](uint8_t v) -> uint64_t { - return 0x101010101010101ull * v; - }; - size_t i = 0; - uint64_t mask1 = broadcast('@'); - uint64_t mask2 = broadcast('/'); - uint64_t mask3 = broadcast('?'); - - for (; i + 7 < view.size(); i += 8) { - uint64_t word{}; - memcpy(&word, view.data() + i, sizeof(word)); - word = swap_bytes_if_big_endian(word); - uint64_t xor1 = word ^ mask1; - uint64_t xor2 = word ^ mask2; - uint64_t xor3 = word ^ mask3; - uint64_t is_match = - has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor3); - if (is_match) { - return size_t(i + index_of_first_set_byte(is_match)); + // performance note: we might be able to gain further performance + // with SIMD instrinsics. + for (auto pos = view.begin(); pos != view.end(); ++pos) { + if (authority_delimiter[(uint8_t)*pos]) { + return pos - view.begin(); } } - - if (i < view.size()) { - uint64_t word{}; - memcpy(&word, view.data() + i, view.size() - i); - word = swap_bytes_if_big_endian(word); - uint64_t xor1 = word ^ mask1; - uint64_t xor2 = word ^ mask2; - uint64_t xor3 = word ^ mask3; - uint64_t is_match = - has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor3); - if (is_match) { - return size_t(i + index_of_first_set_byte(is_match)); - } - } - - return view.size(); + return size_t(view.size()); } } // namespace ada::helpers @@ -11140,6 +11209,7 @@ namespace ada { ada_warn_unused std::string to_string(ada::state state) { return ada::helpers::get_state(state); } +#undef ada_make_uint8x16_t } // namespace ada /* end file src/helpers.cpp */ /* begin file src/url.cpp */ @@ -11151,7 +11221,7 @@ ada_warn_unused std::string to_string(ada::state state) { namespace ada { bool url::parse_opaque_host(std::string_view input) { - ada_log("parse_opaque_host ", input, "[", input.size(), " bytes]"); + ada_log("parse_opaque_host ", input, " [", input.size(), " bytes]"); if (std::any_of(input.begin(), input.end(), ada::unicode::is_forbidden_host_code_point)) { return is_valid = false; @@ -11165,7 +11235,7 @@ bool url::parse_opaque_host(std::string_view input) { } bool url::parse_ipv4(std::string_view input) { - ada_log("parse_ipv4 ", input, "[", input.size(), " bytes]"); + ada_log("parse_ipv4 ", input, " [", input.size(), " bytes]"); if (input.back() == '.') { input.remove_suffix(1); } @@ -11207,7 +11277,7 @@ bool url::parse_ipv4(std::string_view input) { // We have the last value. // At this stage, ipv4 contains digit_count*8 bits. // So we have 32-digit_count*8 bits left. - if (segment_result > (uint64_t(1) << (32 - digit_count * 8))) { + if (segment_result >= (uint64_t(1) << (32 - digit_count * 8))) { return is_valid = false; } ipv4 <<= (32 - digit_count * 8); @@ -11235,11 +11305,12 @@ bool url::parse_ipv4(std::string_view input) { } else { host = ada::serializers::ipv4(ipv4); // We have to reserialize the address. } + host_type = IPV4; return true; } bool url::parse_ipv6(std::string_view input) { - ada_log("parse_ipv6 ", input, "[", input.size(), " bytes]"); + ada_log("parse_ipv6 ", input, " [", input.size(), " bytes]"); if (input.empty()) { return is_valid = false; @@ -11299,7 +11370,7 @@ bool url::parse_ipv6(std::string_view input) { uint16_t value = 0, length = 0; // While length is less than 4 and c is an ASCII hex digit, - // set value to value × 0x10 + c interpreted as hexadecimal number, and + // set value to value times 0x10 + c interpreted as hexadecimal number, and // increase pointer and length by 1. while (length < 4 && pointer != input.end() && unicode::is_ascii_hex_digit(*pointer)) { @@ -11370,7 +11441,7 @@ bool url::parse_ipv6(std::string_view input) { ada_log("parse_ipv6 if ipv4Piece is 0, validation error"); return is_valid = false; } - // Otherwise, set ipv4Piece to ipv4Piece × 10 + number. + // Otherwise, set ipv4Piece to ipv4Piece times 10 + number. else { ipv4_piece = *ipv4_piece * 10 + number; } @@ -11385,7 +11456,8 @@ bool url::parse_ipv6(std::string_view input) { pointer++; } - // Set address[pieceIndex] to address[pieceIndex] × 0x100 + ipv4Piece. + // Set address[pieceIndex] to address[pieceIndex] times 0x100 + + // ipv4Piece. // https://stackoverflow.com/questions/39060852/why-does-the-addition-of-two-shorts-return-an-int address[piece_index] = uint16_t(address[piece_index] * 0x100 + *ipv4_piece); @@ -11438,14 +11510,14 @@ bool url::parse_ipv6(std::string_view input) { // If compress is non-null, then: if (compress.has_value()) { - // Let swaps be pieceIndex − compress. + // Let swaps be pieceIndex - compress. int swaps = piece_index - *compress; // Set pieceIndex to 7. piece_index = 7; // While pieceIndex is not 0 and swaps is greater than 0, - // swap address[pieceIndex] with address[compress + swaps − 1], and then + // swap address[pieceIndex] with address[compress + swaps - 1], and then // decrease both pieceIndex and swaps by 1. while (piece_index != 0 && swaps > 0) { std::swap(address[piece_index], address[*compress + swaps - 1]); @@ -11463,6 +11535,7 @@ bool url::parse_ipv6(std::string_view input) { } host = ada::serializers::ipv6(address); ada_log("parse_ipv6 ", *host); + host_type = IPV6; return true; } @@ -11476,7 +11549,7 @@ ada_really_inline bool url::parse_scheme(const std::string_view input) { **/ if (is_input_special) { // fast path!!! if (has_state_override) { - // If url’s scheme is not a special scheme and buffer is a special scheme, + // If url's scheme is not a special scheme and buffer is a special scheme, // then return. if (is_special() != is_input_special) { return true; @@ -11489,7 +11562,7 @@ ada_really_inline bool url::parse_scheme(const std::string_view input) { return true; } - // If url’s scheme is "file" and its host is an empty host, then return. + // If url's scheme is "file" and its host is an empty host, then return. // An empty host is the empty string. if (type == ada::scheme::type::FILE && host.has_value() && host.value().empty()) { @@ -11504,7 +11577,7 @@ ada_really_inline bool url::parse_scheme(const std::string_view input) { uint16_t urls_scheme_port = get_special_port(); if (urls_scheme_port) { - // If url’s port is url’s scheme’s default port, then set url’s port to + // If url's port is url's scheme's default port, then set url's port to // null. if (port.has_value() && *port == urls_scheme_port) { port = std::nullopt; @@ -11520,8 +11593,8 @@ ada_really_inline bool url::parse_scheme(const std::string_view input) { unicode::to_lower_ascii(_buffer.data(), _buffer.size()); if (has_state_override) { - // If url’s scheme is a special scheme and buffer is not a special scheme, - // then return. If url’s scheme is not a special scheme and buffer is a + // If url's scheme is a special scheme and buffer is not a special scheme, + // then return. If url's scheme is not a special scheme and buffer is a // special scheme, then return. if (is_special() != ada::scheme::is_special(_buffer)) { return true; @@ -11533,7 +11606,7 @@ ada_really_inline bool url::parse_scheme(const std::string_view input) { return true; } - // If url’s scheme is "file" and its host is an empty host, then return. + // If url's scheme is "file" and its host is an empty host, then return. // An empty host is the empty string. if (type == ada::scheme::type::FILE && host.has_value() && host.value().empty()) { @@ -11548,7 +11621,7 @@ ada_really_inline bool url::parse_scheme(const std::string_view input) { uint16_t urls_scheme_port = get_special_port(); if (urls_scheme_port) { - // If url’s port is url’s scheme’s default port, then set url’s port to + // If url's port is url's scheme's default port, then set url's port to // null. if (port.has_value() && *port == urls_scheme_port) { port = std::nullopt; @@ -11561,7 +11634,7 @@ ada_really_inline bool url::parse_scheme(const std::string_view input) { } ada_really_inline bool url::parse_host(std::string_view input) { - ada_log("parse_host ", input, "[", input.size(), " bytes]"); + ada_log("parse_host ", input, " [", input.size(), " bytes]"); if (input.empty()) { return is_valid = false; } // technically unnecessary. @@ -11613,6 +11686,8 @@ ada_really_inline bool url::parse_host(std::string_view input) { ada_log("parse_host to_ascii returns false"); return is_valid = false; } + ada_log("parse_host to_ascii succeeded ", *host, " [", host->size(), + " bytes]"); if (std::any_of(host.value().begin(), host.value().end(), ada::unicode::is_forbidden_domain_code_point)) { @@ -11623,7 +11698,7 @@ ada_really_inline bool url::parse_host(std::string_view input) { // If asciiDomain ends in a number, then return the result of IPv4 parsing // asciiDomain. if (checkers::is_ipv4(host.value())) { - ada_log("parse_host got ipv4", *host); + ada_log("parse_host got ipv4 ", *host); return parse_ipv4(host.value()); } @@ -11668,10 +11743,9 @@ ada_really_inline void url::parse_path(std::string_view input) { path = "/"; } } - return; } -std::string url::to_string() const { +[[nodiscard]] std::string url::to_string() const { if (!is_valid) { return "null"; } @@ -11751,7 +11825,10 @@ namespace ada { if (non_special_scheme == "blob") { if (!path.empty()) { auto result = ada::parse(path); - if (result && result->is_special()) { + if (result && + (result->type == scheme::HTTP || result->type == scheme::HTTPS)) { + // If pathURL's scheme is not "http" and not "https", then return a + // new opaque origin. return ada::helpers::concat(result->get_protocol(), "//", result->get_host()); } @@ -11771,9 +11848,9 @@ namespace ada { } [[nodiscard]] std::string url::get_host() const noexcept { - // If url’s host is null, then return the empty string. - // If url’s port is null, return url’s host, serialized. - // Return url’s host, serialized, followed by U+003A (:) and url’s port, + // If url's host is null, then return the empty string. + // If url's port is null, return url's host, serialized. + // Return url's host, serialized, followed by U+003A (:) and url's port, // serialized. if (!host.has_value()) { return ""; @@ -11788,13 +11865,13 @@ namespace ada { return host.value_or(""); } -[[nodiscard]] const std::string_view url::get_pathname() const noexcept { +[[nodiscard]] std::string_view url::get_pathname() const noexcept { return path; } [[nodiscard]] std::string url::get_search() const noexcept { - // If this’s URL’s query is either null or the empty string, then return the - // empty string. Return U+003F (?), followed by this’s URL’s query. + // If this's URL's query is either null or the empty string, then return the + // empty string. Return U+003F (?), followed by this's URL's query. return (!query.has_value() || (query.value().empty())) ? "" : "?" + query.value(); } @@ -11812,8 +11889,8 @@ namespace ada { } [[nodiscard]] std::string url::get_hash() const noexcept { - // If this’s URL’s fragment is either null or the empty string, then return - // the empty string. Return U+0023 (#), followed by this’s URL’s fragment. + // If this's URL's fragment is either null or the empty string, then return + // the empty string. Return U+0023 (#), followed by this's URL's fragment. return (!hash.has_value() || (hash.value().empty())) ? "" : "#" + hash.value(); } @@ -11868,7 +11945,7 @@ bool url::set_host_or_hostname(const std::string_view input) { } // If url is special and host_view is the empty string, validation error, // return failure. Otherwise, if state override is given, host_view is the - // empty string, and either url includes credentials or url’s port is + // empty string, and either url includes credentials or url's port is // non-null, return. else if (host_view.empty() && (is_special() || has_credentials() || port.has_value())) { @@ -11876,7 +11953,7 @@ bool url::set_host_or_hostname(const std::string_view input) { } // Let host be the result of host parsing host_view with url is not special. - if (host_view.empty()) { + if (host_view.empty() && !is_special()) { host = ""; return true; } @@ -11895,7 +11972,7 @@ bool url::set_host_or_hostname(const std::string_view input) { } if (new_host.empty()) { - // Set url’s host to the empty string. + // Set url's host to the empty string. host = ""; } else { // Let host be the result of host parsing buffer with url is not special. @@ -12105,7 +12182,7 @@ result_type parse_url(std::string_view user_input, // Most of the time, we just need user_input.size(). // In some instances, we may need a bit more. /////////////////////////// - // This is *very* important. This line should be removed + // This is *very* important. This line should *not* be removed // hastily. There are principled reasons why reserve is important // for performance. If you have a benchmark with small inputs, // it may not matter, but in other instances, it could. @@ -12200,13 +12277,13 @@ result_type parse_url(std::string_view user_input, } ada_log("SCHEME the scheme is ", url.get_protocol()); - // If url’s scheme is "file", then: + // If url's scheme is "file", then: if (url.type == ada::scheme::type::FILE) { // Set state to file state. state = ada::state::FILE; } - // Otherwise, if url is special, base is non-null, and base’s scheme - // is url’s scheme: Note: Doing base_url->scheme is unsafe if base_url + // Otherwise, if url is special, base is non-null, and base's scheme + // is url's scheme: Note: Doing base_url->scheme is unsafe if base_url // != nullptr is false. else if (url.is_special() && base_url != nullptr && base_url->type == url.type) { @@ -12225,7 +12302,7 @@ result_type parse_url(std::string_view user_input, state = ada::state::PATH_OR_AUTHORITY; input_position++; } - // Otherwise, set url’s path to the empty string and set state to + // Otherwise, set url's path to the empty string and set state to // opaque path state. else { state = ada::state::OPAQUE_PATH; @@ -12253,8 +12330,8 @@ result_type parse_url(std::string_view user_input, return url; } // Otherwise, if base has an opaque path and c is U+0023 (#), - // set url’s scheme to base’s scheme, url’s path to base’s path, url’s - // query to base’s query, and set state to fragment state. + // set url's scheme to base's scheme, url's path to base's path, url's + // query to base's query, and set state to fragment state. else if (base_url->has_opaque_path && fragment.has_value() && input_position == input_size) { ada_log("NO_SCHEME opaque base with fragment"); @@ -12271,7 +12348,7 @@ result_type parse_url(std::string_view user_input, url.update_unencoded_base_hash(*fragment); return url; } - // Otherwise, if base’s scheme is not "file", set state to relative + // Otherwise, if base's scheme is not "file", set state to relative // state and decrease pointer by 1. else if (base_url->type != ada::scheme::type::FILE) { ada_log("NO_SCHEME non-file relative path"); @@ -12444,7 +12521,7 @@ result_type parse_url(std::string_view user_input, ada_log("RELATIVE_SCHEME ", helpers::substring(url_data, input_position)); - // Set url’s scheme to base’s scheme. + // Set url's scheme to base's scheme. url.copy_scheme(*base_url); // If c is U+002F (/), then set state to relative slash state. @@ -12464,9 +12541,9 @@ result_type parse_url(std::string_view user_input, state = ada::state::RELATIVE_SLASH; } else { ada_log("RELATIVE_SCHEME otherwise"); - // Set url’s username to base’s username, url’s password to base’s - // password, url’s host to base’s host, url’s port to base’s port, - // url’s path to a clone of base’s path, and url’s query to base’s + // Set url's username to base's username, url's password to base's + // password, url's host to base's host, url's port to base's port, + // url's path to a clone of base's path, and url's query to base's // query. if constexpr (result_type_is_ada_url) { url.username = base_url->username; @@ -12492,7 +12569,7 @@ result_type parse_url(std::string_view user_input, url.has_opaque_path = base_url->has_opaque_path; - // If c is U+003F (?), then set url’s query to the empty string, and + // If c is U+003F (?), then set url's query to the empty string, and // state to query state. if ((input_position != input_size) && (url_data[input_position] == '?')) { @@ -12500,10 +12577,10 @@ result_type parse_url(std::string_view user_input, } // Otherwise, if c is not the EOF code point: else if (input_position != input_size) { - // Set url’s query to null. + // Set url's query to null. url.clear_search(); if constexpr (result_type_is_ada_url) { - // Shorten url’s path. + // Shorten url's path. helpers::shorten_path(url.path, url.type); } else { std::string_view path = url.get_pathname(); @@ -12536,10 +12613,10 @@ result_type parse_url(std::string_view user_input, state = ada::state::AUTHORITY; } // Otherwise, set - // - url’s username to base’s username, - // - url’s password to base’s password, - // - url’s host to base’s host, - // - url’s port to base’s port, + // - url's username to base's username, + // - url's password to base's password, + // - url's host to base's host, + // - url's port to base's port, // - state to path state, and then, decrease pointer by 1. else { if constexpr (result_type_is_ada_url) { @@ -12569,7 +12646,6 @@ result_type parse_url(std::string_view user_input, // If c is U+002F (/) and remaining starts with U+002F (/), // then set state to special authority ignore slashes state and increase // pointer by 1. - state = ada::state::SPECIAL_AUTHORITY_IGNORE_SLASHES; std::string_view view = helpers::substring(url_data, input_position); if (ada::checkers::begins_with(view, "//")) { input_position += 2; @@ -12601,7 +12677,7 @@ result_type parse_url(std::string_view user_input, : ada::character_sets::QUERY_PERCENT_ENCODE; // Percent-encode after encoding, with encoding, buffer, and - // queryPercentEncodeSet, and append the result to url’s query. + // queryPercentEncodeSet, and append the result to url's query. url.update_base_search(helpers::substring(url_data, input_position), query_percent_encode_set); ada_log("QUERY update_base_search completed "); @@ -12632,7 +12708,7 @@ result_type parse_url(std::string_view user_input, return url; } ada_log("HOST parsing results in ", url.get_hostname()); - // Set url’s host to host, buffer to the empty string, and state to + // Set url's host to host, buffer to the empty string, and state to // port state. state = ada::state::PORT; input_position++; @@ -12660,7 +12736,7 @@ result_type parse_url(std::string_view user_input, ada_log("HOST parsing results in ", url.get_hostname(), " href=", url.get_href()); - // Set url’s host to host, and state to path start state. + // Set url's host to host, and state to path start state. state = ada::state::PATH_START; } @@ -12669,7 +12745,7 @@ result_type parse_url(std::string_view user_input, case ada::state::OPAQUE_PATH: { ada_log("OPAQUE_PATH ", helpers::substring(url_data, input_position)); std::string_view view = helpers::substring(url_data, input_position); - // If c is U+003F (?), then set url’s query to the empty string and + // If c is U+003F (?), then set url's query to the empty string and // state to query state. size_t location = view.find('?'); if (location != std::string_view::npos) { @@ -12724,7 +12800,7 @@ result_type parse_url(std::string_view user_input, } } // Otherwise, if state override is not given and c is U+003F (?), - // set url’s query to the empty string and state to query state. + // set url's query to the empty string and state to query state. else if ((input_position != input_size) && (url_data[input_position] == '?')) { state = ada::state::QUERY; @@ -12778,12 +12854,12 @@ result_type parse_url(std::string_view user_input, input_position++; } else { ada_log("FILE_SLASH otherwise"); - // If base is non-null and base’s scheme is "file", then: + // If base is non-null and base's scheme is "file", then: // Note: it is unsafe to do base_url->scheme unless you know that // base_url_has_value() is true. if (base_url != nullptr && base_url->type == ada::scheme::type::FILE) { - // Set url’s host to base’s host. + // Set url's host to base's host. if constexpr (result_type_is_ada_url) { url.host = base_url->host; } else { @@ -12791,9 +12867,9 @@ result_type parse_url(std::string_view user_input, url.set_host(base_url->get_host()); } // If the code point substring from pointer to the end of input does - // not start with a Windows drive letter and base’s path[0] is a - // normalized Windows drive letter, then append base’s path[0] to - // url’s path. + // not start with a Windows drive letter and base's path[0] is a + // normalized Windows drive letter, then append base's path[0] to + // url's path. if (!base_url->get_pathname().empty()) { if (!checkers::is_windows_drive_letter( helpers::substring(url_data, input_position))) { @@ -12835,7 +12911,7 @@ result_type parse_url(std::string_view user_input, if (checkers::is_windows_drive_letter(file_host_buffer)) { state = ada::state::PATH; } else if (file_host_buffer.empty()) { - // Set url’s host to the empty string. + // Set url's host to the empty string. if constexpr (result_type_is_ada_url) { url.host = ""; } else { @@ -12876,7 +12952,7 @@ result_type parse_url(std::string_view user_input, url.set_protocol_as_file(); if constexpr (result_type_is_ada_url) { - // Set url’s host to the empty string. + // Set url's host to the empty string. url.host = ""; } else { url.update_base_hostname(""); @@ -12889,11 +12965,11 @@ result_type parse_url(std::string_view user_input, // Set state to file slash state. state = ada::state::FILE_SLASH; } - // Otherwise, if base is non-null and base’s scheme is "file": + // Otherwise, if base is non-null and base's scheme is "file": else if (base_url != nullptr && base_url->type == ada::scheme::type::FILE) { - // Set url’s host to base’s host, url’s path to a clone of base’s - // path, and url’s query to base’s query. + // Set url's host to base's host, url's path to a clone of base's + // path, and url's query to base's query. ada_log("FILE base non-null"); if constexpr (result_type_is_ada_url) { url.host = base_url->host; @@ -12908,17 +12984,17 @@ result_type parse_url(std::string_view user_input, } url.has_opaque_path = base_url->has_opaque_path; - // If c is U+003F (?), then set url’s query to the empty string and + // If c is U+003F (?), then set url's query to the empty string and // state to query state. if (input_position != input_size && url_data[input_position] == '?') { state = ada::state::QUERY; } // Otherwise, if c is not the EOF code point: else if (input_position != input_size) { - // Set url’s query to null. + // Set url's query to null. url.clear_search(); // If the code point substring from pointer to the end of input does - // not start with a Windows drive letter, then shorten url’s path. + // not start with a Windows drive letter, then shorten url's path. if (!checkers::is_windows_drive_letter(file_view)) { if constexpr (result_type_is_ada_url) { helpers::shorten_path(url.path, url.type); @@ -12931,7 +13007,7 @@ result_type parse_url(std::string_view user_input, } // Otherwise: else { - // Set url’s path to an empty list. + // Set url's path to an empty list. url.clear_pathname(); url.has_opaque_path = true; } @@ -12975,7 +13051,7 @@ template url_aggregator parse_url( namespace ada { -bool url_components::check_offset_consistency() const noexcept { +[[nodiscard]] bool url_components::check_offset_consistency() const noexcept { /** * https://user:pass@example.com:1234/foo/bar?baz#quux * | | | | ^^^^| | | @@ -13051,7 +13127,7 @@ bool url_components::check_offset_consistency() const noexcept { return true; } -std::string url_components::to_string() const { +[[nodiscard]] std::string url_components::to_string() const { std::string answer; auto back = std::back_insert_iterator(answer); answer.append("{\n"); @@ -13116,7 +13192,7 @@ template **/ if (is_input_special) { // fast path!!! if (has_state_override) { - // If url’s scheme is not a special scheme and buffer is a special scheme, + // If url's scheme is not a special scheme and buffer is a special scheme, // then return. if (is_special() != is_input_special) { return true; @@ -13129,7 +13205,7 @@ template return true; } - // If url’s scheme is "file" and its host is an empty host, then return. + // If url's scheme is "file" and its host is an empty host, then return. // An empty host is the empty string. if (type == ada::scheme::type::FILE && components.host_start == components.host_end) { @@ -13144,7 +13220,7 @@ template // This is uncommon. uint16_t urls_scheme_port = get_special_port(); - // If url’s port is url’s scheme’s default port, then set url’s port to + // If url's port is url's scheme's default port, then set url's port to // null. if (components.port == urls_scheme_port) { clear_port(); @@ -13158,8 +13234,8 @@ template unicode::to_lower_ascii(_buffer.data(), _buffer.size()); if (has_state_override) { - // If url’s scheme is a special scheme and buffer is not a special scheme, - // then return. If url’s scheme is not a special scheme and buffer is a + // If url's scheme is a special scheme and buffer is not a special scheme, + // then return. If url's scheme is not a special scheme and buffer is a // special scheme, then return. if (is_special() != ada::scheme::is_special(_buffer)) { return true; @@ -13172,7 +13248,7 @@ template return true; } - // If url’s scheme is "file" and its host is an empty host, then return. + // If url's scheme is "file" and its host is an empty host, then return. // An empty host is the empty string. if (type == ada::scheme::type::FILE && components.host_start == components.host_end) { @@ -13186,7 +13262,7 @@ template // This is uncommon. uint16_t urls_scheme_port = get_special_port(); - // If url’s port is url’s scheme’s default port, then set url’s port to + // If url's port is url's scheme's default port, then set url's port to // null. if (components.port == urls_scheme_port) { clear_port(); @@ -13497,7 +13573,7 @@ void url_aggregator::set_hash(const std::string_view input) { bool url_aggregator::set_href(const std::string_view input) { ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer)); - ada_log("url_aggregator::set_href ", input, "[", input.size(), " bytes]"); + ada_log("url_aggregator::set_href ", input, " [", input.size(), " bytes]"); ada::result out = ada::parse(input); ada_log("url_aggregator::set_href, success :", out.has_value()); @@ -13511,7 +13587,8 @@ bool url_aggregator::set_href(const std::string_view input) { } ada_really_inline bool url_aggregator::parse_host(std::string_view input) { - ada_log("url_aggregator:parse_host ", input, "[", input.size(), " bytes]"); + ada_log("url_aggregator:parse_host \"", input, "\" [", input.size(), + " bytes]"); ADA_ASSERT_TRUE(validate()); ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer)); if (input.empty()) { @@ -13561,7 +13638,7 @@ ada_really_inline bool url_aggregator::parse_host(std::string_view input) { update_base_hostname(input); if (checkers::is_ipv4(get_hostname())) { ada_log("parse_host fast path ipv4"); - return parse_ipv4(get_hostname()); + return parse_ipv4(get_hostname(), true); } ada_log("parse_host fast path ", get_hostname()); return true; @@ -13577,6 +13654,8 @@ ada_really_inline bool url_aggregator::parse_host(std::string_view input) { ada_log("parse_host to_ascii returns false"); return is_valid = false; } + ada_log("parse_host to_ascii succeeded ", *host, " [", host->size(), + " bytes]"); if (std::any_of(host.value().begin(), host.value().end(), ada::unicode::is_forbidden_domain_code_point)) { @@ -13586,8 +13665,8 @@ ada_really_inline bool url_aggregator::parse_host(std::string_view input) { // If asciiDomain ends in a number, then return the result of IPv4 parsing // asciiDomain. if (checkers::is_ipv4(host.value())) { - ada_log("parse_host got ipv4", *host); - return parse_ipv4(host.value()); + ada_log("parse_host got ipv4 ", *host); + return parse_ipv4(host.value(), false); } update_base_hostname(host.value()); @@ -13635,16 +13714,15 @@ bool url_aggregator::set_host_or_hostname(const std::string_view input) { } // If url is special and host_view is the empty string, validation error, // return failure. Otherwise, if state override is given, host_view is the - // empty string, and either url includes credentials or url’s port is + // empty string, and either url includes credentials or url's port is // non-null, return. else if (host_view.empty() && - (is_special() || has_credentials() || - components.port != url_components::omitted)) { + (is_special() || has_credentials() || has_port())) { return false; } // Let host be the result of host parsing host_view with url is not special. - if (host_view.empty()) { + if (host_view.empty() && !is_special()) { if (has_hostname()) { clear_hostname(); // easy! } else if (has_dash_dot()) { @@ -13671,7 +13749,7 @@ bool url_aggregator::set_host_or_hostname(const std::string_view input) { } if (new_host.empty()) { - // Set url’s host to the empty string. + // Set url's host to the empty string. clear_hostname(); } else { // Let host be the result of host parsing buffer with url is not special. @@ -13720,7 +13798,9 @@ bool url_aggregator::set_hostname(const std::string_view input) { std::string_view path = get_pathname(); if (!path.empty()) { auto out = ada::parse(path); - if (out && out->is_special()) { + if (out && (out->type == scheme::HTTP || out->type == scheme::HTTPS)) { + // If pathURL's scheme is not "http" and not "https", then return a + // new opaque origin. return helpers::concat(out->get_protocol(), "//", out->get_host()); } } @@ -13759,8 +13839,8 @@ bool url_aggregator::set_hostname(const std::string_view input) { [[nodiscard]] std::string_view url_aggregator::get_hash() const noexcept { ada_log("url_aggregator::get_hash"); - // If this’s URL’s fragment is either null or the empty string, then return - // the empty string. Return U+0023 (#), followed by this’s URL’s fragment. + // If this's URL's fragment is either null or the empty string, then return + // the empty string. Return U+0023 (#), followed by this's URL's fragment. if (components.hash_start == url_components::omitted) { return ""; } @@ -13818,8 +13898,8 @@ bool url_aggregator::set_hostname(const std::string_view input) { [[nodiscard]] std::string_view url_aggregator::get_search() const noexcept { ada_log("url_aggregator::get_search"); - // If this’s URL’s query is either null or the empty string, then return the - // empty string. Return U+003F (?), followed by this’s URL’s query. + // If this's URL's query is either null or the empty string, then return the + // empty string. Return U+003F (?), followed by this's URL's query. if (components.search_start == url_components::omitted) { return ""; } @@ -13838,8 +13918,8 @@ bool url_aggregator::set_hostname(const std::string_view input) { return helpers::substring(buffer, 0, components.protocol_end); } -std::string ada::url_aggregator::to_string() const { - ada_log("url_aggregator::to_string buffer:", buffer, "[", buffer.size(), +[[nodiscard]] std::string ada::url_aggregator::to_string() const { + ada_log("url_aggregator::to_string buffer:", buffer, " [", buffer.size(), " bytes]"); if (!is_valid) { return "null"; @@ -13938,8 +14018,8 @@ std::string ada::url_aggregator::to_string() const { return checkers::verify_dns_length(get_hostname()); } -bool url_aggregator::parse_ipv4(std::string_view input) { - ada_log("parse_ipv4 ", input, "[", input.size(), +bool url_aggregator::parse_ipv4(std::string_view input, bool in_place) { + ada_log("parse_ipv4 ", input, " [", input.size(), " bytes], overlaps with buffer: ", helpers::overlaps(input, buffer) ? "yes" : "no"); ADA_ASSERT_TRUE(validate()); @@ -13963,27 +14043,32 @@ bool url_aggregator::parse_ipv4(std::string_view input) { } else { std::from_chars_result r; if (is_hex) { + ada_log("parse_ipv4 trying to parse hex number"); r = std::from_chars(input.data() + 2, input.data() + input.size(), segment_result, 16); } else if ((input.length() >= 2) && input[0] == '0' && checkers::is_digit(input[1])) { + ada_log("parse_ipv4 trying to parse octal number"); r = std::from_chars(input.data() + 1, input.data() + input.size(), segment_result, 8); } else { + ada_log("parse_ipv4 trying to parse decimal number"); pure_decimal_count++; r = std::from_chars(input.data(), input.data() + input.size(), segment_result, 10); } if (r.ec != std::errc()) { + ada_log("parse_ipv4 parsing failed"); return is_valid = false; } + ada_log("parse_ipv4 parsed ", segment_result); input.remove_prefix(r.ptr - input.data()); } if (input.empty()) { // We have the last value. // At this stage, ipv4 contains digit_count*8 bits. // So we have 32-digit_count*8 bits left. - if (segment_result > (uint64_t(1) << (32 - digit_count * 8))) { + if (segment_result >= (uint64_t(1) << (32 - digit_count * 8))) { return is_valid = false; } ipv4 <<= (32 - digit_count * 8); @@ -14001,6 +14086,7 @@ bool url_aggregator::parse_ipv4(std::string_view input) { } } if ((digit_count != 4) || (!input.empty())) { + ada_log("parse_ipv4 found invalid (more than 4 numbers or empty) "); return is_valid = false; } final: @@ -14008,10 +14094,14 @@ bool url_aggregator::parse_ipv4(std::string_view input) { " host: ", get_host()); // We could also check r.ptr to see where the parsing ended. - if (pure_decimal_count == 4 && !trailing_dot) { + if (in_place && pure_decimal_count == 4 && !trailing_dot) { + ada_log( + "url_aggregator::parse_ipv4 completed and was already correct in the " + "buffer"); // The original input was already all decimal and we validated it. So we // don't need to do anything. } else { + ada_log("url_aggregator::parse_ipv4 completed and we need to update it"); // Optimization opportunity: Get rid of unnecessary string return in ipv4 // serializer. // TODO: This is likely a bug because it goes back update_base_hostname, not @@ -14019,13 +14109,17 @@ bool url_aggregator::parse_ipv4(std::string_view input) { update_base_hostname( ada::serializers::ipv4(ipv4)); // We have to reserialize the address. } + host_type = IPV4; ADA_ASSERT_TRUE(validate()); return true; } bool url_aggregator::parse_ipv6(std::string_view input) { + // TODO: Implement in_place optimization: we know that input points + // in the buffer, so we can just check whether the buffer is already + // well formatted. // TODO: Find a way to merge parse_ipv6 with url.cpp implementation. - ada_log("parse_ipv6 ", input, "[", input.size(), " bytes]"); + ada_log("parse_ipv6 ", input, " [", input.size(), " bytes]"); ADA_ASSERT_TRUE(validate()); ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer)); if (input.empty()) { @@ -14086,7 +14180,7 @@ bool url_aggregator::parse_ipv6(std::string_view input) { uint16_t value = 0, length = 0; // While length is less than 4 and c is an ASCII hex digit, - // set value to value × 0x10 + c interpreted as hexadecimal number, and + // set value to value times 0x10 + c interpreted as hexadecimal number, and // increase pointer and length by 1. while (length < 4 && pointer != input.end() && unicode::is_ascii_hex_digit(*pointer)) { @@ -14156,7 +14250,7 @@ bool url_aggregator::parse_ipv6(std::string_view input) { ada_log("parse_ipv6 if ipv4Piece is 0, validation error"); return is_valid = false; } - // Otherwise, set ipv4Piece to ipv4Piece × 10 + number. + // Otherwise, set ipv4Piece to ipv4Piece times 10 + number. else { ipv4_piece = *ipv4_piece * 10 + number; } @@ -14171,7 +14265,8 @@ bool url_aggregator::parse_ipv6(std::string_view input) { pointer++; } - // Set address[pieceIndex] to address[pieceIndex] × 0x100 + ipv4Piece. + // Set address[pieceIndex] to address[pieceIndex] times 0x100 + + // ipv4Piece. // https://stackoverflow.com/questions/39060852/why-does-the-addition-of-two-shorts-return-an-int address[piece_index] = uint16_t(address[piece_index] * 0x100 + *ipv4_piece); @@ -14224,14 +14319,14 @@ bool url_aggregator::parse_ipv6(std::string_view input) { // If compress is non-null, then: if (compress.has_value()) { - // Let swaps be pieceIndex − compress. + // Let swaps be pieceIndex - compress. int swaps = piece_index - *compress; // Set pieceIndex to 7. piece_index = 7; // While pieceIndex is not 0 and swaps is greater than 0, - // swap address[pieceIndex] with address[compress + swaps − 1], and then + // swap address[pieceIndex] with address[compress + swaps - 1], and then // decrease both pieceIndex and swaps by 1. while (piece_index != 0 && swaps > 0) { std::swap(address[piece_index], address[*compress + swaps - 1]); @@ -14253,11 +14348,12 @@ bool url_aggregator::parse_ipv6(std::string_view input) { update_base_hostname(ada::serializers::ipv6(address)); ada_log("parse_ipv6 ", get_hostname()); ADA_ASSERT_TRUE(validate()); + host_type = IPV6; return true; } bool url_aggregator::parse_opaque_host(std::string_view input) { - ada_log("parse_opaque_host ", input, "[", input.size(), " bytes]"); + ada_log("parse_opaque_host ", input, " [", input.size(), " bytes]"); ADA_ASSERT_TRUE(validate()); ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer)); if (std::any_of(input.begin(), input.end(), @@ -14280,7 +14376,7 @@ bool url_aggregator::parse_opaque_host(std::string_view input) { return true; } -std::string url_aggregator::to_diagram() const { +[[nodiscard]] std::string url_aggregator::to_diagram() const { if (!is_valid) { return "invalid"; } @@ -14437,7 +14533,7 @@ std::string url_aggregator::to_diagram() const { return answer; } -bool url_aggregator::validate() const noexcept { +[[nodiscard]] bool url_aggregator::validate() const noexcept { if (!is_valid) { return true; } @@ -14772,7 +14868,7 @@ inline void url_aggregator::consume_prepared_path(std::string_view input) { } // Otherwise, if path_buffer is not a single-dot path segment, then: else if (!unicode::is_single_dot_path_segment(path_buffer)) { - // If url’s scheme is "file", url’s path is empty, and path_buffer is a + // If url's scheme is "file", url's path is empty, and path_buffer is a // Windows drive letter, then replace the second code point in // path_buffer with U+003A (:). if (type == ada::scheme::type::FILE && path.empty() && @@ -14783,7 +14879,7 @@ inline void url_aggregator::consume_prepared_path(std::string_view input) { path_buffer.remove_prefix(2); path.append(path_buffer); } else { - // Append path_buffer to url’s path. + // Append path_buffer to url's path. path += '/'; path.append(path_buffer); } @@ -14805,6 +14901,11 @@ ada::result& get_instance(void* result) noexcept { extern "C" { typedef void* ada_url; +typedef void* ada_url_search_params; +typedef void* ada_strings; +typedef void* ada_url_search_params_keys_iter; +typedef void* ada_url_search_params_values_iter; +typedef void* ada_url_search_params_entries_iter; struct ada_string { const char* data; @@ -14816,6 +14917,11 @@ struct ada_owned_string { size_t length; }; +struct ada_string_pair { + ada_string key; + ada_string value; +}; + ada_string ada_string_create(const char* data, size_t length) { ada_string out{}; out.data = data; @@ -14887,6 +14993,11 @@ void ada_free(ada_url result) noexcept { delete r; } +ada_url ada_copy(ada_url input) noexcept { + ada::result& r = get_instance(input); + return new ada::result(r); +} + bool ada_is_valid(ada_url result) noexcept { ada::result& r = get_instance(result); return r.has_value(); @@ -15004,6 +15115,22 @@ ada_string ada_get_protocol(ada_url result) noexcept { return ada_string_create(out.data(), out.length()); } +uint8_t ada_get_host_type(ada_url result) noexcept { + ada::result& r = get_instance(result); + if (!r) { + return 0; + } + return r->host_type; +} + +uint8_t ada_get_scheme_type(ada_url result) noexcept { + ada::result& r = get_instance(result); + if (!r) { + return 0; + } + return r->type; +} + bool ada_set_href(ada_url result, const char* input, size_t length) noexcept { ada::result& r = get_instance(result); if (!r) { @@ -15073,6 +15200,13 @@ bool ada_set_pathname(ada_url result, const char* input, return r->set_pathname(std::string_view(input, length)); } +/** + * Update the search/query of the URL. + * + * If a URL has `?` as the search value, passing empty string to this function + * does not remove the attribute. If you need to remove it, please use + * `ada_clear_search` method. + */ void ada_set_search(ada_url result, const char* input, size_t length) noexcept { ada::result& r = get_instance(result); if (r) { @@ -15080,6 +15214,13 @@ void ada_set_search(ada_url result, const char* input, size_t length) noexcept { } } +/** + * Update the hash/fragment of the URL. + * + * If a URL has `#` as the hash value, passing empty string to this function + * does not remove the attribute. If you need to remove it, please use + * `ada_clear_hash` method. + */ void ada_set_hash(ada_url result, const char* input, size_t length) noexcept { ada::result& r = get_instance(result); if (r) { @@ -15087,6 +15228,39 @@ void ada_set_hash(ada_url result, const char* input, size_t length) noexcept { } } +void ada_clear_port(ada_url result) noexcept { + ada::result& r = get_instance(result); + if (r) { + r->clear_port(); + } +} + +/** + * Removes the hash of the URL. + * + * Despite `ada_set_hash` method, this function allows the complete + * removal of the hash attribute, even if it has a value of `#`. + */ +void ada_clear_hash(ada_url result) noexcept { + ada::result& r = get_instance(result); + if (r) { + r->clear_hash(); + } +} + +/** + * Removes the search of the URL. + * + * Despite `ada_set_search` method, this function allows the complete + * removal of the search attribute, even if it has a value of `?`. + */ +void ada_clear_search(ada_url result) noexcept { + ada::result& r = get_instance(result); + if (r) { + r->clear_search(); + } +} + bool ada_has_credentials(ada_url result) noexcept { ada::result& r = get_instance(result); if (!r) { @@ -15168,6 +15342,275 @@ const ada_url_components* ada_get_components(ada_url result) noexcept { } return reinterpret_cast(&r->get_components()); } + +ada_owned_string ada_idna_to_unicode(const char* input, size_t length) { + std::string out = ada::idna::to_unicode(std::string_view(input, length)); + ada_owned_string owned{}; + owned.length = out.length(); + owned.data = new char[owned.length]; + memcpy((void*)owned.data, out.data(), owned.length); + return owned; +} + +ada_owned_string ada_idna_to_ascii(const char* input, size_t length) { + std::string out = ada::idna::to_ascii(std::string_view(input, length)); + ada_owned_string owned{}; + owned.length = out.size(); + owned.data = new char[owned.length]; + memcpy((void*)owned.data, out.data(), owned.length); + return owned; +} + +ada_url_search_params ada_parse_search_params(const char* input, + size_t length) { + return new ada::result( + ada::url_search_params(std::string_view(input, length))); +} + +void ada_free_search_params(ada_url_search_params result) { + ada::result* r = + (ada::result*)result; + delete r; +} + +ada_owned_string ada_search_params_to_string(ada_url_search_params result) { + ada::result& r = + *(ada::result*)result; + if (!r) return ada_owned_string{NULL, 0}; + std::string out = r->to_string(); + ada_owned_string owned{}; + owned.length = out.size(); + owned.data = new char[owned.length]; + memcpy((void*)owned.data, out.data(), owned.length); + return owned; +} + +size_t ada_search_params_size(ada_url_search_params result) { + ada::result& r = + *(ada::result*)result; + if (!r) return 0; + return r->size(); +} + +void ada_search_params_sort(ada_url_search_params result) { + ada::result& r = + *(ada::result*)result; + if (r) r->sort(); +} + +void ada_search_params_append(ada_url_search_params result, const char* key, + size_t key_length, const char* value, + size_t value_length) { + ada::result& r = + *(ada::result*)result; + if (r) { + r->append(std::string_view(key, key_length), + std::string_view(value, value_length)); + } +} + +void ada_search_params_set(ada_url_search_params result, const char* key, + size_t key_length, const char* value, + size_t value_length) { + ada::result& r = + *(ada::result*)result; + if (r) { + r->set(std::string_view(key, key_length), + std::string_view(value, value_length)); + } +} + +void ada_search_params_remove(ada_url_search_params result, const char* key, + size_t key_length) { + ada::result& r = + *(ada::result*)result; + if (r) { + r->remove(std::string_view(key, key_length)); + } +} + +void ada_search_params_remove_value(ada_url_search_params result, + const char* key, size_t key_length, + const char* value, size_t value_length) { + ada::result& r = + *(ada::result*)result; + if (r) { + r->remove(std::string_view(key, key_length), + std::string_view(value, value_length)); + } +} + +bool ada_search_params_has(ada_url_search_params result, const char* key, + size_t key_length) { + ada::result& r = + *(ada::result*)result; + if (!r) return false; + return r->has(std::string_view(key, key_length)); +} + +bool ada_search_params_has_value(ada_url_search_params result, const char* key, + size_t key_length, const char* value, + size_t value_length) { + ada::result& r = + *(ada::result*)result; + if (!r) return false; + return r->has(std::string_view(key, key_length), + std::string_view(value, value_length)); +} + +ada_string ada_search_params_get(ada_url_search_params result, const char* key, + size_t key_length) { + ada::result& r = + *(ada::result*)result; + if (!r) return ada_string_create(NULL, 0); + auto found = r->get(std::string_view(key, key_length)); + if (!found.has_value()) return ada_string_create(NULL, 0); + return ada_string_create(found->data(), found->length()); +} + +ada_strings ada_search_params_get_all(ada_url_search_params result, + const char* key, size_t key_length) { + ada::result& r = + *(ada::result*)result; + if (!r) { + return new ada::result>( + std::vector()); + } + return new ada::result>( + r->get_all(std::string_view(key, key_length))); +} + +ada_url_search_params_keys_iter ada_search_params_get_keys( + ada_url_search_params result) { + ada::result& r = + *(ada::result*)result; + if (!r) { + return new ada::result( + ada::url_search_params_keys_iter()); + } + return new ada::result(r->get_keys()); +} + +ada_url_search_params_values_iter ada_search_params_get_values( + ada_url_search_params result) { + ada::result& r = + *(ada::result*)result; + if (!r) { + return new ada::result( + ada::url_search_params_values_iter()); + } + return new ada::result(r->get_values()); +} + +ada_url_search_params_entries_iter ada_search_params_get_entries( + ada_url_search_params result) { + ada::result& r = + *(ada::result*)result; + if (!r) { + return new ada::result( + ada::url_search_params_entries_iter()); + } + return new ada::result(r->get_entries()); +} + +void ada_free_strings(ada_strings result) { + ada::result>* r = + (ada::result>*)result; + delete r; +} + +size_t ada_strings_size(ada_strings result) { + ada::result>* r = + (ada::result>*)result; + if (!r) return 0; + return (*r)->size(); +} + +ada_string ada_strings_get(ada_strings result, size_t index) { + ada::result>* r = + (ada::result>*)result; + if (!r) return ada_string_create(NULL, 0); + std::string_view view = (*r)->at(index); + return ada_string_create(view.data(), view.length()); +} + +void ada_free_search_params_keys_iter(ada_url_search_params_keys_iter result) { + ada::result* r = + (ada::result*)result; + delete r; +} + +ada_string ada_search_params_keys_iter_next( + ada_url_search_params_keys_iter result) { + ada::result* r = + (ada::result*)result; + if (!r) return ada_string_create(NULL, 0); + auto next = (*r)->next(); + if (!next.has_value()) return ada_string_create(NULL, 0); + return ada_string_create(next->data(), next->length()); +} + +bool ada_search_params_keys_iter_has_next( + ada_url_search_params_keys_iter result) { + ada::result* r = + (ada::result*)result; + if (!r) return false; + return (*r)->has_next(); +} + +void ada_free_search_params_values_iter( + ada_url_search_params_values_iter result) { + ada::result* r = + (ada::result*)result; + delete r; +} + +ada_string ada_search_params_values_iter_next( + ada_url_search_params_values_iter result) { + ada::result* r = + (ada::result*)result; + if (!r) return ada_string_create(NULL, 0); + auto next = (*r)->next(); + if (!next.has_value()) return ada_string_create(NULL, 0); + return ada_string_create(next->data(), next->length()); +} + +bool ada_search_params_values_iter_has_next( + ada_url_search_params_values_iter result) { + ada::result* r = + (ada::result*)result; + if (!r) return false; + return (*r)->has_next(); +} + +void ada_free_search_params_entries_iter( + ada_url_search_params_entries_iter result) { + ada::result* r = + (ada::result*)result; + delete r; +} + +ada_string_pair ada_search_params_entries_iter_next( + ada_url_search_params_entries_iter result) { + ada::result* r = + (ada::result*)result; + if (!r) return {ada_string_create(NULL, 0), ada_string_create(NULL, 0)}; + auto next = (*r)->next(); + if (!next.has_value()) + return {ada_string_create(NULL, 0), ada_string_create(NULL, 0)}; + return ada_string_pair{ + ada_string_create(next->first.data(), next->first.length()), + ada_string_create(next->second.data(), next->second.length())}; +} + +bool ada_search_params_entries_iter_has_next( + ada_url_search_params_entries_iter result) { + ada::result* r = + (ada::result*)result; + if (!r) return false; + return (*r)->has_next(); +} + } // extern "C" /* end file src/ada_c.cpp */ /* end file src/ada.cpp */ diff --git a/ada.h b/ada.h index cd9a69a..721ac73 100644 --- a/ada.h +++ b/ada.h @@ -1,4 +1,4 @@ -/* auto-generated on 2023-05-19 00:02:33 -0400. Do not edit! */ +/* auto-generated on 2024-03-07 13:23:39 -0500. Do not edit! */ /* begin file include/ada.h */ /** * @file ada.h @@ -8,7 +8,7 @@ #define ADA_H /* begin file include/ada/ada_idna.h */ -/* auto-generated on 2023-05-07 19:12:14 -0400. Do not edit! */ +/* auto-generated on 2023-09-19 15:58:51 -0400. Do not edit! */ /* begin file include/idna.h */ #ifndef ADA_IDNA_H #define ADA_IDNA_H @@ -98,7 +98,7 @@ namespace ada::idna { /** * @see https://www.unicode.org/reports/tr46/#Validity_Criteria */ -bool is_label_valid(const std::u32string_view label); +bool is_label_valid(std::u32string_view label); } // namespace ada::idna @@ -120,7 +120,6 @@ namespace ada::idna { // this function. We also do not trim control characters. We also assume that // the input is not empty. We return "" on error. // -// Example: "www.öbb.at" -> "www.xn--bb-eka.at" // // This function may accept or even produce invalid domains. std::string to_ascii(std::string_view ut8_string); @@ -130,9 +129,8 @@ std::string to_ascii(std::string_view ut8_string); // https://url.spec.whatwg.org/#forbidden-domain-code-point bool contains_forbidden_domain_code_point(std::string_view ascii_string); -bool constexpr begins_with(std::u32string_view view, - std::u32string_view prefix); -bool constexpr begins_with(std::string_view view, std::string_view prefix); +bool begins_with(std::u32string_view view, std::u32string_view prefix); +bool begins_with(std::string_view view, std::string_view prefix); bool constexpr is_ascii(std::u32string_view view); bool constexpr is_ascii(std::string_view view); @@ -425,7 +423,7 @@ namespace ada { #define ADA_DEVELOPMENT_CHECKS 1 #endif // __OPTIMIZE__ #endif // _MSC_VER -#endif // SIMDJSON_DEVELOPMENT_CHECKS +#endif // ADA_DEVELOPMENT_CHECKS #define ADA_STR(x) #x @@ -481,19 +479,27 @@ namespace ada { #endif // ADA_COMMON_DEFS_H /* end file include/ada/common_defs.h */ -#include +#include /** + * These functions are not part of our public API and may + * change at any time. + * @private * @namespace ada::character_sets * @brief Includes the definitions for unicode character sets. */ namespace ada::character_sets { -ada_really_inline bool bit_at(const uint8_t a[], const uint8_t i); +ada_really_inline bool bit_at(const uint8_t a[], uint8_t i); } // namespace ada::character_sets #endif // ADA_CHARACTER_SETS_H /* end file include/ada/character_sets.h */ +/** + * These functions are not part of our public API and may + * change at any time. + * @private + */ namespace ada::character_sets { constexpr char hex[1024] = @@ -926,13 +932,79 @@ constexpr uint8_t PATH_PERCENT_ENCODE[32] = { // F8 F9 FA FB FC FD FE FF 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80}; +constexpr uint8_t WWW_FORM_URLENCODED_PERCENT_ENCODE[32] = { + // 00 01 02 03 04 05 06 07 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 08 09 0A 0B 0C 0D 0E 0F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 10 11 12 13 14 15 16 17 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 18 19 1A 1B 1C 1D 1E 1F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 20 21 22 23 24 25 26 27 + 0x00 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 28 29 2A 2B 2C 2D 2E 2F + 0x01 | 0x02 | 0x00 | 0x08 | 0x10 | 0x00 | 0x00 | 0x80, + // 30 31 32 33 34 35 36 37 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 38 39 3A 3B 3C 3D 3E 3F + 0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x80, + // 40 41 42 43 44 45 46 47 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 48 49 4A 4B 4C 4D 4E 4F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 50 51 52 53 54 55 56 57 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 58 59 5A 5B 5C 5D 5E 5F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 60 61 62 63 64 65 66 67 + 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 68 69 6A 6B 6C 6D 6E 6F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 70 71 72 73 74 75 76 77 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 78 79 7A 7B 7C 7D 7E 7F + 0x00 | 0x00 | 0x00 | 0x08 | 0x00 | 0x20 | 0x40 | 0x80, + // 80 81 82 83 84 85 86 87 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 88 89 8A 8B 8C 8D 8E 8F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 90 91 92 93 94 95 96 97 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 98 99 9A 9B 9C 9D 9E 9F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A0 A1 A2 A3 A4 A5 A6 A7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A8 A9 AA AB AC AD AE AF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B0 B1 B2 B3 B4 B5 B6 B7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B8 B9 BA BB BC BD BE BF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C0 C1 C2 C3 C4 C5 C6 C7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C8 C9 CA CB CC CD CE CF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D0 D1 D2 D3 D4 D5 D6 D7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D8 D9 DA DB DC DD DE DF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E0 E1 E2 E3 E4 E5 E6 E7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E8 E9 EA EB EC ED EE EF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F0 F1 F2 F3 F4 F5 F6 F7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F8 F9 FA FB FC FD FE FF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80}; + ada_really_inline bool bit_at(const uint8_t a[], const uint8_t i) { return !!(a[i >> 3] & (1 << (i & 7))); } } // namespace ada::character_sets -#endif // ADA_CHARACTER_SETS_H +#endif // ADA_CHARACTER_SETS_INL_H /* end file include/ada/character_sets-inl.h */ /* begin file include/ada/checkers-inl.h */ /** @@ -943,13 +1015,14 @@ ada_really_inline bool bit_at(const uint8_t a[], const uint8_t i) { #define ADA_CHECKERS_INL_H +#include #include #include namespace ada::checkers { inline bool has_hex_prefix_unsafe(std::string_view input) { - // This is actualy efficient code, see has_hex_prefix for the assembly. + // This is actually efficient code, see has_hex_prefix for the assembly. uint32_t value_one = 1; bool is_little_endian = (reinterpret_cast(&value_one)[0] == 1); uint16_t word0x{}; @@ -989,11 +1062,12 @@ inline constexpr bool is_normalized_windows_drive_letter( return input.size() >= 2 && (is_alpha(input[0]) && (input[1] == ':')); } -ada_really_inline constexpr bool begins_with(std::string_view view, - std::string_view prefix) { +ada_really_inline bool begins_with(std::string_view view, + std::string_view prefix) { // in C++20, you have view.begins_with(prefix) + // std::equal is constexpr in C++20 return view.size() >= prefix.size() && - (view.substr(0, prefix.size()) == prefix); + std::equal(prefix.begin(), prefix.end(), view.begin()); } } // namespace ada::checkers @@ -1246,12 +1320,12 @@ struct url_components { * @return true if the offset values are * consistent with a possible URL string */ - bool check_offset_consistency() const noexcept; + [[nodiscard]] bool check_offset_consistency() const noexcept; /** * Converts a url_components to JSON stringified version. */ - std::string to_string() const; + [[nodiscard]] std::string to_string() const; }; // struct url_components @@ -1341,6 +1415,25 @@ constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept; namespace ada { +/** + * Type of URL host as an enum. + */ +enum url_host_type : uint8_t { + /** + * Represents common URLs such as "https://www.google.com" + */ + DEFAULT = 0, + /** + * Represents ipv4 addresses such as "http://127.0.0.1" + */ + IPV4 = 1, + /** + * Represents ipv6 addresses such as + * "http://[2001:db8:3333:4444:5555:6666:7777:8888]" + */ + IPV6 = 2, +}; + /** * @brief Base class of URL implementations * @@ -1363,6 +1456,11 @@ struct url_base { */ bool has_opaque_path{false}; + /** + * URL hosts type + */ + url_host_type host_type = url_host_type::DEFAULT; + /** * @private */ @@ -1375,7 +1473,7 @@ struct url_base { [[nodiscard]] ada_really_inline bool is_special() const noexcept; /** - * The origin getter steps are to return the serialization of this’s URL’s + * The origin getter steps are to return the serialization of this's URL's * origin. [HTML] * @return a newly allocated string. * @see https://url.spec.whatwg.org/#concept-url-origin @@ -1415,13 +1513,17 @@ struct url_base { * @return On failure, it returns zero. * @see https://url.spec.whatwg.org/#host-parsing */ - virtual ada_really_inline size_t parse_port( - std::string_view view, bool check_trailing_content = false) noexcept = 0; + virtual size_t parse_port(std::string_view view, + bool check_trailing_content) noexcept = 0; + + virtual ada_really_inline size_t parse_port(std::string_view view) noexcept { + return this->parse_port(view, false); + } /** * Returns a JSON string representation of this URL. */ - virtual std::string to_string() const = 0; + [[nodiscard]] virtual std::string to_string() const = 0; /** @private */ virtual inline void clear_pathname() = 0; @@ -1430,10 +1532,10 @@ struct url_base { virtual inline void clear_search() = 0; /** @private */ - virtual inline bool has_hash() const noexcept = 0; + [[nodiscard]] virtual inline bool has_hash() const noexcept = 0; /** @private */ - virtual inline bool has_search() const noexcept = 0; + [[nodiscard]] virtual inline bool has_search() const noexcept = 0; }; // url_base @@ -1446,6 +1548,9 @@ struct url_base { #include /** + * These functions are not part of our public API and may + * change at any time. + * * @private * @namespace ada::helpers * @brief Includes the definitions for helper functions @@ -1503,7 +1608,7 @@ ada_really_inline bool shorten_path(std::string_view& path, * * @see https://url.spec.whatwg.org/ */ -ada_really_inline void parse_prepared_path(const std::string_view input, +ada_really_inline void parse_prepared_path(std::string_view input, ada::scheme::type type, std::string& path); @@ -1576,18 +1681,6 @@ template ada_really_inline void strip_trailing_spaces_from_opaque_path( url_type& url) noexcept; -/** - * @private - * Reverse the order of the bytes. - */ -ada_really_inline uint64_t swap_bytes(uint64_t val) noexcept; - -/** - * @private - * Reverse the order of the bytes but only if the system is big endian - */ -ada_really_inline uint64_t swap_bytes_if_big_endian(uint64_t val) noexcept; - /** * @private * Finds the delimiter of a view in authority state. @@ -1620,6 +1713,7 @@ inline void inner_concat(std::string& buffer, T t, Args... args) { } /** + * @private * Concatenate the arguments and return a string. * @returns a string */ @@ -1631,6 +1725,7 @@ std::string concat(Args... args) { } /** + * @private * @return Number of leading zeroes. */ inline int leading_zeroes(uint32_t input_num) noexcept { @@ -1644,6 +1739,7 @@ inline int leading_zeroes(uint32_t input_num) noexcept { } /** + * @private * Counts the number of decimal digits necessary to represent x. * faster than std::to_string(x).size(). * @return digit count @@ -1703,8 +1799,8 @@ inline int fast_digit_count(uint32_t x) noexcept { #define TL_EXPECTED_HPP #define TL_EXPECTED_VERSION_MAJOR 1 -#define TL_EXPECTED_VERSION_MINOR 0 -#define TL_EXPECTED_VERSION_PATCH 1 +#define TL_EXPECTED_VERSION_MINOR 1 +#define TL_EXPECTED_VERSION_PATCH 0 #include #include @@ -1737,6 +1833,16 @@ inline int fast_digit_count(uint32_t x) noexcept { #define TL_EXPECTED_GCC55 #endif +#if !defined(TL_ASSERT) +// can't have assert in constexpr in C++11 and GCC 4.9 has a compiler bug +#if (__cplusplus > 201103L) && !defined(TL_EXPECTED_GCC49) +#include +#define TL_ASSERT(x) assert(x) +#else +#define TL_ASSERT(x) +#endif +#endif + #if (defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ <= 9 && \ !defined(__clang__)) // GCC < 5 doesn't support overloading on const&& for member functions @@ -1892,6 +1998,7 @@ template #ifdef TL_EXPECTED_EXCEPTIONS_ENABLED throw std::forward(e); #else + (void)e; #ifdef _MSC_VER __assume(0); #else @@ -2532,7 +2639,7 @@ struct expected_operations_base : expected_storage_base { geterr().~unexpected(); construct(std::move(rhs).get()); } else { - assign_common(rhs); + assign_common(std::move(rhs)); } } @@ -3190,6 +3297,53 @@ class expected : private detail::expected_move_assign_base, return map_error_impl(std::move(*this), std::forward(f)); } #endif +#endif +#if defined(TL_EXPECTED_CXX14) && !defined(TL_EXPECTED_GCC49) && \ + !defined(TL_EXPECTED_GCC54) && !defined(TL_EXPECTED_GCC55) + template + TL_EXPECTED_11_CONSTEXPR auto transform_error(F &&f) & { + return map_error_impl(*this, std::forward(f)); + } + template + TL_EXPECTED_11_CONSTEXPR auto transform_error(F &&f) && { + return map_error_impl(std::move(*this), std::forward(f)); + } + template + constexpr auto transform_error(F &&f) const & { + return map_error_impl(*this, std::forward(f)); + } + template + constexpr auto transform_error(F &&f) const && { + return map_error_impl(std::move(*this), std::forward(f)); + } +#else + template + TL_EXPECTED_11_CONSTEXPR decltype(map_error_impl(std::declval(), + std::declval())) + transform_error(F &&f) & { + return map_error_impl(*this, std::forward(f)); + } + template + TL_EXPECTED_11_CONSTEXPR decltype(map_error_impl(std::declval(), + std::declval())) + transform_error(F &&f) && { + return map_error_impl(std::move(*this), std::forward(f)); + } + template + constexpr decltype(map_error_impl(std::declval(), + std::declval())) + transform_error(F &&f) const & { + return map_error_impl(*this, std::forward(f)); + } + +#ifndef TL_EXPECTED_NO_CONSTRR + template + constexpr decltype(map_error_impl(std::declval(), + std::declval())) + transform_error(F &&f) const && { + return map_error_impl(std::move(*this), std::forward(f)); + } +#endif #endif template expected TL_EXPECTED_11_CONSTEXPR or_else(F &&f) & { @@ -3632,27 +3786,37 @@ class expected : private detail::expected_move_assign_base, } } - constexpr const T *operator->() const { return valptr(); } - TL_EXPECTED_11_CONSTEXPR T *operator->() { return valptr(); } + constexpr const T *operator->() const { + TL_ASSERT(has_value()); + return valptr(); + } + TL_EXPECTED_11_CONSTEXPR T *operator->() { + TL_ASSERT(has_value()); + return valptr(); + } template ::value> * = nullptr> constexpr const U &operator*() const & { + TL_ASSERT(has_value()); return val(); } template ::value> * = nullptr> TL_EXPECTED_11_CONSTEXPR U &operator*() & { + TL_ASSERT(has_value()); return val(); } template ::value> * = nullptr> constexpr const U &&operator*() const && { + TL_ASSERT(has_value()); return std::move(val()); } template ::value> * = nullptr> TL_EXPECTED_11_CONSTEXPR U &&operator*() && { + TL_ASSERT(has_value()); return std::move(val()); } @@ -3688,10 +3852,22 @@ class expected : private detail::expected_move_assign_base, return std::move(val()); } - constexpr const E &error() const & { return err().value(); } - TL_EXPECTED_11_CONSTEXPR E &error() & { return err().value(); } - constexpr const E &&error() const && { return std::move(err().value()); } - TL_EXPECTED_11_CONSTEXPR E &&error() && { return std::move(err().value()); } + constexpr const E &error() const & { + TL_ASSERT(!has_value()); + return err().value(); + } + TL_EXPECTED_11_CONSTEXPR E &error() & { + TL_ASSERT(!has_value()); + return err().value(); + } + constexpr const E &&error() const && { + TL_ASSERT(!has_value()); + return std::move(err().value()); + } + TL_EXPECTED_11_CONSTEXPR E &&error() && { + TL_ASSERT(!has_value()); + return std::move(err().value()); + } template constexpr T value_or(U &&v) const & { @@ -4211,7 +4387,7 @@ constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept { } // namespace ada::scheme -#endif // ADA_SCHEME_H +#endif // ADA_SCHEME_INL_H /* end file include/ada/scheme-inl.h */ /* begin file include/ada/serializers.h */ /** @@ -4253,7 +4429,7 @@ std::string ipv6(const std::array& address) noexcept; * network address. * @see https://url.spec.whatwg.org/#concept-ipv4-serializer */ -std::string ipv4(const uint64_t address) noexcept; +std::string ipv4(uint64_t address) noexcept; } // namespace ada::serializers @@ -4272,22 +4448,27 @@ std::string ipv4(const uint64_t address) noexcept; #include /** + * Unicode operations. These functions are not part of our public API and may + * change at any time. + * + * @private * @namespace ada::unicode * @brief Includes the definitions for unicode operations */ namespace ada::unicode { /** + * @private * We receive a UTF-8 string representing a domain name. * If the string is percent encoded, we apply percent decoding. * * Given a domain, we need to identify its labels. * They are separated by label-separators: * - * U+002E ( . ) FULL STOP - * U+FF0E ( . ) FULLWIDTH FULL STOP - * U+3002 ( 。 ) IDEOGRAPHIC FULL STOP - * U+FF61 ( 。 ) HALFWIDTH IDEOGRAPHIC FULL STOP + * U+002E (.) FULL STOP + * U+FF0E FULLWIDTH FULL STOP + * U+3002 IDEOGRAPHIC FULL STOP + * U+FF61 HALFWIDTH IDEOGRAPHIC FULL STOP * * They are all mapped to U+002E. * @@ -4321,11 +4502,13 @@ bool to_ascii(std::optional& out, std::string_view plain, size_t first_percent); /** + * @private * @see https://www.unicode.org/reports/tr46/#ToUnicode */ std::string to_unicode(std::string_view input); /** + * @private * Checks if the input has tab or newline characters. * * @attention The has_tabs_or_newline function is a bottleneck and it is simple @@ -4335,13 +4518,14 @@ ada_really_inline bool has_tabs_or_newline( std::string_view user_input) noexcept; /** + * @private * Checks if the input is a forbidden host code point. * @see https://url.spec.whatwg.org/#forbidden-host-code-point */ -ada_really_inline constexpr bool is_forbidden_host_code_point( - const char c) noexcept; +ada_really_inline constexpr bool is_forbidden_host_code_point(char c) noexcept; /** + * @private * Checks if the input contains a forbidden domain code point. * @see https://url.spec.whatwg.org/#forbidden-domain-code-point */ @@ -4349,6 +4533,7 @@ ada_really_inline constexpr bool contains_forbidden_domain_code_point( const char* input, size_t length) noexcept; /** + * @private * Checks if the input contains a forbidden domain code point in which case * the first bit is set to 1. If the input contains an upper case ASCII letter, * then the second bit is set to 1. @@ -4359,66 +4544,77 @@ contains_forbidden_domain_code_point_or_upper(const char* input, size_t length) noexcept; /** - * Checks if the input is a forbidden doamin code point. + * @private + * Checks if the input is a forbidden domain code point. * @see https://url.spec.whatwg.org/#forbidden-domain-code-point */ ada_really_inline constexpr bool is_forbidden_domain_code_point( - const char c) noexcept; + char c) noexcept; /** + * @private * Checks if the input is alphanumeric, '+', '-' or '.' */ -ada_really_inline constexpr bool is_alnum_plus(const char c) noexcept; +ada_really_inline constexpr bool is_alnum_plus(char c) noexcept; /** + * @private * @details An ASCII hex digit is an ASCII upper hex digit or ASCII lower hex * digit. An ASCII upper hex digit is an ASCII digit or a code point in the * range U+0041 (A) to U+0046 (F), inclusive. An ASCII lower hex digit is an * ASCII digit or a code point in the range U+0061 (a) to U+0066 (f), inclusive. */ -ada_really_inline constexpr bool is_ascii_hex_digit(const char c) noexcept; +ada_really_inline constexpr bool is_ascii_hex_digit(char c) noexcept; /** + * @private * Checks if the input is a C0 control or space character. * * @details A C0 control or space is a C0 control or U+0020 SPACE. * A C0 control is a code point in the range U+0000 NULL to U+001F INFORMATION * SEPARATOR ONE, inclusive. */ -ada_really_inline constexpr bool is_c0_control_or_space(const char c) noexcept; +ada_really_inline constexpr bool is_c0_control_or_space(char c) noexcept; /** + * @private * Checks if the input is a ASCII tab or newline character. * * @details An ASCII tab or newline is U+0009 TAB, U+000A LF, or U+000D CR. */ -ada_really_inline constexpr bool is_ascii_tab_or_newline(const char c) noexcept; +ada_really_inline constexpr bool is_ascii_tab_or_newline(char c) noexcept; /** + * @private * @details A double-dot path segment must be ".." or an ASCII case-insensitive * match for ".%2e", "%2e.", or "%2e%2e". */ ada_really_inline ada_constexpr bool is_double_dot_path_segment( - const std::string_view input) noexcept; + std::string_view input) noexcept; /** + * @private * @details A single-dot path segment must be "." or an ASCII case-insensitive * match for "%2e". */ ada_really_inline constexpr bool is_single_dot_path_segment( - const std::string_view input) noexcept; + std::string_view input) noexcept; /** + * @private * @details ipv4 character might contain 0-9 or a-f character ranges. */ -ada_really_inline constexpr bool is_lowercase_hex(const char c) noexcept; +ada_really_inline constexpr bool is_lowercase_hex(char c) noexcept; /** - * @details Convert hex to binary. + * @private + * @details Convert hex to binary. Caller is responsible to ensure that + * the parameter is an hexadecimal digit (0-9, A-F, a-f). */ -unsigned constexpr convert_hex_to_binary(char c) noexcept; +ada_really_inline unsigned constexpr convert_hex_to_binary(char c) noexcept; /** + * @private * first_percent should be = input.find('%') * * @todo It would be faster as noexcept maybe, but it could be unsafe since. @@ -4426,22 +4622,25 @@ unsigned constexpr convert_hex_to_binary(char c) noexcept; * @see https://github.com/nodejs/node/blob/main/src/node_url.cc#L245 * @see https://encoding.spec.whatwg.org/#utf-8-decode-without-bom */ -std::string percent_decode(const std::string_view input, size_t first_percent); +std::string percent_decode(std::string_view input, size_t first_percent); /** + * @private * Returns a percent-encoding string whether percent encoding was needed or not. * @see https://github.com/nodejs/node/blob/main/src/node_url.cc#L226 */ -std::string percent_encode(const std::string_view input, +std::string percent_encode(std::string_view input, const uint8_t character_set[]); /** + * @private * Returns a percent-encoded string version of input, while starting the percent * encoding at the provided index. * @see https://github.com/nodejs/node/blob/main/src/node_url.cc#L226 */ -std::string percent_encode(const std::string_view input, +std::string percent_encode(std::string_view input, const uint8_t character_set[], size_t index); /** + * @private * Returns true if percent encoding was needed, in which case, we store * the percent-encoded content in 'out'. If the boolean 'append' is set to * true, the content is appended to 'out'. @@ -4449,15 +4648,17 @@ std::string percent_encode(const std::string_view input, * @see https://github.com/nodejs/node/blob/main/src/node_url.cc#L226 */ template -bool percent_encode(const std::string_view input, const uint8_t character_set[], +bool percent_encode(std::string_view input, const uint8_t character_set[], std::string& out); /** + * @private * Returns the index at which percent encoding should start, or (equivalently), * the length of the prefix that does not require percent encoding. */ -ada_really_inline size_t percent_encode_index(const std::string_view input, +ada_really_inline size_t percent_encode_index(std::string_view input, const uint8_t character_set[]); /** + * @private * Lowers the string in-place, assuming that the content is ASCII. * Return true if the content was ASCII. */ @@ -4502,22 +4703,22 @@ struct url_aggregator : url_base { url_aggregator(url_aggregator &&u) noexcept = default; url_aggregator &operator=(url_aggregator &&u) noexcept = default; url_aggregator &operator=(const url_aggregator &u) = default; - ~url_aggregator() = default; - - bool set_href(const std::string_view input); - bool set_host(const std::string_view input); - bool set_hostname(const std::string_view input); - bool set_protocol(const std::string_view input); - bool set_username(const std::string_view input); - bool set_password(const std::string_view input); - bool set_port(const std::string_view input); - bool set_pathname(const std::string_view input); - void set_search(const std::string_view input); - void set_hash(const std::string_view input); + ~url_aggregator() override = default; + + bool set_href(std::string_view input); + bool set_host(std::string_view input); + bool set_hostname(std::string_view input); + bool set_protocol(std::string_view input); + bool set_username(std::string_view input); + bool set_password(std::string_view input); + bool set_port(std::string_view input); + bool set_pathname(std::string_view input); + void set_search(std::string_view input); + void set_hash(std::string_view input); [[nodiscard]] bool has_valid_domain() const noexcept override; /** - * The origin getter steps are to return the serialization of this’s URL’s + * The origin getter steps are to return the serialization of this's URL's * origin. [HTML] * @return a newly allocated string. * @see https://url.spec.whatwg.org/#concept-url-origin @@ -4531,37 +4732,37 @@ struct url_aggregator : url_base { * @see https://url.spec.whatwg.org/#dom-url-href * @see https://url.spec.whatwg.org/#concept-url-serializer */ - inline std::string_view get_href() const noexcept; + [[nodiscard]] inline std::string_view get_href() const noexcept; /** - * The username getter steps are to return this’s URL’s username. + * The username getter steps are to return this's URL's username. * This function does not allocate memory. * @return a lightweight std::string_view. * @see https://url.spec.whatwg.org/#dom-url-username */ [[nodiscard]] std::string_view get_username() const noexcept; /** - * The password getter steps are to return this’s URL’s password. + * The password getter steps are to return this's URL's password. * This function does not allocate memory. * @return a lightweight std::string_view. * @see https://url.spec.whatwg.org/#dom-url-password */ [[nodiscard]] std::string_view get_password() const noexcept; /** - * Return this’s URL’s port, serialized. + * Return this's URL's port, serialized. * This function does not allocate memory. * @return a lightweight std::string_view. * @see https://url.spec.whatwg.org/#dom-url-port */ [[nodiscard]] std::string_view get_port() const noexcept; /** - * Return U+0023 (#), followed by this’s URL’s fragment. + * Return U+0023 (#), followed by this's URL's fragment. * This function does not allocate memory. * @return a lightweight std::string_view.. * @see https://url.spec.whatwg.org/#dom-url-hash */ [[nodiscard]] std::string_view get_hash() const noexcept; /** - * Return url’s host, serialized, followed by U+003A (:) and url’s port, + * Return url's host, serialized, followed by U+003A (:) and url's port, * serialized. * This function does not allocate memory. * When there is no host, this function returns the empty view. @@ -4570,7 +4771,7 @@ struct url_aggregator : url_base { */ [[nodiscard]] std::string_view get_host() const noexcept; /** - * Return this’s URL’s host, serialized. + * Return this's URL's host, serialized. * This function does not allocate memory. * When there is no host, this function returns the empty view. * @return a lightweight std::string_view. @@ -4579,28 +4780,28 @@ struct url_aggregator : url_base { [[nodiscard]] std::string_view get_hostname() const noexcept; /** * The pathname getter steps are to return the result of URL path serializing - * this’s URL. + * this's URL. * This function does not allocate memory. * @return a lightweight std::string_view. * @see https://url.spec.whatwg.org/#dom-url-pathname */ [[nodiscard]] std::string_view get_pathname() const noexcept; /** - * Compute the pathname length in bytes witout instantiating a view or a + * Compute the pathname length in bytes without instantiating a view or a * string. * @return size of the pathname in bytes * @see https://url.spec.whatwg.org/#dom-url-pathname */ - ada_really_inline uint32_t get_pathname_length() const noexcept; + [[nodiscard]] ada_really_inline uint32_t get_pathname_length() const noexcept; /** - * Return U+003F (?), followed by this’s URL’s query. + * Return U+003F (?), followed by this's URL's query. * This function does not allocate memory. * @return a lightweight std::string_view. * @see https://url.spec.whatwg.org/#dom-url-search */ [[nodiscard]] std::string_view get_search() const noexcept; /** - * The protocol getter steps are to return this’s URL’s scheme, followed by + * The protocol getter steps are to return this's URL's scheme, followed by * U+003A (:). * This function does not allocate memory. * @return a lightweight std::string_view. @@ -4640,18 +4841,18 @@ struct url_aggregator : url_base { /** * Returns a string representation of this URL. */ - std::string to_string() const override; + [[nodiscard]] std::string to_string() const override; /** * Returns a string diagram of this URL. */ - std::string to_diagram() const; + [[nodiscard]] std::string to_diagram() const; /** * Verifies that the parsed URL could be valid. Useful for debugging purposes. * @return true if the URL is valid, otherwise return true of the offsets are * possible. */ - bool validate() const noexcept; + [[nodiscard]] bool validate() const noexcept; /** @return true if it has an host but it is the empty string */ [[nodiscard]] inline bool has_empty_hostname() const noexcept; @@ -4670,6 +4871,10 @@ struct url_aggregator : url_base { /** @return true if the URL has a search component */ [[nodiscard]] inline bool has_search() const noexcept override; + inline void clear_port(); + inline void clear_hash(); + inline void clear_search() override; + private: friend ada::url_aggregator ada::parser::parse_url( std::string_view, const ada::url_aggregator *); @@ -4694,15 +4899,20 @@ struct url_aggregator : url_base { */ inline void reserve(uint32_t capacity); - ada_really_inline size_t - parse_port(std::string_view view, - bool check_trailing_content = false) noexcept override; + ada_really_inline size_t parse_port( + std::string_view view, bool check_trailing_content) noexcept override; + + ada_really_inline size_t parse_port(std::string_view view) noexcept override { + return this->parse_port(view, false); + } /** - * Return true on success. + * Return true on success. The 'in_place' parameter indicates whether the + * the string_view input is pointing in the buffer. When in_place is false, + * we must nearly always update the buffer. * @see https://url.spec.whatwg.org/#concept-ipv4-parser */ - [[nodiscard]] bool parse_ipv4(std::string_view input); + [[nodiscard]] bool parse_ipv4(std::string_view input, bool in_place); /** * Return true on success. @@ -4725,7 +4935,7 @@ struct url_aggregator : url_base { [[nodiscard]] inline bool cannot_have_credentials_or_port() const; template - bool set_host_or_hostname(const std::string_view input); + bool set_host_or_hostname(std::string_view input); ada_really_inline bool parse_host(std::string_view input); @@ -4736,29 +4946,26 @@ struct url_aggregator : url_base { inline void update_base_search(std::string_view input); inline void update_base_search(std::string_view input, const uint8_t *query_percent_encode_set); - inline void update_base_pathname(const std::string_view input); - inline void update_base_username(const std::string_view input); - inline void append_base_username(const std::string_view input); - inline void update_base_password(const std::string_view input); - inline void append_base_password(const std::string_view input); + inline void update_base_pathname(std::string_view input); + inline void update_base_username(std::string_view input); + inline void append_base_username(std::string_view input); + inline void update_base_password(std::string_view input); + inline void append_base_password(std::string_view input); inline void update_base_port(uint32_t input); - inline void append_base_pathname(const std::string_view input); - inline uint32_t retrieve_base_port() const; - inline void clear_port(); + inline void append_base_pathname(std::string_view input); + [[nodiscard]] inline uint32_t retrieve_base_port() const; inline void clear_hostname(); - inline void clear_hash(); - inline void clear_pathname() override; - inline void clear_search() override; inline void clear_password(); - inline bool has_dash_dot() const noexcept; + inline void clear_pathname() override; + [[nodiscard]] inline bool has_dash_dot() const noexcept; void delete_dash_dot(); inline void consume_prepared_path(std::string_view input); template [[nodiscard]] ada_really_inline bool parse_scheme_with_colon( - const std::string_view input); + std::string_view input); ada_really_inline uint32_t replace_and_resize(uint32_t start, uint32_t end, std::string_view input); - inline bool has_authority() const noexcept; + [[nodiscard]] inline bool has_authority() const noexcept; inline void set_protocol_as_file(); inline void set_scheme(std::string_view new_scheme) noexcept; /** @@ -4789,12 +4996,16 @@ inline std::ostream &operator<<(std::ostream &out, const ada::url &u); #include /** + * These functions are not part of our public API and may + * change at any time. + * @private * @namespace ada::checkers * @brief Includes the definitions for validation functions */ namespace ada::checkers { /** + * @private * Assuming that x is an ASCII letter, this function returns the lower case * equivalent. * @details More likely to be inlined by the compiler and constexpr. @@ -4802,6 +5013,7 @@ namespace ada::checkers { constexpr char to_lower(char x) noexcept; /** + * @private * Returns true if the character is an ASCII letter. Equivalent to std::isalpha * but more likely to be inlined by the compiler. * @@ -4810,6 +5022,7 @@ constexpr char to_lower(char x) noexcept; constexpr bool is_alpha(char x) noexcept; /** + * @private * Check whether a string starts with 0x or 0X. The function is only * safe if input.size() >=2. * @@ -4817,17 +5030,20 @@ constexpr bool is_alpha(char x) noexcept; */ inline bool has_hex_prefix_unsafe(std::string_view input); /** + * @private * Check whether a string starts with 0x or 0X. */ inline bool has_hex_prefix(std::string_view input); /** + * @private * Check whether x is an ASCII digit. More likely to be inlined than * std::isdigit. */ constexpr bool is_digit(char x) noexcept; /** + * @private * @details A string starts with a Windows drive letter if all of the following * are true: * @@ -4841,6 +5057,7 @@ constexpr bool is_digit(char x) noexcept; inline constexpr bool is_windows_drive_letter(std::string_view input) noexcept; /** + * @private * @details A normalized Windows drive letter is a Windows drive letter of which * the second code point is U+003A (:). */ @@ -4848,17 +5065,22 @@ inline constexpr bool is_normalized_windows_drive_letter( std::string_view input) noexcept; /** - * @warning Will be removed when Ada supports C++20. + * @private + * @warning Will be removed when Ada requires C++20. */ -ada_really_inline constexpr bool begins_with(std::string_view view, - std::string_view prefix); +ada_really_inline bool begins_with(std::string_view view, + std::string_view prefix); /** - * Returns true if an input is an ipv4 address. + * @private + * Returns true if an input is an ipv4 address. It is assumed that the string + * does not contain uppercase ASCII characters (the input should have been + * lowered cased before calling this function) and is not empty. */ ada_really_inline ada_constexpr bool is_ipv4(std::string_view view) noexcept; /** + * @private * Returns a bitset. If the first bit is set, then at least one character needs * percent encoding. If the second bit is set, a \\ is found. If the third bit * is set then we have a dot. If the fourth bit is set, then we have a percent @@ -4868,6 +5090,7 @@ ada_really_inline constexpr uint8_t path_signature( std::string_view input) noexcept; /** + * @private * Returns true if the length of the domain name and its labels are according to * the specifications. The length of the domain must be 255 octets (253 * characters not including the last 2 which are the empty label reserved at the @@ -4920,52 +5143,52 @@ struct url : url_base { url(url &&u) noexcept = default; url &operator=(url &&u) noexcept = default; url &operator=(const url &u) = default; - ~url() = default; + ~url() override = default; /** * @private - * A URL’s username is an ASCII string identifying a username. It is initially + * A URL's username is an ASCII string identifying a username. It is initially * the empty string. */ std::string username{}; /** * @private - * A URL’s password is an ASCII string identifying a password. It is initially + * A URL's password is an ASCII string identifying a password. It is initially * the empty string. */ std::string password{}; /** * @private - * A URL’s host is null or a host. It is initially null. + * A URL's host is null or a host. It is initially null. */ std::optional host{}; /** * @private - * A URL’s port is either null or a 16-bit unsigned integer that identifies a + * A URL's port is either null or a 16-bit unsigned integer that identifies a * networking port. It is initially null. */ std::optional port{}; /** * @private - * A URL’s path is either an ASCII string or a list of zero or more ASCII + * A URL's path is either an ASCII string or a list of zero or more ASCII * strings, usually identifying a location. */ std::string path{}; /** * @private - * A URL’s query is either null or an ASCII string. It is initially null. + * A URL's query is either null or an ASCII string. It is initially null. */ std::optional query{}; /** * @private - * A URL’s fragment is either null or an ASCII string that can be used for - * further processing on the resource the URL’s other components identify. It + * A URL's fragment is either null or an ASCII string that can be used for + * further processing on the resource the URL's other components identify. It * is initially null. */ std::optional hash{}; @@ -4981,7 +5204,7 @@ struct url : url_base { /** * Returns a JSON string representation of this URL. */ - std::string to_string() const override; + [[nodiscard]] std::string to_string() const override; /** * @see https://url.spec.whatwg.org/#dom-url-href @@ -4990,7 +5213,7 @@ struct url : url_base { [[nodiscard]] ada_really_inline std::string get_href() const noexcept; /** - * The origin getter steps are to return the serialization of this’s URL’s + * The origin getter steps are to return the serialization of this's URL's * origin. [HTML] * @return a newly allocated string. * @see https://url.spec.whatwg.org/#concept-url-origin @@ -4998,7 +5221,7 @@ struct url : url_base { [[nodiscard]] std::string get_origin() const noexcept override; /** - * The protocol getter steps are to return this’s URL’s scheme, followed by + * The protocol getter steps are to return this's URL's scheme, followed by * U+003A (:). * @return a newly allocated string. * @see https://url.spec.whatwg.org/#dom-url-protocol @@ -5006,7 +5229,7 @@ struct url : url_base { [[nodiscard]] std::string get_protocol() const noexcept; /** - * Return url’s host, serialized, followed by U+003A (:) and url’s port, + * Return url's host, serialized, followed by U+003A (:) and url's port, * serialized. * When there is no host, this function returns the empty string. * @return a newly allocated string. @@ -5015,7 +5238,7 @@ struct url : url_base { [[nodiscard]] std::string get_host() const noexcept; /** - * Return this’s URL’s host, serialized. + * Return this's URL's host, serialized. * When there is no host, this function returns the empty string. * @return a newly allocated string. * @see https://url.spec.whatwg.org/#dom-url-hostname @@ -5024,29 +5247,29 @@ struct url : url_base { /** * The pathname getter steps are to return the result of URL path serializing - * this’s URL. + * this's URL. * @return a newly allocated string. * @see https://url.spec.whatwg.org/#dom-url-pathname */ - [[nodiscard]] const std::string_view get_pathname() const noexcept; + [[nodiscard]] std::string_view get_pathname() const noexcept; /** - * Compute the pathname length in bytes witout instantiating a view or a + * Compute the pathname length in bytes without instantiating a view or a * string. * @return size of the pathname in bytes * @see https://url.spec.whatwg.org/#dom-url-pathname */ - ada_really_inline size_t get_pathname_length() const noexcept; + [[nodiscard]] ada_really_inline size_t get_pathname_length() const noexcept; /** - * Return U+003F (?), followed by this’s URL’s query. + * Return U+003F (?), followed by this's URL's query. * @return a newly allocated string. * @see https://url.spec.whatwg.org/#dom-url-search */ [[nodiscard]] std::string get_search() const noexcept; /** - * The username getter steps are to return this’s URL’s username. + * The username getter steps are to return this's URL's username. * @return a constant reference to the underlying string. * @see https://url.spec.whatwg.org/#dom-url-username */ @@ -5056,77 +5279,77 @@ struct url : url_base { * @return Returns true on successful operation. * @see https://url.spec.whatwg.org/#dom-url-username */ - bool set_username(const std::string_view input); + bool set_username(std::string_view input); /** * @return Returns true on success. * @see https://url.spec.whatwg.org/#dom-url-password */ - bool set_password(const std::string_view input); + bool set_password(std::string_view input); /** * @return Returns true on success. * @see https://url.spec.whatwg.org/#dom-url-port */ - bool set_port(const std::string_view input); + bool set_port(std::string_view input); /** * This function always succeeds. * @see https://url.spec.whatwg.org/#dom-url-hash */ - void set_hash(const std::string_view input); + void set_hash(std::string_view input); /** * This function always succeeds. * @see https://url.spec.whatwg.org/#dom-url-search */ - void set_search(const std::string_view input); + void set_search(std::string_view input); /** * @return Returns true on success. * @see https://url.spec.whatwg.org/#dom-url-search */ - bool set_pathname(const std::string_view input); + bool set_pathname(std::string_view input); /** * @return Returns true on success. * @see https://url.spec.whatwg.org/#dom-url-host */ - bool set_host(const std::string_view input); + bool set_host(std::string_view input); /** * @return Returns true on success. * @see https://url.spec.whatwg.org/#dom-url-hostname */ - bool set_hostname(const std::string_view input); + bool set_hostname(std::string_view input); /** * @return Returns true on success. * @see https://url.spec.whatwg.org/#dom-url-protocol */ - bool set_protocol(const std::string_view input); + bool set_protocol(std::string_view input); /** * @see https://url.spec.whatwg.org/#dom-url-href */ - bool set_href(const std::string_view input); + bool set_href(std::string_view input); /** - * The password getter steps are to return this’s URL’s password. + * The password getter steps are to return this's URL's password. * @return a constant reference to the underlying string. * @see https://url.spec.whatwg.org/#dom-url-password */ [[nodiscard]] const std::string &get_password() const noexcept; /** - * Return this’s URL’s port, serialized. + * Return this's URL's port, serialized. * @return a newly constructed string representing the port. * @see https://url.spec.whatwg.org/#dom-url-port */ [[nodiscard]] std::string get_port() const noexcept; /** - * Return U+0023 (#), followed by this’s URL’s fragment. + * Return U+0023 (#), followed by this's URL's fragment. * @return a newly constructed string representing the hash. * @see https://url.spec.whatwg.org/#dom-url-hash */ @@ -5180,9 +5403,9 @@ struct url : url_base { inline void update_base_search(std::string_view input, const uint8_t query_percent_encode_set[]); inline void update_base_search(std::optional input); - inline void update_base_pathname(const std::string_view input); - inline void update_base_username(const std::string_view input); - inline void update_base_password(const std::string_view input); + inline void update_base_pathname(std::string_view input); + inline void update_base_username(std::string_view input); + inline void update_base_password(std::string_view input); inline void update_base_port(std::optional input); /** @@ -5212,7 +5435,7 @@ struct url : url_base { [[nodiscard]] bool parse_opaque_host(std::string_view input); /** - * A URL’s scheme is an ASCII string that identifies the type of URL and can + * A URL's scheme is an ASCII string that identifies the type of URL and can * be used to dispatch a URL for further processing after parsing. It is * initially the empty string. We only set non_special_scheme when the scheme * is non-special, otherwise we avoid constructing string. @@ -5228,9 +5451,12 @@ struct url : url_base { */ [[nodiscard]] inline bool cannot_have_credentials_or_port() const; - ada_really_inline size_t - parse_port(std::string_view view, - bool check_trailing_content = false) noexcept override; + ada_really_inline size_t parse_port( + std::string_view view, bool check_trailing_content) noexcept override; + + ada_really_inline size_t parse_port(std::string_view view) noexcept override { + return this->parse_port(view, false); + } /** * Take the scheme from another URL. The scheme string is copied from the @@ -5249,8 +5475,7 @@ struct url : url_base { [[nodiscard]] ada_really_inline bool parse_host(std::string_view input); template - [[nodiscard]] ada_really_inline bool parse_scheme( - const std::string_view input); + [[nodiscard]] ada_really_inline bool parse_scheme(std::string_view input); inline void clear_pathname() override; inline void clear_search() override; @@ -5266,7 +5491,7 @@ struct url : url_base { * * @see https://url.spec.whatwg.org/ */ - ada_really_inline void parse_path(const std::string_view input); + ada_really_inline void parse_path(std::string_view input); /** * Set the scheme for this URL. The provided scheme should be a valid @@ -5353,7 +5578,9 @@ inline std::ostream &operator<<(std::ostream &out, const ada::url &u) { return out << u.to_string(); } -size_t url::get_pathname_length() const noexcept { return path.size(); } +[[nodiscard]] size_t url::get_pathname_length() const noexcept { + return path.size(); +} [[nodiscard]] ada_really_inline ada::url_components url::get_components() const noexcept { @@ -5394,8 +5621,8 @@ size_t url::get_pathname_length() const noexcept { return path.size(); } out.host_end = out.host_start; if (!has_opaque_path && checkers::begins_with(path, "//")) { - // If url’s host is null, url does not have an opaque path, url’s path’s - // size is greater than 1, and url’s path[0] is the empty string, then + // If url's host is null, url does not have an opaque path, url's path's + // size is greater than 1, and url's path[0] is the empty string, then // append U+002F (/) followed by U+002E (.) to output. running_index = out.protocol_end + 2; } else { @@ -5509,8 +5736,8 @@ inline void url::copy_scheme(const ada::url &u) { output += ":" + get_port(); } } else if (!has_opaque_path && checkers::begins_with(path, "//")) { - // If url’s host is null, url does not have an opaque path, url’s path’s - // size is greater than 1, and url’s path[0] is the empty string, then + // If url's host is null, url does not have an opaque path, url's path's + // size is greater than 1, and url's path[0] is the empty string, then // append U+002F (/) followed by U+002E (.) to output. output += "/."; } @@ -5544,7 +5771,11 @@ ada_really_inline size_t url::parse_port(std::string_view view, } ada_log("parse_port: is_valid = ", is_valid); if (is_valid) { - port = (r.ec == std::errc() && scheme_default_port() != parsed_port) + // scheme_default_port can return 0, and we should allow 0 as a base port. + auto default_port = scheme_default_port(); + bool is_port_valid = (default_port == 0 && parsed_port == 0) || + (default_port != parsed_port); + port = (r.ec == std::errc() && is_port_valid) ? std::optional(parsed_port) : std::nullopt; } @@ -5573,6 +5804,10 @@ ada_really_inline size_t url::parse_port(std::string_view view, #include /** + * Unicode operations. These functions are not part of our public API and may + * change at any time. + * + * private * @namespace ada::unicode * @brief Includes the declarations for unicode operations */ @@ -5726,7 +5961,7 @@ inline void url_aggregator::update_base_hostname(const std::string_view input) { ADA_ASSERT_TRUE(validate()); } -ada_really_inline uint32_t +[[nodiscard]] ada_really_inline uint32_t url_aggregator::get_pathname_length() const noexcept { ada_log("url_aggregator::get_pathname_length"); uint32_t ending_index = uint32_t(buffer.size()); @@ -5850,7 +6085,7 @@ inline void url_aggregator::update_base_pathname(const std::string_view input) { if (begins_with_dashdash && !has_opaque_path && !has_authority() && !has_dash_dot()) { - // If url’s host is null, url does not have an opaque path, url’s path’s + // If url's host is null, url does not have an opaque path, url's path's // size is greater than 1, then append U+002F (/) followed by U+002E (.) to // output. buffer.insert(components.pathname_start, "/."); @@ -6161,7 +6396,7 @@ inline void url_aggregator::clear_port() { ADA_ASSERT_TRUE(validate()); } -inline uint32_t url_aggregator::retrieve_base_port() const { +[[nodiscard]] inline uint32_t url_aggregator::retrieve_base_port() const { ada_log("url_aggregator::retrieve_base_port"); return components.port; } @@ -6274,7 +6509,9 @@ inline void url_aggregator::clear_hostname() { " with " + components.to_string() + "\n" + to_diagram()); #endif ADA_ASSERT_TRUE(has_authority()); - ADA_ASSERT_TRUE(has_empty_hostname()); + ADA_ASSERT_EQUAL(has_empty_hostname(), true, + "hostname should have been cleared on buffer=" + buffer + + " with " + components.to_string() + "\n" + to_diagram()); ADA_ASSERT_TRUE(validate()); } @@ -6318,7 +6555,7 @@ inline void ada::url_aggregator::add_authority_slashes_if_needed() noexcept { ADA_ASSERT_TRUE(validate()); // Protocol setter will insert `http:` to the URL. It is up to hostname setter // to insert - // `//` initially to the buffer, since it depends on the hostname existance. + // `//` initially to the buffer, since it depends on the hostname existence. if (has_authority()) { return; } @@ -6379,31 +6616,45 @@ inline bool url_aggregator::has_hostname() const noexcept { inline bool url_aggregator::has_port() const noexcept { ada_log("url_aggregator::has_port"); - return components.pathname_start != components.host_end; + // A URL cannot have a username/password/port if its host is null or the empty + // string, or its scheme is "file". + return has_hostname() && components.pathname_start != components.host_end; } -inline bool url_aggregator::has_dash_dot() const noexcept { - // If url’s host is null, url does not have an opaque path, url’s path’s size - // is greater than 1, and url’s path[0] is the empty string, then append +[[nodiscard]] inline bool url_aggregator::has_dash_dot() const noexcept { + // If url's host is null, url does not have an opaque path, url's path's size + // is greater than 1, and url's path[0] is the empty string, then append // U+002F (/) followed by U+002E (.) to output. ada_log("url_aggregator::has_dash_dot"); - // Performance: instead of doing this potentially expensive check, we could - // just have a boolean value in the structure. #if ADA_DEVELOPMENT_CHECKS - if (components.pathname_start + 1 < buffer.size() && - components.pathname_start == components.host_end + 2) { - ADA_ASSERT_TRUE(buffer[components.host_end] == '/'); - ADA_ASSERT_TRUE(buffer[components.host_end + 1] == '.'); + // If pathname_start and host_end are exactly two characters apart, then we + // either have a one-digit port such as http://test.com:5?param=1 or else we + // have a /.: sequence such as "non-spec:/.//". We test that this is the case. + if (components.pathname_start == components.host_end + 2) { + ADA_ASSERT_TRUE((buffer[components.host_end] == '/' && + buffer[components.host_end + 1] == '.') || + (buffer[components.host_end] == ':' && + checkers::is_digit(buffer[components.host_end + 1]))); + } + if (components.pathname_start == components.host_end + 2 && + buffer[components.host_end] == '/' && + buffer[components.host_end + 1] == '.') { + ADA_ASSERT_TRUE(components.pathname_start + 1 < buffer.size()); ADA_ASSERT_TRUE(buffer[components.pathname_start] == '/'); ADA_ASSERT_TRUE(buffer[components.pathname_start + 1] == '/'); } #endif - return !has_opaque_path && - components.pathname_start == components.host_end + 2 && - components.pathname_start + 1 < buffer.size(); + // Performance: it should be uncommon for components.pathname_start == + // components.host_end + 2 to be true. So we put this check first in the + // sequence. Most times, we do not have an opaque path. Checking for '/.' is + // more expensive, but should be uncommon. + return components.pathname_start == components.host_end + 2 && + !has_opaque_path && buffer[components.host_end] == '/' && + buffer[components.host_end + 1] == '.'; } -inline std::string_view url_aggregator::get_href() const noexcept { +[[nodiscard]] inline std::string_view url_aggregator::get_href() + const noexcept { ada_log("url_aggregator::get_href"); return buffer; } @@ -6428,7 +6679,12 @@ ada_really_inline size_t url_aggregator::parse_port( } ada_log("parse_port: is_valid = ", is_valid); if (is_valid) { - if (r.ec == std::errc() && scheme_default_port() != parsed_port) { + ada_log("parse_port", r.ec == std::errc()); + // scheme_default_port can return 0, and we should allow 0 as a base port. + auto default_port = scheme_default_port(); + bool is_port_valid = (default_port == 0 && parsed_port == 0) || + (default_port != parsed_port); + if (r.ec == std::errc() && is_port_valid) { update_base_port(parsed_port); } else { clear_port(); @@ -6475,6 +6731,402 @@ inline std::ostream &operator<<(std::ostream &out, #endif // ADA_URL_AGGREGATOR_INL_H /* end file include/ada/url_aggregator-inl.h */ +/* begin file include/ada/url_search_params.h */ +/** + * @file url_search_params.h + * @brief Declaration for the URL Search Params + */ +#ifndef ADA_URL_SEARCH_PARAMS_H +#define ADA_URL_SEARCH_PARAMS_H + +#include +#include +#include +#include + +namespace ada { + +enum class url_search_params_iter_type { + KEYS, + VALUES, + ENTRIES, +}; + +template +struct url_search_params_iter; + +typedef std::pair key_value_view_pair; + +using url_search_params_keys_iter = + url_search_params_iter; +using url_search_params_values_iter = + url_search_params_iter; +using url_search_params_entries_iter = + url_search_params_iter; + +/** + * @see https://url.spec.whatwg.org/#interface-urlsearchparams + */ +struct url_search_params { + url_search_params() = default; + + /** + * @see + * https://github.com/web-platform-tests/wpt/blob/master/url/urlsearchparams-constructor.any.js + */ + url_search_params(const std::string_view input) { initialize(input); } + + url_search_params(const url_search_params &u) = default; + url_search_params(url_search_params &&u) noexcept = default; + url_search_params &operator=(url_search_params &&u) noexcept = default; + url_search_params &operator=(const url_search_params &u) = default; + ~url_search_params() = default; + + [[nodiscard]] inline size_t size() const noexcept; + + /** + * @see https://url.spec.whatwg.org/#dom-urlsearchparams-append + */ + inline void append(std::string_view key, std::string_view value); + + /** + * @see https://url.spec.whatwg.org/#dom-urlsearchparams-delete + */ + inline void remove(std::string_view key); + inline void remove(std::string_view key, std::string_view value); + + /** + * @see https://url.spec.whatwg.org/#dom-urlsearchparams-get + */ + inline std::optional get(std::string_view key); + + /** + * @see https://url.spec.whatwg.org/#dom-urlsearchparams-getall + */ + inline std::vector get_all(std::string_view key); + + /** + * @see https://url.spec.whatwg.org/#dom-urlsearchparams-has + */ + inline bool has(std::string_view key) noexcept; + inline bool has(std::string_view key, std::string_view value) noexcept; + + /** + * @see https://url.spec.whatwg.org/#dom-urlsearchparams-set + */ + inline void set(std::string_view key, std::string_view value); + + /** + * @see https://url.spec.whatwg.org/#dom-urlsearchparams-sort + */ + inline void sort(); + + /** + * @see https://url.spec.whatwg.org/#urlsearchparams-stringification-behavior + */ + inline std::string to_string(); + + /** + * Returns a simple JS-style iterator over all of the keys in this + * url_search_params. The keys in the iterator are not unique. The valid + * lifespan of the iterator is tied to the url_search_params. The iterator + * must be freed when you're done with it. + * @see https://url.spec.whatwg.org/#interface-urlsearchparams + */ + inline url_search_params_keys_iter get_keys(); + + /** + * Returns a simple JS-style iterator over all of the values in this + * url_search_params. The valid lifespan of the iterator is tied to the + * url_search_params. The iterator must be freed when you're done with it. + * @see https://url.spec.whatwg.org/#interface-urlsearchparams + */ + inline url_search_params_values_iter get_values(); + + /** + * Returns a simple JS-style iterator over all of the entries in this + * url_search_params. The entries are pairs of keys and corresponding values. + * The valid lifespan of the iterator is tied to the url_search_params. The + * iterator must be freed when you're done with it. + * @see https://url.spec.whatwg.org/#interface-urlsearchparams + */ + inline url_search_params_entries_iter get_entries(); + + /** + * C++ style conventional iterator support. const only because we + * do not really want the params to be modified via the iterator. + */ + inline auto begin() const { return params.begin(); } + inline auto end() const { return params.end(); } + inline auto front() const { return params.front(); } + inline auto back() const { return params.back(); } + inline auto operator[](size_t index) const { return params[index]; } + + private: + typedef std::pair key_value_pair; + std::vector params{}; + + /** + * @see https://url.spec.whatwg.org/#concept-urlencoded-parser + */ + void initialize(std::string_view init); + + template + friend struct url_search_params_iter; +}; // url_search_params + +/** + * Implements a non-conventional iterator pattern that is closer in style to + * JavaScript's definition of an iterator. + * + * @see https://webidl.spec.whatwg.org/#idl-iterable + */ +template +struct url_search_params_iter { + inline url_search_params_iter() : params(EMPTY) {} + url_search_params_iter(const url_search_params_iter &u) = default; + url_search_params_iter(url_search_params_iter &&u) noexcept = default; + url_search_params_iter &operator=(url_search_params_iter &&u) noexcept = + default; + url_search_params_iter &operator=(const url_search_params_iter &u) = default; + ~url_search_params_iter() = default; + + /** + * Return the next item in the iterator or std::nullopt if done. + */ + inline std::optional next(); + + inline bool has_next(); + + private: + static url_search_params EMPTY; + inline url_search_params_iter(url_search_params ¶ms_) : params(params_) {} + + url_search_params ¶ms; + size_t pos = 0; + + friend struct url_search_params; +}; + +} // namespace ada +#endif +/* end file include/ada/url_search_params.h */ +/* begin file include/ada/url_search_params-inl.h */ +/** + * @file url_search_params-inl.h + * @brief Inline declarations for the URL Search Params + */ +#ifndef ADA_URL_SEARCH_PARAMS_INL_H +#define ADA_URL_SEARCH_PARAMS_INL_H + + +#include +#include +#include +#include +#include + +namespace ada { + +// A default, empty url_search_params for use with empty iterators. +template +url_search_params url_search_params_iter::EMPTY; + +inline void url_search_params::initialize(std::string_view input) { + if (!input.empty() && input.front() == '?') { + input.remove_prefix(1); + } + + auto process_key_value = [&](const std::string_view current) { + auto equal = current.find('='); + + if (equal == std::string_view::npos) { + auto name = std::string(current); + std::replace(name.begin(), name.end(), '+', ' '); + params.emplace_back(unicode::percent_decode(name, name.find('%')), ""); + } else { + auto name = std::string(current.substr(0, equal)); + auto value = std::string(current.substr(equal + 1)); + + std::replace(name.begin(), name.end(), '+', ' '); + std::replace(value.begin(), value.end(), '+', ' '); + + params.emplace_back(unicode::percent_decode(name, name.find('%')), + unicode::percent_decode(value, value.find('%'))); + } + }; + + while (!input.empty()) { + auto ampersand_index = input.find('&'); + + if (ampersand_index == std::string_view::npos) { + if (!input.empty()) { + process_key_value(input); + } + break; + } else if (ampersand_index != 0) { + process_key_value(input.substr(0, ampersand_index)); + } + + input.remove_prefix(ampersand_index + 1); + } +} + +inline void url_search_params::append(const std::string_view key, + const std::string_view value) { + params.emplace_back(key, value); +} + +inline size_t url_search_params::size() const noexcept { return params.size(); } + +inline std::optional url_search_params::get( + const std::string_view key) { + auto entry = std::find_if(params.begin(), params.end(), + [&key](auto ¶m) { return param.first == key; }); + + if (entry == params.end()) { + return std::nullopt; + } + + return entry->second; +} + +inline std::vector url_search_params::get_all( + const std::string_view key) { + std::vector out{}; + + for (auto ¶m : params) { + if (param.first == key) { + out.emplace_back(param.second); + } + } + + return out; +} + +inline bool url_search_params::has(const std::string_view key) noexcept { + auto entry = std::find_if(params.begin(), params.end(), + [&key](auto ¶m) { return param.first == key; }); + return entry != params.end(); +} + +inline bool url_search_params::has(std::string_view key, + std::string_view value) noexcept { + auto entry = + std::find_if(params.begin(), params.end(), [&key, &value](auto ¶m) { + return param.first == key && param.second == value; + }); + return entry != params.end(); +} + +inline std::string url_search_params::to_string() { + auto character_set = ada::character_sets::WWW_FORM_URLENCODED_PERCENT_ENCODE; + std::string out{}; + for (size_t i = 0; i < params.size(); i++) { + auto key = ada::unicode::percent_encode(params[i].first, character_set); + auto value = ada::unicode::percent_encode(params[i].second, character_set); + + // Performance optimization: Move this inside percent_encode. + std::replace(key.begin(), key.end(), ' ', '+'); + std::replace(value.begin(), value.end(), ' ', '+'); + + if (i != 0) { + out += "&"; + } + out.append(key); + out += "="; + out.append(value); + } + return out; +} + +inline void url_search_params::set(const std::string_view key, + const std::string_view value) { + const auto find = [&key](auto ¶m) { return param.first == key; }; + + auto it = std::find_if(params.begin(), params.end(), find); + + if (it == params.end()) { + params.emplace_back(key, value); + } else { + it->second = value; + params.erase(std::remove_if(std::next(it), params.end(), find), + params.end()); + } +} + +inline void url_search_params::remove(const std::string_view key) { + params.erase( + std::remove_if(params.begin(), params.end(), + [&key](auto ¶m) { return param.first == key; }), + params.end()); +} + +inline void url_search_params::remove(const std::string_view key, + const std::string_view value) { + params.erase(std::remove_if(params.begin(), params.end(), + [&key, &value](auto ¶m) { + return param.first == key && + param.second == value; + }), + params.end()); +} + +inline void url_search_params::sort() { + std::stable_sort(params.begin(), params.end(), + [](const key_value_pair &lhs, const key_value_pair &rhs) { + return lhs.first < rhs.first; + }); +} + +inline url_search_params_keys_iter url_search_params::get_keys() { + return url_search_params_keys_iter(*this); +} + +/** + * @see https://url.spec.whatwg.org/#interface-urlsearchparams + */ +inline url_search_params_values_iter url_search_params::get_values() { + return url_search_params_values_iter(*this); +} + +/** + * @see https://url.spec.whatwg.org/#interface-urlsearchparams + */ +inline url_search_params_entries_iter url_search_params::get_entries() { + return url_search_params_entries_iter(*this); +} + +template +inline bool url_search_params_iter::has_next() { + return pos < params.params.size(); +} + +template <> +inline std::optional url_search_params_keys_iter::next() { + if (!has_next()) return std::nullopt; + return params.params[pos++].first; +} + +template <> +inline std::optional url_search_params_values_iter::next() { + if (!has_next()) return std::nullopt; + return params.params[pos++].second; +} + +template <> +inline std::optional +url_search_params_entries_iter::next() { + if (!has_next()) return std::nullopt; + return params.params[pos++]; +} + +} // namespace ada + +#endif // ADA_URL_SEARCH_PARAMS_INL_H +/* end file include/ada/url_search_params-inl.h */ // Public API /* begin file include/ada/ada_version.h */ @@ -6485,14 +7137,14 @@ inline std::ostream &operator<<(std::ostream &out, #ifndef ADA_ADA_VERSION_H #define ADA_ADA_VERSION_H -#define ADA_VERSION "2.4.2" +#define ADA_VERSION "2.7.7" namespace ada { enum { ADA_VERSION_MAJOR = 2, - ADA_VERSION_MINOR = 4, - ADA_VERSION_REVISION = 2, + ADA_VERSION_MINOR = 7, + ADA_VERSION_REVISION = 7, }; } // namespace ada diff --git a/ada_c.h b/ada_c.h index f8bcbdc..173e27b 100644 --- a/ada_c.h +++ b/ada_c.h @@ -51,6 +51,7 @@ bool ada_can_parse_with_base(const char* input, size_t input_length, void ada_free(ada_url result); void ada_free_owned_string(ada_owned_string owned); +ada_url ada_copy(ada_url input); bool ada_is_valid(ada_url result); @@ -67,6 +68,8 @@ ada_string ada_get_hostname(ada_url result); ada_string ada_get_pathname(ada_url result); ada_string ada_get_search(ada_url result); ada_string ada_get_protocol(ada_url result); +uint8_t ada_get_host_type(ada_url result); +uint8_t ada_get_scheme_type(ada_url result); // url_aggregator setters // if ada_is_valid(result)) is false, the setters have no effect @@ -82,6 +85,11 @@ bool ada_set_pathname(ada_url result, const char* input, size_t length); void ada_set_search(ada_url result, const char* input, size_t length); void ada_set_hash(ada_url result, const char* input, size_t length); +// url_aggregator clear methods +void ada_clear_port(ada_url result); +void ada_clear_hash(ada_url result); +void ada_clear_search(ada_url result); + // url_aggregator functions // if ada_is_valid(result) is false, functions below will return false bool ada_has_credentials(ada_url result); @@ -97,4 +105,81 @@ bool ada_has_search(ada_url result); // returns a pointer to the internal url_aggregator::url_components const ada_url_components* ada_get_components(ada_url result); +// idna methods +ada_owned_string ada_idna_to_unicode(const char* input, size_t length); +ada_owned_string ada_idna_to_ascii(const char* input, size_t length); + +// url search params +typedef void* ada_url_search_params; + +// Represents an std::vector +typedef void* ada_strings; +typedef void* ada_url_search_params_keys_iter; +typedef void* ada_url_search_params_values_iter; + +typedef struct { + ada_string key; + ada_string value; +} ada_string_pair; + +typedef void* ada_url_search_params_entries_iter; + +ada_url_search_params ada_parse_search_params(const char* input, size_t length); +void ada_free_search_params(ada_url_search_params result); + +size_t ada_search_params_size(ada_url_search_params result); +void ada_search_params_sort(ada_url_search_params result); +ada_owned_string ada_search_params_to_string(ada_url_search_params result); + +void ada_search_params_append(ada_url_search_params result, const char* key, + size_t key_length, const char* value, + size_t value_length); +void ada_search_params_set(ada_url_search_params result, const char* key, + size_t key_length, const char* value, + size_t value_length); +void ada_search_params_remove(ada_url_search_params result, const char* key, + size_t key_length); +void ada_search_params_remove_value(ada_url_search_params result, + const char* key, size_t key_length, + const char* value, size_t value_length); +bool ada_search_params_has(ada_url_search_params result, const char* key, + size_t key_length); +bool ada_search_params_has_value(ada_url_search_params result, const char* key, + size_t key_length, const char* value, + size_t value_length); +ada_string ada_search_params_get(ada_url_search_params result, const char* key, + size_t key_length); +ada_strings ada_search_params_get_all(ada_url_search_params result, + const char* key, size_t key_length); +ada_url_search_params_keys_iter ada_search_params_get_keys( + ada_url_search_params result); +ada_url_search_params_values_iter ada_search_params_get_values( + ada_url_search_params result); +ada_url_search_params_entries_iter ada_search_params_get_entries( + ada_url_search_params result); + +void ada_free_strings(ada_strings result); +size_t ada_strings_size(ada_strings result); +ada_string ada_strings_get(ada_strings result, size_t index); + +void ada_free_search_params_keys_iter(ada_url_search_params_keys_iter result); +ada_string ada_search_params_keys_iter_next( + ada_url_search_params_keys_iter result); +bool ada_search_params_keys_iter_has_next( + ada_url_search_params_keys_iter result); + +void ada_free_search_params_values_iter( + ada_url_search_params_values_iter result); +ada_string ada_search_params_values_iter_next( + ada_url_search_params_values_iter result); +bool ada_search_params_values_iter_has_next( + ada_url_search_params_values_iter result); + +void ada_free_search_params_entries_iter( + ada_url_search_params_entries_iter result); +ada_string_pair ada_search_params_entries_iter_next( + ada_url_search_params_entries_iter result); +bool ada_search_params_entries_iter_has_next( + ada_url_search_params_entries_iter result); + #endif // ADA_C_H