14bool url::parse_opaque_host(std::string_view input) {
15 ada_log(
"parse_opaque_host ", input,
" [", input.size(),
" bytes]");
16 if (std::ranges::any_of(input, ada::unicode::is_forbidden_host_code_point)) {
22 host = ada::unicode::percent_encode(
27bool url::parse_ipv4(std::string_view input) {
28 ada_log(
"parse_ipv4 ", input,
" [", input.size(),
" bytes]");
29 if (input.back() ==
'.') {
30 input.remove_suffix(1);
32 size_t digit_count{0};
33 int pure_decimal_count = 0;
34 std::string_view original_input =
38 for (; (digit_count < 4) && !(input.empty()); digit_count++) {
42 if (is_hex && ((input.length() == 2) ||
43 ((input.length() > 2) && (input[2] ==
'.')))) {
46 input.remove_prefix(2);
48 std::from_chars_result r{};
50 r = std::from_chars(input.data() + 2, input.data() + input.size(),
52 }
else if ((input.length() >= 2) && input[0] ==
'0' &&
54 r = std::from_chars(input.data() + 1, input.data() + input.size(),
58 r = std::from_chars(input.data(), input.data() + input.size(),
61 if (r.ec != std::errc()) {
64 input.remove_prefix(r.ptr - input.data());
70 if (segment_result >= (uint64_t(1) << (32 - digit_count * 8))) {
73 ipv4 <<= (32 - digit_count * 8);
74 ipv4 |= segment_result;
79 if ((segment_result > 255) || (input[0] !=
'.')) {
83 ipv4 |= segment_result;
84 input.remove_prefix(1);
87 if ((digit_count != 4) || (!input.empty())) {
92 if (pure_decimal_count == 4) {
93 host = original_input;
102bool url::parse_ipv6(std::string_view input) {
103 ada_log(
"parse_ipv6 ", input,
" [", input.size(),
" bytes]");
109 std::array<uint16_t, 8> address{};
115 std::optional<int> compress{};
118 std::string_view::iterator pointer = input.begin();
121 if (input[0] ==
':') {
124 if (input.size() == 1 || input[1] !=
':') {
125 ada_log(
"parse_ipv6 starts with : but the rest does not start with :");
133 compress = ++piece_index;
137 while (pointer != input.end()) {
139 if (piece_index == 8) {
140 ada_log(
"parse_ipv6 piece_index == 8");
145 if (*pointer ==
':') {
147 if (compress.has_value()) {
148 ada_log(
"parse_ipv6 compress is non-null");
155 compress = ++piece_index;
160 uint16_t value = 0, length = 0;
165 while (length < 4 && pointer != input.end() &&
166 unicode::is_ascii_hex_digit(*pointer)) {
168 value = uint16_t(value * 0x10 + unicode::convert_hex_to_binary(*pointer));
174 if (pointer != input.end() && *pointer ==
'.') {
177 ada_log(
"parse_ipv6 length is 0");
185 if (piece_index > 6) {
186 ada_log(
"parse_ipv6 piece_index > 6");
191 int numbers_seen = 0;
194 while (pointer != input.end()) {
196 std::optional<uint16_t> ipv4_piece{};
199 if (numbers_seen > 0) {
202 if (*pointer ==
'.' && numbers_seen < 4) {
207 ada_log(
"parse_ipv6 Otherwise, validation error, return failure");
215 "parse_ipv6 If c is not an ASCII digit, validation error, return "
223 int number = *pointer -
'0';
226 if (!ipv4_piece.has_value()) {
230 else if (ipv4_piece == 0) {
231 ada_log(
"parse_ipv6 if ipv4Piece is 0, validation error");
236 ipv4_piece = *ipv4_piece * 10 + number;
240 if (ipv4_piece > 255) {
241 ada_log(
"parse_ipv6 ipv4_piece > 255");
252 address[piece_index] =
253 uint16_t(address[piece_index] * 0x100 + *ipv4_piece);
259 if (numbers_seen == 2 || numbers_seen == 4) {
265 if (numbers_seen != 4) {
273 else if ((pointer != input.end()) && (*pointer ==
':')) {
278 if (pointer == input.end()) {
280 "parse_ipv6 If c is the EOF code point, validation error, return "
287 else if (pointer != input.end()) {
289 "parse_ipv6 Otherwise, if c is not the EOF code point, validation "
290 "error, return failure");
295 address[piece_index] = value;
302 if (compress.has_value()) {
304 int swaps = piece_index - *compress;
312 while (piece_index != 0 && swaps > 0) {
313 std::swap(address[piece_index], address[*compress + swaps - 1]);
320 else if (piece_index != 8) {
322 "parse_ipv6 if compress is null and pieceIndex is not 8, validation "
323 "error, return failure");
327 ada_log(
"parse_ipv6 ", *host);
332template <
bool has_state_overr
ide>
340 if (is_input_special) {
341 if constexpr (has_state_override) {
358 host.value().empty()) {
365 if constexpr (has_state_override) {
367 uint16_t urls_scheme_port = get_special_port();
369 if (urls_scheme_port) {
372 if (port.has_value() && *port == urls_scheme_port) {
378 std::string _buffer(input);
383 unicode::to_lower_ascii(_buffer.data(), _buffer.size());
385 if constexpr (has_state_override) {
389 if (
is_special() != ada::scheme::is_special(_buffer)) {
402 host.value().empty()) {
407 set_scheme(std::move(_buffer));
409 if constexpr (has_state_override) {
411 uint16_t urls_scheme_port = get_special_port();
413 if (urls_scheme_port) {
416 if (port.has_value() && *port == urls_scheme_port) {
427 ada_log(
"parse_host ", input,
" [", input.size(),
" bytes]");
432 if (input[0] ==
'[') {
434 if (input.back() !=
']') {
437 ada_log(
"parse_host ipv6");
441 input.remove_prefix(1);
442 input.remove_suffix(1);
443 return parse_ipv6(input);
449 return parse_opaque_host(input);
456 if (!input.empty() && input.back() ==
'.') {
457 host = input.substr(0, input.size() - 1);
462 ada_log(
"parse_host fast path decimal ipv4");
470 std::string buffer = std::string(input);
474 unicode::to_lower_ascii(buffer.data(), buffer.size());
475 bool is_forbidden = unicode::contains_forbidden_domain_code_point(
476 buffer.data(), buffer.size());
477 if (is_forbidden == 0 && buffer.find(
"xn-") == std::string_view::npos) {
479 host = std::move(buffer);
482 if (checkers::is_ipv4(host.value())) {
483 ada_log(
"parse_host fast path ipv4");
484 return parse_ipv4(host.value());
486 ada_log(
"parse_host fast path ", *host);
489 ada_log(
"parse_host calling to_ascii");
490 is_valid = ada::unicode::to_ascii(host, input, input.find(
'%'));
492 ada_log(
"parse_host to_ascii returns false");
495 ada_log(
"parse_host to_ascii succeeded ", *host,
" [", host->size(),
498 if (std::any_of(host.value().begin(), host.value().end(),
499 ada::unicode::is_forbidden_domain_code_point)) {
506 if (checkers::is_ipv4(host.value())) {
507 ada_log(
"parse_host got ipv4 ", *host);
508 return parse_ipv4(host.value());
515 ada_log(
"parse_path ", input);
516 std::string tmp_buffer;
517 std::string_view internal_input;
518 if (unicode::has_tabs_or_newline(input)) {
522 helpers::remove_ascii_tab_or_newline(tmp_buffer);
523 internal_input = tmp_buffer;
525 internal_input = input;
530 if (internal_input.empty()) {
532 }
else if ((internal_input[0] ==
'/') || (internal_input[0] ==
'\\')) {
533 helpers::parse_prepared_path(internal_input.substr(1), type, path);
535 helpers::parse_prepared_path(internal_input, type, path);
537 }
else if (!internal_input.empty()) {
538 if (internal_input[0] ==
'/') {
539 helpers::parse_prepared_path(internal_input.substr(1), type, path);
541 helpers::parse_prepared_path(internal_input, type, path);
544 if (!host.has_value()) {
555 auto back = std::back_insert_iterator(answer);
556 answer.append(
"{\n");
557 answer.append(
"\t\"protocol\":\"");
559 answer.append(
"\",\n");
561 answer.append(
"\t\"username\":\"");
562 helpers::encode_json(username, back);
563 answer.append(
"\",\n");
564 answer.append(
"\t\"password\":\"");
565 helpers::encode_json(password, back);
566 answer.append(
"\",\n");
568 if (host.has_value()) {
569 answer.append(
"\t\"host\":\"");
570 helpers::encode_json(host.value(), back);
571 answer.append(
"\",\n");
573 if (port.has_value()) {
574 answer.append(
"\t\"port\":\"");
575 answer.append(std::to_string(port.value()));
576 answer.append(
"\",\n");
578 answer.append(
"\t\"path\":\"");
579 helpers::encode_json(path, back);
580 answer.append(
"\",\n");
581 answer.append(
"\t\"opaque path\":");
584 answer.append(
",\n");
585 answer.append(
"\t\"query\":\"");
587 helpers::encode_json(query.value(), back);
590 if (hash.has_value()) {
591 answer.append(
",\n");
592 answer.append(
"\t\"hash\":\"");
593 helpers::encode_json(hash.value(), back);
596 answer.append(
"\n}");
601 if (!host.has_value()) {
604 return checkers::verify_dns_length(host.value());
616 if (non_special_scheme ==
"blob") {
623 return ada::helpers::concat(
result->get_protocol(),
"//",
638 return helpers::concat(non_special_scheme,
":");
646 if (!host.has_value()) {
649 if (port.has_value()) {
650 return host.value() +
":" +
get_port();
656 return host.value_or(
"");
662 return (!query.has_value() || (query.value().empty())) ?
""
663 :
"?" + query.value();
675 return port.has_value() ? std::to_string(port.value()) :
"";
681 return (!hash.has_value() || (hash.value().empty())) ?
""
682 :
"#" + hash.value();
685template <
bool overr
ide_hostname>
686bool url::set_host_or_hostname(
const std::string_view input) {
691 std::optional<std::string> previous_host = host;
692 std::optional<uint16_t> previous_port = port;
694 size_t host_end_pos = input.find(
'#');
695 std::string _host(input.data(), host_end_pos != std::string_view::npos
698 helpers::remove_ascii_tab_or_newline(_host);
699 std::string_view new_host(_host);
704 std::string_view host_view(_host.data(), _host.length());
705 auto [location, found_colon] =
706 helpers::get_host_delimiter_location(
is_special(), host_view);
714 std::string_view buffer = host_view.substr(0, location);
715 if (buffer.empty()) {
721 if constexpr (override_hostname) {
726 bool succeeded = parse_host(buffer);
728 host = std::move(previous_host);
729 update_base_port(previous_port);
735 std::string_view port_buffer = new_host.substr(location + 1);
736 if (!port_buffer.empty()) {
765 bool succeeded = parse_host(host_view);
767 host = std::move(previous_host);
768 update_base_port(previous_port);
775 size_t location = new_host.find_first_of(
"/\\?");
776 if (location != std::string_view::npos) {
777 new_host.remove_suffix(new_host.length() - location);
780 if (new_host.empty()) {
785 if (!parse_host(new_host)) {
786 host = std::move(previous_host);
787 update_base_port(previous_port);
792 if (host ==
"localhost") {
800 return set_host_or_hostname<false>(input);
804 return set_host_or_hostname<true>(input);
808 if (cannot_have_credentials_or_port()) {
811 username = ada::unicode::percent_encode(
817 if (cannot_have_credentials_or_port()) {
820 password = ada::unicode::percent_encode(
826 if (cannot_have_credentials_or_port()) {
835 std::string trimmed(input);
836 helpers::remove_ascii_tab_or_newline(trimmed);
838 if (trimmed.empty()) {
843 if (!ada::unicode::is_ascii_digit(trimmed.front())) {
848 auto first_non_digit =
849 std::ranges::find_if_not(trimmed, ada::unicode::is_ascii_digit);
850 std::string_view digits_to_parse =
851 std::string_view(trimmed.data(), first_non_digit - trimmed.begin());
854 std::optional<uint16_t> previous_port = port;
855 parse_port(digits_to_parse);
859 port = std::move(previous_port);
867 helpers::strip_trailing_spaces_from_opaque_path(*
this);
871 std::string new_value;
872 new_value = input[0] ==
'#' ? input.substr(1) : input;
873 helpers::remove_ascii_tab_or_newline(new_value);
874 hash = unicode::percent_encode(new_value,
880 query = std::nullopt;
881 helpers::strip_trailing_spaces_from_opaque_path(*
this);
885 std::string new_value;
886 new_value = input[0] ==
'?' ? input.substr(1) : input;
887 helpers::remove_ascii_tab_or_newline(new_value);
889 auto query_percent_encode_set =
893 query = ada::unicode::percent_encode(new_value, query_percent_encode_set);
906 std::string view(input);
907 helpers::remove_ascii_tab_or_newline(view);
919 std::string::iterator pointer =
920 std::ranges::find_if_not(view, unicode::is_alnum_plus);
922 if (pointer != view.end() && *pointer ==
':') {
923 return parse_scheme<true>(
924 std::string_view(view.data(), pointer - view.begin()));
936 return out.has_value();
#define ada_really_inline
constexpr uint8_t QUERY_PERCENT_ENCODE[32]
constexpr uint8_t SPECIAL_QUERY_PERCENT_ENCODE[32]
constexpr uint8_t C0_CONTROL_PERCENT_ENCODE[32]
constexpr uint8_t USERINFO_PERCENT_ENCODE[32]
constexpr uint8_t FRAGMENT_PERCENT_ENCODE[32]
constexpr bool has_hex_prefix(std::string_view input)
constexpr uint64_t ipv4_fast_fail
constexpr bool is_alpha(char x) noexcept
constexpr bool is_digit(char x) noexcept
ada_really_inline constexpr uint64_t try_parse_ipv4_fast(std::string_view input) noexcept
constexpr std::string_view is_special_list[]
constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept
std::string ipv6(const std::array< uint16_t, 8 > &address) noexcept
std::string ipv4(uint64_t address) noexcept
tl::expected< result_type, ada::errors > result
ada_warn_unused ada::result< result_type > parse(std::string_view input, const result_type *base_url=nullptr)
Definitions for the URL scheme.
ada_really_inline constexpr bool is_special() const noexcept
void set_hash(std::string_view input)
std::string get_search() const noexcept
bool set_hostname(std::string_view input)
bool set_host(std::string_view input)
ada_really_inline bool has_credentials() const noexcept
bool set_password(std::string_view input)
void set_search(std::string_view input)
bool set_href(std::string_view input)
bool set_username(std::string_view input)
std::string get_host() const noexcept
std::string get_hash() const noexcept
bool set_pathname(std::string_view input)
std::string get_origin() const noexcept override
std::string get_hostname() const noexcept
const std::string & get_password() const noexcept
bool set_protocol(std::string_view input)
std::string get_port() const noexcept
const std::string & get_username() const noexcept
bool set_port(std::string_view input)
constexpr bool has_search() const noexcept override
std::string to_string() const override
std::string get_protocol() const noexcept
bool has_valid_domain() const noexcept override
Definitions for unicode operations.