14 const result_type* base_url) {
20 constexpr bool result_type_is_ada_url = std::is_same_v<url, result_type>;
21 constexpr bool result_type_is_ada_url_aggregator =
22 std::is_same_v<url_aggregator, result_type>;
23 static_assert(result_type_is_ada_url ||
24 result_type_is_ada_url_aggregator);
27 ada_log(
"ada::parser::parse_url('", user_input,
"' [", user_input.size(),
28 " bytes],", (base_url !=
nullptr ? base_url->to_string() :
"null"),
36 if (user_input.size() > std::numeric_limits<uint32_t>::max()) [[unlikely]] {
42 if (base_url !=
nullptr) {
48 if constexpr (result_type_is_ada_url_aggregator && store_values) {
60 uint32_t reserve_capacity =
62 helpers::leading_zeroes(uint32_t(1 | user_input.size()))) +
64 url.reserve(reserve_capacity);
66 std::string tmp_buffer;
67 std::string_view url_data;
68 if (unicode::has_tabs_or_newline(user_input)) [[unlikely]] {
69 tmp_buffer = user_input;
72 helpers::remove_ascii_tab_or_newline(tmp_buffer);
73 url_data = tmp_buffer;
75 url_data = user_input;
80 helpers::trim_c0_whitespace(url_data);
83 std::optional<std::string_view> fragment = helpers::prune_hash(url_data);
92 size_t input_position = 0;
93 const size_t input_size = url_data.size();
98 while (input_position <= input_size) {
99 ada_log(
"In parsing at ", input_position,
" out of ", input_size,
103 ada_log(
"SCHEME_START ", helpers::substring(url_data, input_position));
106 if ((input_position != input_size) &&
118 ada_log(
"SCHEME ", helpers::substring(url_data, input_position));
121 while ((input_position != input_size) &&
122 (unicode::is_alnum_plus(url_data[input_position]))) {
126 if ((input_position != input_size) &&
127 (url_data[input_position] ==
':')) {
128 ada_log(
"SCHEME the scheme should be ",
129 url_data.substr(0, input_position));
130 if constexpr (result_type_is_ada_url) {
131 if (!
url.parse_scheme(url_data.substr(0, input_position))) {
136 if (!
url.parse_scheme_with_colon(
137 url_data.substr(0, input_position + 1))) {
152 base_url->type ==
url.type) {
163 else if (input_position + 1 < input_size &&
164 url_data[input_position + 1] ==
'/') {
186 ada_log(
"NO_SCHEME ", helpers::substring(url_data, input_position));
189 if (base_url ==
nullptr ||
190 (base_url->has_opaque_path && !fragment.has_value())) {
191 ada_log(
"NO_SCHEME validation error");
198 else if (base_url->has_opaque_path && fragment.has_value() &&
199 input_position == input_size) {
200 ada_log(
"NO_SCHEME opaque base with fragment");
201 url.copy_scheme(*base_url);
204 if constexpr (result_type_is_ada_url) {
205 url.path = base_url->path;
206 url.query = base_url->query;
208 url.update_base_pathname(base_url->get_pathname());
209 url.update_base_search(base_url->get_search());
211 url.update_unencoded_base_hash(*fragment);
217 ada_log(
"NO_SCHEME non-file relative path");
222 ada_log(
"NO_SCHEME file base type");
228 ada_log(
"AUTHORITY ", helpers::substring(url_data, input_position));
239 if (url_data.find(
'@', input_position) == std::string_view::npos) {
243 bool at_sign_seen{
false};
244 bool password_token_seen{
false};
251 std::string_view view = url_data.substr(input_position);
254 url.
is_special() ? helpers::find_authority_delimiter_special(view)
255 : helpers::find_authority_delimiter(view);
256 std::string_view authority_view = view.substr(0, location);
257 size_t end_of_authority = input_position + authority_view.size();
259 if ((end_of_authority != input_size) &&
260 (url_data[end_of_authority] ==
'@')) {
263 if (password_token_seen) {
264 if constexpr (result_type_is_ada_url) {
265 url.password +=
"%40";
267 url.append_base_password(
"%40");
270 if constexpr (result_type_is_ada_url) {
271 url.username +=
"%40";
273 url.append_base_username(
"%40");
280 if (!password_token_seen) {
281 size_t password_token_location = authority_view.find(
':');
282 password_token_seen =
283 password_token_location != std::string_view::npos;
285 if constexpr (store_values) {
286 if (!password_token_seen) {
287 if constexpr (result_type_is_ada_url) {
288 url.username += unicode::percent_encode(
292 url.append_base_username(unicode::percent_encode(
297 if constexpr (result_type_is_ada_url) {
298 url.username += unicode::percent_encode(
299 authority_view.substr(0, password_token_location),
301 url.password += unicode::percent_encode(
302 authority_view.substr(password_token_location + 1),
305 url.append_base_username(unicode::percent_encode(
306 authority_view.substr(0, password_token_location),
308 url.append_base_password(unicode::percent_encode(
309 authority_view.substr(password_token_location + 1),
314 }
else if constexpr (store_values) {
315 if constexpr (result_type_is_ada_url) {
316 url.password += unicode::percent_encode(
319 url.append_base_password(unicode::percent_encode(
327 else if (end_of_authority == input_size ||
328 url_data[end_of_authority] ==
'/' ||
329 url_data[end_of_authority] ==
'?' ||
333 if (at_sign_seen && authority_view.empty()) {
340 if (end_of_authority == input_size) {
341 if constexpr (store_values) {
342 if (fragment.has_value()) {
343 url.update_unencoded_base_hash(*fragment);
348 input_position = end_of_authority + 1;
354 ada_log(
"SPECIAL_RELATIVE_OR_AUTHORITY ",
355 helpers::substring(url_data, input_position));
360 if (url_data.substr(input_position, 2) ==
"//") {
372 ada_log(
"PATH_OR_AUTHORITY ",
373 helpers::substring(url_data, input_position));
376 if ((input_position != input_size) &&
377 (url_data[input_position] ==
'/')) {
388 ada_log(
"RELATIVE_SCHEME ",
389 helpers::substring(url_data, input_position));
392 url.copy_scheme(*base_url);
395 if ((input_position != input_size) &&
396 (url_data[input_position] ==
'/')) {
398 "RELATIVE_SCHEME if c is U+002F (/), then set state to relative "
401 }
else if (
url.
is_special() && (input_position != input_size) &&
402 (url_data[input_position] ==
'\\')) {
406 "RELATIVE_SCHEME if url is special and c is U+005C, validation "
407 "error, set state to relative slash state");
410 ada_log(
"RELATIVE_SCHEME otherwise");
415 if constexpr (result_type_is_ada_url) {
416 url.username = base_url->username;
417 url.password = base_url->password;
418 url.host = base_url->host;
419 url.port = base_url->port;
422 url.path = base_url->path;
423 url.query = base_url->query;
425 url.update_base_authority(base_url->get_href(),
426 base_url->get_components());
427 url.update_host_to_base_host(base_url->get_hostname());
428 url.update_base_port(base_url->retrieve_base_port());
431 url.update_base_pathname(base_url->get_pathname());
432 url.update_base_search(base_url->get_search());
439 if ((input_position != input_size) &&
440 (url_data[input_position] ==
'?')) {
444 else if (input_position != input_size) {
447 if constexpr (result_type_is_ada_url) {
449 helpers::shorten_path(
url.path,
url.type);
452 if (helpers::shorten_path(path,
url.type)) {
453 url.update_base_pathname(std::move(std::string(path)));
465 ada_log(
"RELATIVE_SLASH ",
466 helpers::substring(url_data, input_position));
470 (url_data[input_position] ==
'/' ||
471 url_data[input_position] ==
'\\')) {
476 else if ((input_position != input_size) &&
477 (url_data[input_position] ==
'/')) {
487 if constexpr (result_type_is_ada_url) {
488 url.username = base_url->username;
489 url.password = base_url->password;
490 url.host = base_url->host;
491 url.port = base_url->port;
493 url.update_base_authority(base_url->get_href(),
494 base_url->get_components());
495 url.update_host_to_base_host(base_url->get_hostname());
496 url.update_base_port(base_url->retrieve_base_port());
506 ada_log(
"SPECIAL_AUTHORITY_SLASHES ",
507 helpers::substring(url_data, input_position));
512 if (url_data.substr(input_position, 2) ==
"//") {
519 ada_log(
"SPECIAL_AUTHORITY_IGNORE_SLASHES ",
520 helpers::substring(url_data, input_position));
524 while ((input_position != input_size) &&
525 ((url_data[input_position] ==
'/') ||
526 (url_data[input_position] ==
'\\'))) {
534 ada_log(
"QUERY ", helpers::substring(url_data, input_position));
535 if constexpr (store_values) {
538 const uint8_t* query_percent_encode_set =
544 url.update_base_search(url_data.substr(input_position),
545 query_percent_encode_set);
546 ada_log(
"QUERY update_base_search completed ");
547 if (fragment.has_value()) {
548 url.update_unencoded_base_hash(*fragment);
554 ada_log(
"HOST ", helpers::substring(url_data, input_position));
556 std::string_view host_view = url_data.substr(input_position);
557 auto [location, found_colon] =
558 helpers::get_host_delimiter_location(
url.
is_special(), host_view);
559 input_position = (location != std::string_view::npos)
560 ? input_position + location
569 ada_log(
"HOST parsing ", host_view);
570 if (!
url.parse_host(host_view)) {
591 ada_log(
"HOST parsing ", host_view,
" href=",
url.
get_href());
594 if (host_view.empty()) {
595 url.update_base_hostname(
"");
596 }
else if (!
url.parse_host(host_view)) {
609 ada_log(
"OPAQUE_PATH ", helpers::substring(url_data, input_position));
610 std::string_view view = url_data.substr(input_position);
613 size_t location = view.find(
'?');
614 if (location != std::string_view::npos) {
615 view.remove_suffix(view.size() - location);
617 input_position += location + 1;
619 input_position = input_size + 1;
625 if (view.ends_with(
' ')) {
626 std::string modified_view =
627 std::string(view.begin(), view.end() - 1) +
"%20";
628 url.update_base_pathname(unicode::percent_encode(
631 url.update_base_pathname(unicode::percent_encode(
637 ada_log(
"PORT ", helpers::substring(url_data, input_position));
638 std::string_view port_view = url_data.substr(input_position);
639 input_position +=
url.parse_port(port_view,
true);
647 ada_log(
"PATH_START ", helpers::substring(url_data, input_position));
656 if (input_position == input_size) {
657 if constexpr (store_values) {
658 url.update_base_pathname(
"/");
659 if (fragment.has_value()) {
660 url.update_unencoded_base_hash(*fragment);
668 if ((url_data[input_position] !=
'/') &&
669 (url_data[input_position] !=
'\\')) {
675 else if ((input_position != input_size) &&
676 (url_data[input_position] ==
'?')) {
680 else if (input_position != input_size) {
685 if (url_data[input_position] !=
'/') {
694 ada_log(
"PATH ", helpers::substring(url_data, input_position));
695 std::string_view view = url_data.substr(input_position);
699 size_t locofquestionmark = view.find(
'?');
700 if (locofquestionmark != std::string_view::npos) {
702 view.remove_suffix(view.size() - locofquestionmark);
703 input_position += locofquestionmark + 1;
705 input_position = input_size + 1;
707 if constexpr (store_values) {
708 if constexpr (result_type_is_ada_url) {
709 helpers::parse_prepared_path(view,
url.type,
url.path);
711 url.consume_prepared_path(view);
718 ada_log(
"FILE_SLASH ", helpers::substring(url_data, input_position));
721 if ((input_position != input_size) &&
722 (url_data[input_position] ==
'/' ||
723 url_data[input_position] ==
'\\')) {
724 ada_log(
"FILE_SLASH c is U+002F or U+005C");
729 ada_log(
"FILE_SLASH otherwise");
735 if constexpr (result_type_is_ada_url) {
736 url.host = base_url->host;
738 url.update_host_to_base_host(base_url->get_host());
744 if (!base_url->get_pathname().empty()) {
746 url_data.substr(input_position))) {
747 std::string_view first_base_url_path =
748 base_url->get_pathname().substr(1);
749 size_t loc = first_base_url_path.find(
'/');
750 if (loc != std::string_view::npos) {
751 helpers::resize(first_base_url_path, loc);
754 first_base_url_path)) {
755 if constexpr (result_type_is_ada_url) {
757 url.path += first_base_url_path;
759 url.append_base_pathname(
760 helpers::concat(
"/", first_base_url_path));
774 ada_log(
"FILE_HOST ", helpers::substring(url_data, input_position));
775 std::string_view view = url_data.substr(input_position);
777 size_t location = view.find_first_of(
"/\\?");
778 std::string_view file_host_buffer(
780 (location != std::string_view::npos) ? location : view.size());
784 }
else if (file_host_buffer.empty()) {
786 if constexpr (result_type_is_ada_url) {
789 url.update_base_hostname(
"");
794 size_t consumed_bytes = file_host_buffer.size();
795 input_position += consumed_bytes;
798 if (!
url.parse_host(file_host_buffer)) {
802 if constexpr (result_type_is_ada_url) {
804 if (
url.host.has_value() &&
url.host.value() ==
"localhost") {
809 url.update_base_hostname(
"");
820 ada_log(
"FILE ", helpers::substring(url_data, input_position));
821 std::string_view file_view = url_data.substr(input_position);
823 url.set_protocol_as_file();
824 if constexpr (result_type_is_ada_url) {
828 url.update_base_hostname(
"");
831 if (input_position != input_size &&
832 (url_data[input_position] ==
'/' ||
833 url_data[input_position] ==
'\\')) {
834 ada_log(
"FILE c is U+002F or U+005C");
842 ada_log(
"FILE base non-null");
843 if constexpr (result_type_is_ada_url) {
844 url.host = base_url->host;
845 url.path = base_url->path;
846 url.query = base_url->query;
848 url.update_host_to_base_host(base_url->get_hostname());
849 url.update_base_pathname(base_url->get_pathname());
850 url.update_base_search(base_url->get_search());
856 if (input_position != input_size && url_data[input_position] ==
'?') {
860 else if (input_position != input_size) {
866 if constexpr (result_type_is_ada_url) {
867 helpers::shorten_path(
url.path,
url.type);
870 if (helpers::shorten_path(path,
url.type)) {
871 url.update_base_pathname(std::move(std::string(path)));
878 url.clear_pathname();
889 ada_log(
"FILE go to path");
901 if constexpr (store_values) {
902 if (fragment.has_value()) {
903 url.update_unencoded_base_hash(*fragment);