16 const result_type* base_url) {
22 constexpr bool result_type_is_ada_url =
23 std::is_same<ada::url, result_type>::value;
24 constexpr bool result_type_is_ada_url_aggregator =
25 std::is_same<ada::url_aggregator, result_type>::value;
26 static_assert(result_type_is_ada_url ||
27 result_type_is_ada_url_aggregator);
30 ada_log(
"ada::parser::parse_url('", user_input,
"' [", user_input.size(),
31 " bytes],", (base_url !=
nullptr ? base_url->to_string() :
"null"),
39 if (user_input.size() > std::numeric_limits<uint32_t>::max()) {
45 if (base_url !=
nullptr) {
51 if constexpr (result_type_is_ada_url_aggregator && store_values) {
63 uint32_t reserve_capacity =
65 helpers::leading_zeroes(uint32_t(1 | user_input.size()))) +
67 url.reserve(reserve_capacity);
69 std::string tmp_buffer;
70 std::string_view internal_input;
71 if (unicode::has_tabs_or_newline(user_input)) {
72 tmp_buffer = user_input;
75 helpers::remove_ascii_tab_or_newline(tmp_buffer);
76 internal_input = tmp_buffer;
78 internal_input = user_input;
83 std::string_view url_data = internal_input;
84 helpers::trim_c0_whitespace(url_data);
87 std::optional<std::string_view> fragment = helpers::prune_hash(url_data);
96 size_t input_position = 0;
97 const size_t input_size = url_data.size();
102 while (input_position <= input_size) {
103 ada_log(
"In parsing at ", input_position,
" out of ", input_size,
107 ada_log(
"SCHEME_START ", helpers::substring(url_data, input_position));
110 if ((input_position != input_size) &&
122 ada_log(
"SCHEME ", helpers::substring(url_data, input_position));
125 while ((input_position != input_size) &&
126 (ada::unicode::is_alnum_plus(url_data[input_position]))) {
130 if ((input_position != input_size) &&
131 (url_data[input_position] ==
':')) {
132 ada_log(
"SCHEME the scheme should be ",
133 url_data.substr(0, input_position));
134 if constexpr (result_type_is_ada_url) {
135 if (!
url.parse_scheme(url_data.substr(0, input_position))) {
140 if (!
url.parse_scheme_with_colon(
141 url_data.substr(0, input_position + 1))) {
156 base_url->type ==
url.type) {
167 else if (input_position + 1 < input_size &&
168 url_data[input_position + 1] ==
'/') {
190 ada_log(
"NO_SCHEME ", helpers::substring(url_data, input_position));
193 if (base_url ==
nullptr ||
194 (base_url->has_opaque_path && !fragment.has_value())) {
195 ada_log(
"NO_SCHEME validation error");
202 else if (base_url->has_opaque_path && fragment.has_value() &&
203 input_position == input_size) {
204 ada_log(
"NO_SCHEME opaque base with fragment");
205 url.copy_scheme(*base_url);
208 if constexpr (result_type_is_ada_url) {
209 url.path = base_url->path;
210 url.query = base_url->query;
212 url.update_base_pathname(base_url->get_pathname());
213 url.update_base_search(base_url->get_search());
215 url.update_unencoded_base_hash(*fragment);
221 ada_log(
"NO_SCHEME non-file relative path");
226 ada_log(
"NO_SCHEME file base type");
232 ada_log(
"AUTHORITY ", helpers::substring(url_data, input_position));
241 const bool contains_ampersand =
242 (url_data.find(
'@', input_position) != std::string_view::npos);
244 if (!contains_ampersand) {
248 bool at_sign_seen{
false};
249 bool password_token_seen{
false};
256 std::string_view view = helpers::substring(url_data, input_position);
259 url.
is_special() ? helpers::find_authority_delimiter_special(view)
260 : helpers::find_authority_delimiter(view);
261 std::string_view authority_view(view.data(), location);
262 size_t end_of_authority = input_position + authority_view.size();
264 if ((end_of_authority != input_size) &&
265 (url_data[end_of_authority] ==
'@')) {
268 if (password_token_seen) {
269 if constexpr (result_type_is_ada_url) {
270 url.password +=
"%40";
272 url.append_base_password(
"%40");
275 if constexpr (result_type_is_ada_url) {
276 url.username +=
"%40";
278 url.append_base_username(
"%40");
285 if (!password_token_seen) {
286 size_t password_token_location = authority_view.find(
':');
287 password_token_seen =
288 password_token_location != std::string_view::npos;
290 if constexpr (store_values) {
291 if (!password_token_seen) {
292 if constexpr (result_type_is_ada_url) {
293 url.username += unicode::percent_encode(
297 url.append_base_username(unicode::percent_encode(
302 if constexpr (result_type_is_ada_url) {
303 url.username += unicode::percent_encode(
304 authority_view.substr(0, password_token_location),
306 url.password += unicode::percent_encode(
307 authority_view.substr(password_token_location + 1),
310 url.append_base_username(unicode::percent_encode(
311 authority_view.substr(0, password_token_location),
313 url.append_base_password(unicode::percent_encode(
314 authority_view.substr(password_token_location + 1),
319 }
else if constexpr (store_values) {
320 if constexpr (result_type_is_ada_url) {
321 url.password += unicode::percent_encode(
324 url.append_base_password(unicode::percent_encode(
332 else if (end_of_authority == input_size ||
333 url_data[end_of_authority] ==
'/' ||
334 url_data[end_of_authority] ==
'?' ||
338 if (at_sign_seen && authority_view.empty()) {
345 if (end_of_authority == input_size) {
346 if constexpr (store_values) {
347 if (fragment.has_value()) {
348 url.update_unencoded_base_hash(*fragment);
353 input_position = end_of_authority + 1;
359 ada_log(
"SPECIAL_RELATIVE_OR_AUTHORITY ",
360 helpers::substring(url_data, input_position));
365 std::string_view view = helpers::substring(url_data, input_position);
378 ada_log(
"PATH_OR_AUTHORITY ",
379 helpers::substring(url_data, input_position));
382 if ((input_position != input_size) &&
383 (url_data[input_position] ==
'/')) {
394 ada_log(
"RELATIVE_SCHEME ",
395 helpers::substring(url_data, input_position));
398 url.copy_scheme(*base_url);
401 if ((input_position != input_size) &&
402 (url_data[input_position] ==
'/')) {
404 "RELATIVE_SCHEME if c is U+002F (/), then set state to relative "
407 }
else if (
url.
is_special() && (input_position != input_size) &&
408 (url_data[input_position] ==
'\\')) {
412 "RELATIVE_SCHEME if url is special and c is U+005C, validation "
413 "error, set state to relative slash state");
416 ada_log(
"RELATIVE_SCHEME otherwise");
421 if constexpr (result_type_is_ada_url) {
422 url.username = base_url->username;
423 url.password = base_url->password;
424 url.host = base_url->host;
425 url.port = base_url->port;
428 url.path = base_url->path;
429 url.query = base_url->query;
431 url.update_base_authority(base_url->get_href(),
432 base_url->get_components());
436 url.update_base_port(base_url->retrieve_base_port());
439 url.update_base_pathname(base_url->get_pathname());
440 url.update_base_search(base_url->get_search());
447 if ((input_position != input_size) &&
448 (url_data[input_position] ==
'?')) {
452 else if (input_position != input_size) {
455 if constexpr (result_type_is_ada_url) {
457 helpers::shorten_path(
url.path,
url.type);
460 if (helpers::shorten_path(path,
url.type)) {
461 url.update_base_pathname(std::string(path));
473 ada_log(
"RELATIVE_SLASH ",
474 helpers::substring(url_data, input_position));
478 (url_data[input_position] ==
'/' ||
479 url_data[input_position] ==
'\\')) {
484 else if ((input_position != input_size) &&
485 (url_data[input_position] ==
'/')) {
495 if constexpr (result_type_is_ada_url) {
496 url.username = base_url->username;
497 url.password = base_url->password;
498 url.host = base_url->host;
499 url.port = base_url->port;
501 url.update_base_authority(base_url->get_href(),
502 base_url->get_components());
506 url.update_base_port(base_url->retrieve_base_port());
516 ada_log(
"SPECIAL_AUTHORITY_SLASHES ",
517 helpers::substring(url_data, input_position));
522 std::string_view view = helpers::substring(url_data, input_position);
530 ada_log(
"SPECIAL_AUTHORITY_IGNORE_SLASHES ",
531 helpers::substring(url_data, input_position));
535 while ((input_position != input_size) &&
536 ((url_data[input_position] ==
'/') ||
537 (url_data[input_position] ==
'\\'))) {
545 ada_log(
"QUERY ", helpers::substring(url_data, input_position));
546 if constexpr (store_values) {
549 const uint8_t* query_percent_encode_set =
556 url.update_base_search(helpers::substring(url_data, input_position),
557 query_percent_encode_set);
558 ada_log(
"QUERY update_base_search completed ");
559 if (fragment.has_value()) {
560 url.update_unencoded_base_hash(*fragment);
566 ada_log(
"HOST ", helpers::substring(url_data, input_position));
568 std::string_view host_view =
569 helpers::substring(url_data, input_position);
570 auto [location, found_colon] =
571 helpers::get_host_delimiter_location(
url.
is_special(), host_view);
572 input_position = (location != std::string_view::npos)
573 ? input_position + location
582 ada_log(
"HOST parsing ", host_view);
583 if (!
url.parse_host(host_view)) {
604 ada_log(
"HOST parsing ", host_view,
" href=",
url.
get_href());
607 if (host_view.empty()) {
608 url.update_base_hostname(
"");
609 }
else if (!
url.parse_host(host_view)) {
622 ada_log(
"OPAQUE_PATH ", helpers::substring(url_data, input_position));
623 std::string_view view = helpers::substring(url_data, input_position);
626 size_t location = view.find(
'?');
627 if (location != std::string_view::npos) {
628 view.remove_suffix(view.size() - location);
630 input_position += location + 1;
632 input_position = input_size + 1;
637 url.update_base_pathname(unicode::percent_encode(
642 ada_log(
"PORT ", helpers::substring(url_data, input_position));
643 std::string_view port_view =
644 helpers::substring(url_data, input_position);
645 size_t consumed_bytes =
url.parse_port(port_view,
true);
646 input_position += consumed_bytes;
654 ada_log(
"PATH_START ", helpers::substring(url_data, input_position));
663 if (input_position == input_size) {
664 if constexpr (store_values) {
665 url.update_base_pathname(
"/");
666 if (fragment.has_value()) {
667 url.update_unencoded_base_hash(*fragment);
675 if ((url_data[input_position] !=
'/') &&
676 (url_data[input_position] !=
'\\')) {
682 else if ((input_position != input_size) &&
683 (url_data[input_position] ==
'?')) {
687 else if (input_position != input_size) {
692 if (url_data[input_position] !=
'/') {
701 std::string_view view = helpers::substring(url_data, input_position);
702 ada_log(
"PATH ", helpers::substring(url_data, input_position));
706 size_t locofquestionmark = view.find(
'?');
707 if (locofquestionmark != std::string_view::npos) {
709 view.remove_suffix(view.size() - locofquestionmark);
710 input_position += locofquestionmark + 1;
712 input_position = input_size + 1;
714 if constexpr (store_values) {
715 if constexpr (result_type_is_ada_url) {
716 helpers::parse_prepared_path(view,
url.type,
url.path);
718 url.consume_prepared_path(view);
725 ada_log(
"FILE_SLASH ", helpers::substring(url_data, input_position));
728 if ((input_position != input_size) &&
729 (url_data[input_position] ==
'/' ||
730 url_data[input_position] ==
'\\')) {
731 ada_log(
"FILE_SLASH c is U+002F or U+005C");
736 ada_log(
"FILE_SLASH otherwise");
740 if (base_url !=
nullptr &&
743 if constexpr (result_type_is_ada_url) {
744 url.host = base_url->host;
753 if (!base_url->get_pathname().empty()) {
755 helpers::substring(url_data, input_position))) {
756 std::string_view first_base_url_path =
757 base_url->get_pathname().substr(1);
758 size_t loc = first_base_url_path.find(
'/');
759 if (loc != std::string_view::npos) {
760 helpers::resize(first_base_url_path, loc);
763 first_base_url_path)) {
764 if constexpr (result_type_is_ada_url) {
766 url.path += first_base_url_path;
768 url.append_base_pathname(
769 helpers::concat(
"/", first_base_url_path));
783 std::string_view view = helpers::substring(url_data, input_position);
784 ada_log(
"FILE_HOST ", helpers::substring(url_data, input_position));
786 size_t location = view.find_first_of(
"/\\?");
787 std::string_view file_host_buffer(
789 (location != std::string_view::npos) ? location : view.size());
793 }
else if (file_host_buffer.empty()) {
795 if constexpr (result_type_is_ada_url) {
798 url.update_base_hostname(
"");
803 size_t consumed_bytes = file_host_buffer.size();
804 input_position += consumed_bytes;
807 if (!
url.parse_host(file_host_buffer)) {
811 if constexpr (result_type_is_ada_url) {
813 if (
url.host.has_value() &&
url.host.value() ==
"localhost") {
818 url.update_base_hostname(
"");
829 ada_log(
"FILE ", helpers::substring(url_data, input_position));
830 std::string_view file_view =
831 helpers::substring(url_data, input_position);
833 url.set_protocol_as_file();
834 if constexpr (result_type_is_ada_url) {
838 url.update_base_hostname(
"");
841 if (input_position != input_size &&
842 (url_data[input_position] ==
'/' ||
843 url_data[input_position] ==
'\\')) {
844 ada_log(
"FILE c is U+002F or U+005C");
849 else if (base_url !=
nullptr &&
853 ada_log(
"FILE base non-null");
854 if constexpr (result_type_is_ada_url) {
855 url.host = base_url->host;
856 url.path = base_url->path;
857 url.query = base_url->query;
862 url.update_base_pathname(base_url->get_pathname());
863 url.update_base_search(base_url->get_search());
869 if (input_position != input_size && url_data[input_position] ==
'?') {
873 else if (input_position != input_size) {
879 if constexpr (result_type_is_ada_url) {
880 helpers::shorten_path(
url.path,
url.type);
883 if (helpers::shorten_path(path,
url.type)) {
884 url.update_base_pathname(std::string(path));
891 url.clear_pathname();
902 ada_log(
"FILE go to path");
914 if constexpr (store_values) {
915 if (fragment.has_value()) {
916 url.update_unencoded_base_hash(*fragment);