21 return "INVALID_CHAR";
33 return "ESCAPED_CHAR";
35 return "OTHER_MODIFIER";
46template <url_pattern_regex::regex_concept regex_prov
ider>
47constexpr void constructor_string_parser<regex_provider>::rewind() {
49 token_index = component_start;
54template <url_pattern_regex::regex_concept regex_prov
ider>
55constexpr bool constructor_string_parser<regex_provider>::is_hash_prefix() {
58 return is_non_special_pattern_char(token_index,
'#');
61template <url_pattern_regex::regex_concept regex_prov
ider>
62constexpr bool constructor_string_parser<regex_provider>::is_search_prefix() {
65 if (is_non_special_pattern_char(token_index,
'?')) {
71 if (token_list[token_index].value !=
"?") {
76 if (token_index == 0)
return true;
78 auto previous_index = token_index - 1;
81 auto previous_token = get_safe_token(previous_index);
94template <url_pattern_regex::regex_concept regex_prov
ider>
96constructor_string_parser<regex_provider>::is_non_special_pattern_char(
97 size_t index, uint32_t value)
const {
99 auto token = get_safe_token(index);
106 static_cast<uint32_t
>(
token->
value[0]) != value) {
120template <url_pattern_regex::regex_concept regex_prov
ider>
121constexpr const token*
122constructor_string_parser<regex_provider>::get_safe_token(
size_t index)
const {
125 if (index < token_list.size()) [[likely]] {
126 return &token_list[index];
137 return &token_list.back();
140template <url_pattern_regex::regex_concept regex_prov
ider>
141constexpr bool constructor_string_parser<regex_provider>::is_group_open()
148template <url_pattern_regex::regex_concept regex_prov
ider>
149constexpr bool constructor_string_parser<regex_provider>::is_group_close()
156template <url_pattern_regex::regex_concept regex_prov
ider>
158constructor_string_parser<regex_provider>::next_is_authority_slashes()
const {
161 if (!is_non_special_pattern_char(token_index + 1,
'/')) {
166 if (!is_non_special_pattern_char(token_index + 2,
'/')) {
172template <url_pattern_regex::regex_concept regex_prov
ider>
173constexpr bool constructor_string_parser<regex_provider>::is_protocol_suffix()
177 return is_non_special_pattern_char(token_index,
':');
180template <url_pattern_regex::regex_concept regex_prov
ider>
181void constructor_string_parser<regex_provider>::change_state(State new_state,
186 if (
state != State::INIT &&
state != State::AUTHORITY &&
187 state != State::DONE) {
188 auto value = make_component_string();
191 case State::PROTOCOL: {
195 case State::USERNAME: {
199 case State::PASSWORD: {
203 case State::HOSTNAME: {
211 case State::PATHNAME: {
215 case State::SEARCH: {
229 if (
state != State::INIT && new_state != State::DONE) {
234 if ((
state == State::PROTOCOL ||
state == State::AUTHORITY ||
235 state == State::USERNAME ||
state == State::PASSWORD) &&
236 (new_state == State::PORT || new_state == State::PATHNAME ||
237 new_state == State::SEARCH || new_state == State::HASH) &&
245 if ((
state == State::PROTOCOL ||
state == State::AUTHORITY ||
246 state == State::USERNAME ||
state == State::PASSWORD ||
247 state == State::HOSTNAME ||
state == State::PORT) &&
248 (new_state == State::SEARCH || new_state == State::HASH) &&
250 if (protocol_matches_a_special_scheme_flag) {
262 if ((
state == State::PROTOCOL ||
state == State::AUTHORITY ||
263 state == State::USERNAME ||
state == State::PASSWORD ||
264 state == State::HOSTNAME ||
state == State::PORT ||
265 state == State::PATHNAME) &&
266 new_state == State::HASH && !
result.search) {
275 component_start = token_index;
280template <url_pattern_regex::regex_concept regex_prov
ider>
281std::string constructor_string_parser<regex_provider>::make_component_string() {
287 const auto end_index = token_list[token_index].index;
290 const auto component_start_token = get_safe_token(component_start);
293 const auto component_start_input_index = component_start_token->index;
296 return input.substr(component_start_input_index,
297 end_index - component_start_input_index);
300template <url_pattern_regex::regex_concept regex_prov
ider>
302constructor_string_parser<regex_provider>::is_an_identity_terminator()
const {
305 return is_non_special_pattern_char(token_index,
'@');
308template <url_pattern_regex::regex_concept regex_prov
ider>
309constexpr bool constructor_string_parser<regex_provider>::is_pathname_start()
313 return is_non_special_pattern_char(token_index,
'/');
316template <url_pattern_regex::regex_concept regex_prov
ider>
317constexpr bool constructor_string_parser<regex_provider>::is_password_prefix()
321 return is_non_special_pattern_char(token_index,
':');
324template <url_pattern_regex::regex_concept regex_prov
ider>
325constexpr bool constructor_string_parser<regex_provider>::is_an_ipv6_open()
329 return is_non_special_pattern_char(token_index,
'[');
332template <url_pattern_regex::regex_concept regex_prov
ider>
333constexpr bool constructor_string_parser<regex_provider>::is_an_ipv6_close()
337 return is_non_special_pattern_char(token_index,
']');
340template <url_pattern_regex::regex_concept regex_prov
ider>
341constexpr bool constructor_string_parser<regex_provider>::is_port_prefix()
345 return is_non_special_pattern_char(token_index,
':');
349 ada_log(
"Tokenizer::get_next_code_point called with index=", next_index);
353 size_t number_bytes = 0;
354 unsigned char first_byte = input[next_index];
356 if ((first_byte & 0x80) == 0) {
359 code_point = first_byte;
360 ada_log(
"Tokenizer::get_next_code_point returning ASCII code point=",
361 uint32_t(code_point));
362 ada_log(
"Tokenizer::get_next_code_point next_index =", next_index,
363 " input.size()=", input.size());
366 ada_log(
"Tokenizer::get_next_code_point read first byte=",
367 uint32_t(first_byte));
368 if ((first_byte & 0xE0) == 0xC0) {
369 code_point = first_byte & 0x1F;
371 ada_log(
"Tokenizer::get_next_code_point two bytes");
372 }
else if ((first_byte & 0xF0) == 0xE0) {
373 code_point = first_byte & 0x0F;
375 ada_log(
"Tokenizer::get_next_code_point three bytes");
376 }
else if ((first_byte & 0xF8) == 0xF0) {
377 code_point = first_byte & 0x07;
379 ada_log(
"Tokenizer::get_next_code_point four bytes");
383 for (
size_t i = 1 + next_index; i < number_bytes + next_index; ++i) {
384 unsigned char byte = input[i];
385 ada_log(
"Tokenizer::get_next_code_point read byte=", uint32_t(
byte));
386 code_point = (code_point << 6) | (
byte & 0x3F);
388 ada_log(
"Tokenizer::get_next_code_point returning non-ASCII code point=",
389 uint32_t(code_point));
390 ada_log(
"Tokenizer::get_next_code_point next_index =", next_index,
391 " input.size()=", input.size());
392 next_index += number_bytes;
396 ada_log(
"Tokenizer::seek_and_get_next_code_point called with new_index=",
399 next_index = new_index;
405 size_t value_position,
size_t value_length) {
406 ada_log(
"Tokenizer::add_token called with type=",
to_string(type),
407 " next_position=", next_position,
" value_position=", value_position);
416 token_list.emplace_back(type, index,
417 input.substr(value_position, value_length));
419 index = next_position;
423 size_t next_position,
424 size_t value_position) {
426 auto computed_length = next_position - value_position;
429 add_token(type, next_position, value_position, computed_length);
433 ada_log(
"Tokenizer::add_token_with_defaults called with type=",
442 size_t value_position) {
445 ada_log(
"process_tokenizing_error failed with next_position=",
446 next_position,
" value_position=", value_position);
458template <url_pattern_encoding_callback F>
471template <url_pattern_encoding_callback F>
473 const token* name_token) {
479 if (!name_token && !
token) {
486template <url_pattern_encoding_callback F>
488 ada_log(
"url_pattern_parser::try_consume_token called with type=",
495 if (next_token.type != type)
return nullptr;
502template <url_pattern_encoding_callback F>
523template <url_pattern_encoding_callback F>
525 ada_log(
"url_pattern_parser::consume_required_token called with type=",
532template <url_pattern_encoding_callback F>
537 ada_log(
"pending_fixed_value is empty");
543 if (!encoded_value) {
545 return encoded_value.error();
553 std::move(*encoded_value),
558template <url_pattern_encoding_callback F>
560 std::string_view prefix,
token* name_token,
token* regexp_or_wildcard_token,
561 std::string_view suffix,
token* modifier_token) {
565 if (modifier_token) {
567 if (modifier_token->
value ==
"?") {
569 }
else if (modifier_token->
value ==
"*") {
573 }
else if (modifier_token->
value ==
"+") {
581 if (!name_token && !regexp_or_wildcard_token &&
592 if (!name_token && !regexp_or_wildcard_token) {
596 if (prefix.empty())
return std::nullopt;
600 if (!encoded_value) {
601 return encoded_value.error();
607 std::move(*encoded_value), modifier);
611 std::string regexp_value{};
614 if (!regexp_or_wildcard_token) {
622 regexp_value = regexp_or_wildcard_token->
value;
631 regexp_value.clear();
632 }
else if (regexp_value ==
".*") {
637 regexp_value.clear();
643 name = name_token->
value;
644 }
else if (regexp_or_wildcard_token !=
nullptr) {
653 if (std::ranges::any_of(
654 parts, [&name](
const auto& part) {
return part.name == name; })) {
660 if (!encoded_prefix)
return encoded_prefix.error();
664 if (!encoded_suffix)
return encoded_suffix.error();
669 parts.emplace_back(type, std::move(regexp_value), modifier, std::move(name),
670 std::move(*encoded_prefix), std::move(*encoded_suffix));
674template <url_pattern_encoding_callback F>
677 F& encoding_callback) {
678 ada_log(
"parse_pattern_string input=", input);
687 if (!tokenize_result) {
688 ada_log(
"parse_pattern_string tokenize failed");
689 return tl::unexpected(tokenize_result.error());
691 parser.tokens = std::move(*tokenize_result);
694 while (
parser.can_continue()) {
703 auto regexp_or_wildcard_token =
704 parser.try_consume_regexp_or_wildcard_token(name_token);
706 if (name_token || regexp_or_wildcard_token) {
708 std::string prefix{};
710 if (char_token) prefix = char_token->value;
712 if (!prefix.empty() && prefix != options.
get_prefix()) {
714 parser.pending_fixed_value.append(prefix);
719 if (
auto error =
parser.maybe_add_part_from_the_pending_fixed_value()) {
720 ada_log(
"maybe_add_part_from_the_pending_fixed_value failed");
721 return tl::unexpected(*error);
725 auto modifier_token =
parser.try_consume_modifier_token();
729 parser.add_part(prefix, name_token, regexp_or_wildcard_token,
"",
731 ada_log(
"parser.add_part failed");
732 return tl::unexpected(*error);
739 auto fixed_token = char_token;
747 parser.pending_fixed_value.append(fixed_token->value);
757 auto prefix_ =
parser.consume_text();
763 regexp_or_wildcard_token =
764 parser.try_consume_regexp_or_wildcard_token(name_token);
766 auto suffix_ =
parser.consume_text();
769 ada_log(
"parser.consume_required_token failed");
774 auto modifier_token =
parser.try_consume_modifier_token();
778 parser.add_part(prefix_, name_token, regexp_or_wildcard_token,
779 suffix_, modifier_token)) {
780 return tl::unexpected(*error);
786 if (
auto error =
parser.maybe_add_part_from_the_pending_fixed_value()) {
787 ada_log(
"maybe_add_part_from_the_pending_fixed_value failed on line 992");
788 return tl::unexpected(*error);
795 ada_log(
"parser.parts size is: ",
parser.parts.size());
800template <url_pattern_regex::regex_concept regex_prov
ider>
804 auto& regex = component.
regexp;
805 return regex_provider::regex_match(
"http", regex) ||
806 regex_provider::regex_match(
"https", regex) ||
807 regex_provider::regex_match(
"ws", regex) ||
808 regex_provider::regex_match(
"wss", regex) ||
809 regex_provider::regex_match(
"ftp", regex);
812template <url_pattern_regex::regex_concept regex_prov
ider>
813inline std::optional<errors> constructor_string_parser<
816 "constructor_string_parser::compute_protocol_matches_special_scheme_"
820 auto protocol_string = make_component_string();
826 if (!protocol_component) {
827 ada_log(
"url_pattern_component::compile failed for protocol_string ",
829 return protocol_component.error();
835 protocol_matches_a_special_scheme_flag =
true;
840template <url_pattern_regex::regex_concept regex_prov
ider>
841tl::expected<url_pattern_init, errors>
843 ada_log(
"constructor_string_parser::parse input=", input);
848 return tl::unexpected(token_list.error());
855 parser.token_increment = 1;
865 if (
parser.is_hash_prefix()) {
867 }
else if (
parser.is_search_prefix()) {
899 if (
parser.is_group_open()) {
907 if (
parser.group_depth > 0) {
910 if (
parser.is_group_close()) {
923 if (
parser.is_protocol_suffix()) {
932 if (
parser.is_protocol_suffix()) {
934 if (
const auto error =
935 parser.compute_protocol_matches_special_scheme_flag()) {
936 ada_log(
"compute_protocol_matches_special_scheme_flag failed");
937 return tl::unexpected(*error);
945 if (
parser.next_is_authority_slashes()) {
950 }
else if (
parser.protocol_matches_a_special_scheme_flag) {
957 parser.change_state(next_state, skip);
964 if (
parser.is_an_identity_terminator()) {
967 }
else if (
parser.is_pathname_start() ||
parser.is_search_prefix() ||
968 parser.is_hash_prefix()) {
982 if (
parser.is_password_prefix()) {
984 }
else if (
parser.is_an_identity_terminator()) {
995 if (
parser.is_an_identity_terminator()) {
1003 if (
parser.is_an_ipv6_open()) {
1004 parser.hostname_ipv6_bracket_depth += 1;
1005 }
else if (
parser.is_an_ipv6_close()) {
1008 parser.hostname_ipv6_bracket_depth -= 1;
1009 }
else if (
parser.is_port_prefix() &&
1010 parser.hostname_ipv6_bracket_depth == 0) {
1015 }
else if (
parser.is_pathname_start()) {
1019 }
else if (
parser.is_search_prefix()) {
1023 }
else if (
parser.is_hash_prefix()) {
1034 if (
parser.is_pathname_start()) {
1036 }
else if (
parser.is_search_prefix()) {
1040 }
else if (
parser.is_hash_prefix()) {
1050 if (
parser.is_search_prefix()) {
1052 }
else if (
parser.is_hash_prefix()) {
1062 if (
parser.is_hash_prefix()) {