440 ada_log(
"tokenize input: ", input);
444 auto tokenizer =
Tokenizer(input, policy);
446 while (tokenizer.index < tokenizer.input.size()) {
449 tokenizer.seek_and_get_next_code_point(tokenizer.index);
452 if (tokenizer.code_point ==
'*') {
456 ada_log(
"add ASTERISK token");
462 if (tokenizer.code_point ==
'+' || tokenizer.code_point ==
'?') {
471 if (tokenizer.code_point ==
'\\') {
474 if (tokenizer.index == tokenizer.input.size() - 1) {
477 if (
auto error = tokenizer.process_tokenizing_error(
478 tokenizer.next_index, tokenizer.index)) {
479 ada_log(
"process_tokenizing_error failed");
480 return tl::unexpected(*error);
486 auto escaped_index = tokenizer.next_index;
488 tokenizer.get_next_code_point();
491 tokenizer.add_token_with_default_length(
493 ada_log(
"add ESCAPED_CHAR token on next_index ", tokenizer.next_index,
494 " with escaped index ", escaped_index);
500 if (tokenizer.code_point ==
'{') {
504 ada_log(
"add OPEN token");
509 if (tokenizer.code_point ==
'}') {
513 ada_log(
"add CLOSE token");
518 if (tokenizer.code_point ==
':') {
520 auto name_position = tokenizer.next_index;
522 auto name_start = name_position;
524 while (name_position < tokenizer.input.size()) {
527 tokenizer.seek_and_get_next_code_point(name_position);
530 bool first_code_point = name_position == name_start;
533 auto valid_code_point =
535 ada_log(
"tokenizer.code_point=", uint32_t(tokenizer.code_point),
536 " first_code_point=", first_code_point,
537 " valid_code_point=", valid_code_point);
539 if (!valid_code_point)
break;
541 name_position = tokenizer.next_index;
545 if (name_position <= name_start) {
548 if (
auto error = tokenizer.process_tokenizing_error(name_start,
550 ada_log(
"process_tokenizing_error failed");
551 return tl::unexpected(*error);
565 if (tokenizer.code_point ==
'(') {
569 auto regexp_position = tokenizer.next_index;
571 auto regexp_start = regexp_position;
577 while (regexp_position < tokenizer.input.size()) {
580 tokenizer.seek_and_get_next_code_point(regexp_position);
585 if (!unicode::is_ascii(tokenizer.code_point)) {
588 if (
auto process_error = tokenizer.process_tokenizing_error(
589 regexp_start, tokenizer.index)) {
590 return tl::unexpected(*process_error);
599 if (regexp_position == regexp_start && tokenizer.code_point ==
'?') {
602 if (
auto process_error = tokenizer.process_tokenizing_error(
603 regexp_start, tokenizer.index)) {
604 return tl::unexpected(*process_error);
612 if (tokenizer.code_point ==
'\\') {
614 if (regexp_position == tokenizer.input.size() - 1) {
617 if (
auto process_error = tokenizer.process_tokenizing_error(
618 regexp_start, tokenizer.index)) {
619 return tl::unexpected(*process_error);
626 tokenizer.get_next_code_point();
629 if (!unicode::is_ascii(tokenizer.code_point)) {
632 if (
auto process_error = tokenizer.process_tokenizing_error(
633 regexp_start, tokenizer.index);
634 process_error.has_value()) {
635 return tl::unexpected(*process_error);
642 regexp_position = tokenizer.next_index;
647 if (tokenizer.code_point ==
')') {
653 regexp_position = tokenizer.next_index;
657 }
else if (tokenizer.code_point ==
'(') {
663 if (regexp_position == tokenizer.input.size() - 1) {
666 if (
auto process_error = tokenizer.process_tokenizing_error(
667 regexp_start, tokenizer.index)) {
668 return tl::unexpected(*process_error);
675 auto temporary_position = tokenizer.next_index;
677 tokenizer.get_next_code_point();
679 if (tokenizer.code_point !=
'?') {
682 if (
auto process_error = tokenizer.process_tokenizing_error(
683 regexp_start, tokenizer.index)) {
684 return tl::unexpected(*process_error);
691 tokenizer.next_index = temporary_position;
694 regexp_position = tokenizer.next_index;
703 if (
auto process_error = tokenizer.process_tokenizing_error(
704 regexp_start, tokenizer.index)) {
705 return tl::unexpected(*process_error);
710 auto regexp_length = regexp_position - regexp_start - 1;
712 if (regexp_length == 0) {
715 if (
auto process_error = tokenizer.process_tokenizing_error(
716 regexp_start, tokenizer.index)) {
717 ada_log(
"process_tokenizing_error failed");
718 return tl::unexpected(*process_error);
734 tokenizer.add_token_with_default_length(
token_type::END, tokenizer.index,
737 ada_log(
"tokenizer.token_list size is: ", tokenizer.token_list.size());
739 return tokenizer.token_list;
836 std::vector<url_pattern_part>& part_list,
842 for (
size_t index = 0; index < part_list.size(); index++) {
844 auto part = part_list[index];
848 std::optional<url_pattern_part> previous_part =
849 index == 0 ? std::nullopt : std::optional(part_list[index - 1]);
852 std::optional<url_pattern_part> next_part =
853 index < part_list.size() - 1 ? std::optional(part_list[index + 1])
878 bool custom_name = !unicode::is_ascii_digit(part.name[0]);
884 bool needs_grouping =
885 !part.suffix.empty() ||
886 (!part.prefix.empty() && part.prefix[0] != options.
get_prefix()[0]);
896 if (!needs_grouping && custom_name &&
899 next_part.has_value() && next_part->prefix.empty() &&
900 next_part->suffix.empty()) {
907 needs_grouping =
true;
911 needs_grouping = !next_part->name.empty() &&
912 unicode::is_ascii_digit(next_part->name[0]);
923 if (!needs_grouping && part.prefix.empty() && previous_part.has_value() &&
926 previous_part->value.at(previous_part->value.size() - 1) ==
928 needs_grouping =
true;
935 if (needs_grouping) {
956 result.append(part.value);
979 (!previous_part.has_value() ||
982 needs_grouping || !part.prefix.empty())) {
1000 !part.suffix.empty() &&
1009 if (needs_grouping)
result.append(
"}");