1#if ADA_INCLUDE_URL_PATTERN
10std::tuple<std::string, std::vector<std::string>>
12 const std::vector<url_pattern_part>& part_list,
13 url_pattern_compile_component_options options) {
18 std::vector<std::string> name_list{};
21 for (
const url_pattern_part& part : part_list) {
50 name_list.push_back(part.name);
53 std::string regexp_value = part.value;
69 if (part.prefix.empty() && part.suffix.empty()) {
74 result +=
"(" + regexp_value +
")" +
78 result +=
"((?:" + regexp_value +
")" +
112 result.append(regexp_value);
124 result.append(regexp_value);
143 return {std::move(
result), std::move(name_list)};
148 if (input.size() < 2)
return false;
152 if (input.front() ==
'[')
return true;
155 if (input.starts_with(
"{["))
return true;
158 return input.starts_with(
"\\[");
180 url_pattern_compile_component_options options) {
182 std::string
result =
"[^";
189 ada_log(
"generate_segment_wildcard_regexp result: ",
result);
194 std::string_view input) {
195 ada_log(
"canonicalize_protocol called with input=", input);
197 if (input.empty()) [[unlikely]] {
202 if (input.ends_with(
":")) {
203 input.remove_suffix(1);
210 std::string(input) +
"://dummy.test",
nullptr)) {
213 auto protocol = dummy_url->get_protocol();
214 protocol.remove_suffix(1);
215 return std::string(protocol);
222 std::string_view input) {
224 if (input.empty()) [[unlikely]] {
231 if (!url->set_username(input)) {
235 return std::string(url->get_username());
239 std::string_view input) {
241 if (input.empty()) [[unlikely]] {
249 if (!url->set_password(input)) {
253 return std::string(url->get_password());
257 std::string_view input) {
258 ada_log(
"canonicalize_hostname input=", input);
260 if (input.empty()) [[unlikely]] {
272 if (!url->set_hostname(input)) {
277 return std::string(url->get_hostname());
281 std::string_view input) {
282 ada_log(
"canonicalize_ipv6_hostname input=", input);
284 if (std::ranges::any_of(input, [](
char c) {
285 return c !=
'[' && c !=
']' && c !=
':' &&
286 !unicode::is_ascii_hex_digit(c);
292 auto hostname = std::string(input);
293 unicode::to_lower_ascii(hostname.data(), hostname.size());
298 std::string_view port_value) {
300 if (port_value.empty()) [[unlikely]] {
309 if (url->set_port(port_value)) {
311 return std::string(url->get_port());
318 std::string_view port_value, std::string_view protocol) {
320 if (port_value.empty()) [[unlikely]] {
327 if (protocol.empty()) {
329 }
else if (protocol.ends_with(
":")) {
330 protocol.remove_suffix(1);
341 if (url && url->set_port(port_value) && url->has_port()) {
343 return std::string(url->get_port());
347 if (scheme::is_special(protocol) && url->get_port().empty()) {
356 std::string_view input) {
358 if (input.empty()) [[unlikely]] {
363 const bool leading_slash = input.starts_with(
"/");
366 const auto modified_value = leading_slash ?
"" :
"/-";
367 const auto full_url =
368 std::string(
"fake://fake-url") + modified_value + std::string(input);
370 const auto pathname = url->get_pathname();
373 return leading_slash ? std::string(pathname)
374 : std::string(pathname.substr(2));
381 std::string_view input) {
383 if (input.empty()) [[unlikely]] {
393 return std::string(url->get_pathname());
401 if (input.empty()) [[unlikely]] {
410 url->set_search(input);
411 if (url->has_search()) {
412 const auto search = url->get_search();
413 return std::string(search.substr(1));
420 if (input.empty()) [[unlikely]] {
429 url->set_hash(input);
431 if (url->has_hash()) {
432 const auto hash = url->get_hash();
433 return std::string(hash.substr(1));
438tl::expected<std::vector<token>,
errors>
tokenize(std::string_view input,
440 ada_log(
"tokenize input: ", input);
444 auto tokenizer =
Tokenizer(input, policy);
446 while (tokenizer.index < tokenizer.input.size()) {
449 tokenizer.seek_and_get_next_code_point(tokenizer.index);
452 if (tokenizer.code_point ==
'*') {
456 ada_log(
"add ASTERISK token");
462 if (tokenizer.code_point ==
'+' || tokenizer.code_point ==
'?') {
471 if (tokenizer.code_point ==
'\\') {
474 if (tokenizer.index == tokenizer.input.size() - 1) {
477 if (
auto error = tokenizer.process_tokenizing_error(
478 tokenizer.next_index, tokenizer.index)) {
479 ada_log(
"process_tokenizing_error failed");
480 return tl::unexpected(*error);
486 auto escaped_index = tokenizer.next_index;
488 tokenizer.get_next_code_point();
491 tokenizer.add_token_with_default_length(
493 ada_log(
"add ESCAPED_CHAR token on next_index ", tokenizer.next_index,
494 " with escaped index ", escaped_index);
500 if (tokenizer.code_point ==
'{') {
504 ada_log(
"add OPEN token");
509 if (tokenizer.code_point ==
'}') {
513 ada_log(
"add CLOSE token");
518 if (tokenizer.code_point ==
':') {
520 auto name_position = tokenizer.next_index;
522 auto name_start = name_position;
524 while (name_position < tokenizer.input.size()) {
527 tokenizer.seek_and_get_next_code_point(name_position);
530 bool first_code_point = name_position == name_start;
533 auto valid_code_point =
535 ada_log(
"tokenizer.code_point=", uint32_t(tokenizer.code_point),
536 " first_code_point=", first_code_point,
537 " valid_code_point=", valid_code_point);
539 if (!valid_code_point)
break;
541 name_position = tokenizer.next_index;
545 if (name_position <= name_start) {
548 if (
auto error = tokenizer.process_tokenizing_error(name_start,
550 ada_log(
"process_tokenizing_error failed");
551 return tl::unexpected(*error);
565 if (tokenizer.code_point ==
'(') {
569 auto regexp_position = tokenizer.next_index;
571 auto regexp_start = regexp_position;
577 while (regexp_position < tokenizer.input.size()) {
580 tokenizer.seek_and_get_next_code_point(regexp_position);
585 if (!unicode::is_ascii(tokenizer.code_point)) {
588 if (
auto process_error = tokenizer.process_tokenizing_error(
589 regexp_start, tokenizer.index)) {
590 return tl::unexpected(*process_error);
599 if (regexp_position == regexp_start && tokenizer.code_point ==
'?') {
602 if (
auto process_error = tokenizer.process_tokenizing_error(
603 regexp_start, tokenizer.index)) {
604 return tl::unexpected(*process_error);
612 if (tokenizer.code_point ==
'\\') {
614 if (regexp_position == tokenizer.input.size() - 1) {
617 if (
auto process_error = tokenizer.process_tokenizing_error(
618 regexp_start, tokenizer.index)) {
619 return tl::unexpected(*process_error);
626 tokenizer.get_next_code_point();
629 if (!unicode::is_ascii(tokenizer.code_point)) {
632 if (
auto process_error = tokenizer.process_tokenizing_error(
633 regexp_start, tokenizer.index);
634 process_error.has_value()) {
635 return tl::unexpected(*process_error);
642 regexp_position = tokenizer.next_index;
647 if (tokenizer.code_point ==
')') {
653 regexp_position = tokenizer.next_index;
657 }
else if (tokenizer.code_point ==
'(') {
663 if (regexp_position == tokenizer.input.size() - 1) {
666 if (
auto process_error = tokenizer.process_tokenizing_error(
667 regexp_start, tokenizer.index)) {
668 return tl::unexpected(*process_error);
675 auto temporary_position = tokenizer.next_index;
677 tokenizer.get_next_code_point();
679 if (tokenizer.code_point !=
'?') {
682 if (
auto process_error = tokenizer.process_tokenizing_error(
683 regexp_start, tokenizer.index)) {
684 return tl::unexpected(*process_error);
691 tokenizer.next_index = temporary_position;
694 regexp_position = tokenizer.next_index;
703 if (
auto process_error = tokenizer.process_tokenizing_error(
704 regexp_start, tokenizer.index)) {
705 return tl::unexpected(*process_error);
710 auto regexp_length = regexp_position - regexp_start - 1;
712 if (regexp_length == 0) {
715 if (
auto process_error = tokenizer.process_tokenizing_error(
716 regexp_start, tokenizer.index)) {
717 ada_log(
"process_tokenizing_error failed");
718 return tl::unexpected(*process_error);
734 tokenizer.add_token_with_default_length(
token_type::END, tokenizer.index,
737 ada_log(
"tokenizer.token_list size is: ", tokenizer.token_list.size());
739 return tokenizer.token_list;
743 ada_log(
"escape_pattern_string called with input=", input);
744 if (input.empty()) [[unlikely]] {
751 result.reserve(input.size());
754 constexpr auto should_escape = [](
const char c) {
755 return c ==
'+' || c ==
'*' || c ==
'?' || c ==
':' || c ==
'{' ||
756 c ==
'}' || c ==
'(' || c ==
')' || c ==
'\\';
760 for (
const auto& c : input) {
761 if (should_escape(c)) {
774constexpr std::array<uint8_t, 256> escape_regexp_table = []()
consteval {
775 std::array<uint8_t, 256> out{};
776 for (
auto& c : {
'.',
'+',
'*',
'?',
'^',
'$',
'{',
'}',
'(',
')',
'[',
']',
783constexpr bool should_escape_regexp_char(
char c) {
784 return escape_regexp_table[(uint8_t)c];
793 result.reserve(input.size());
794 for (
const auto& c : input) {
796 if (should_escape_regexp_char(c)) {
797 result.append(std::string(
"\\") + c);
809 return std::string(input);
818 if (input.empty()) [[unlikely]] {
822 if (input.starts_with(
"/"))
return true;
826 if (input.size() < 2)
return false;
830 return input[1] ==
'/' && (input[0] ==
'\\' || input[0] ==
'{');
834 std::vector<url_pattern_part>& part_list,
835 url_pattern_compile_component_options& options) {
840 for (
size_t index = 0; index < part_list.size(); index++) {
842 auto part = part_list[index];
846 std::optional<url_pattern_part> previous_part =
847 index == 0 ? std::nullopt : std::optional(part_list[index - 1]);
850 std::optional<url_pattern_part> next_part =
851 index < part_list.size() - 1 ? std::optional(part_list[index + 1])
876 bool custom_name = !unicode::is_ascii_digit(part.name[0]);
882 bool needs_grouping =
883 !part.suffix.empty() ||
884 (!part.prefix.empty() && part.prefix[0] != options.get_prefix()[0]);
894 if (!needs_grouping && custom_name &&
897 next_part.has_value() && next_part->prefix.empty() &&
898 next_part->suffix.empty()) {
905 needs_grouping =
true;
909 needs_grouping = !next_part->name.empty() &&
910 unicode::is_ascii_digit(next_part->name[0]);
921 if (!needs_grouping && part.prefix.empty() && previous_part.has_value() &&
923 !options.get_prefix().empty() &&
924 previous_part->value.at(previous_part->value.size() - 1) ==
925 options.get_prefix()[0]) {
926 needs_grouping =
true;
933 if (needs_grouping) {
954 result.append(part.value);
977 (!previous_part.has_value() ||
980 needs_grouping || !part.prefix.empty())) {
998 !part.suffix.empty() &&
1007 if (needs_grouping)
result.append(
"}");
#define ADA_ASSERT_TRUE(COND)
bool constexpr is_ascii(std::u32string_view view)
bool valid_name_code_point(char32_t code_point, bool first)
tl::expected< std::string, errors > canonicalize_opaque_pathname(std::string_view input)
tl::expected< std::string, errors > canonicalize_pathname(std::string_view input)
std::string escape_pattern_string(std::string_view input)
std::string convert_modifier_to_string(url_pattern_part_modifier modifier)
tl::expected< std::string, errors > canonicalize_password(std::string_view input)
tl::expected< std::vector< token >, errors > tokenize(std::string_view input, token_policy policy)
std::string generate_segment_wildcard_regexp(url_pattern_compile_component_options options)
tl::expected< std::string, errors > canonicalize_protocol(std::string_view input)
tl::expected< std::string, errors > canonicalize_hostname(std::string_view input)
std::string generate_pattern_string(std::vector< url_pattern_part > &part_list, url_pattern_compile_component_options &options)
tl::expected< std::string, errors > canonicalize_port_with_protocol(std::string_view input, std::string_view protocol)
std::string escape_regexp_string(std::string_view input)
tl::expected< std::string, errors > canonicalize_hash(std::string_view input)
tl::expected< std::string, errors > canonicalize_port(std::string_view input)
bool is_ipv6_address(std::string_view input) noexcept
tl::expected< std::string, errors > canonicalize_search(std::string_view input)
constexpr bool is_absolute_pathname(std::string_view input, url_pattern_init::process_type type) noexcept
tl::expected< std::string, errors > canonicalize_ipv6_hostname(std::string_view input)
tl::expected< std::string, errors > canonicalize_username(std::string_view input)
std::tuple< std::string, std::vector< std::string > > generate_regular_expression_and_name_list(const std::vector< url_pattern_part > &part_list, url_pattern_compile_component_options options)
std::string process_base_url_string(std::string_view input, url_pattern_init::process_type type)
url_pattern_part_modifier
template ada::result< url_aggregator > parse< url_aggregator >(std::string_view input, const url_aggregator *base_url)
tl::expected< result_type, ada::errors > result
Declaration for the URLPattern helpers.