5#ifndef ADA_URL_PATTERN_HELPERS_INL_H
6#define ADA_URL_PATTERN_HELPERS_INL_H
12#include "ada/expected.h"
20 return "INVALID_CHAR";
32 return "ESCAPED_CHAR";
34 return "OTHER_MODIFIER";
44template <url_pattern_regex::regex_concept regex_prov
ider>
47 token_index = component_start;
52template <url_pattern_regex::regex_concept regex_prov
ider>
56 return is_non_special_pattern_char(token_index,
"#");
59template <url_pattern_regex::regex_concept regex_prov
ider>
63 if (is_non_special_pattern_char(token_index,
"?")) {
69 if (token_list[token_index].value !=
"?") {
74 if (token_index == 0)
return true;
76 auto previous_index = token_index - 1;
79 auto previous_token = get_safe_token(previous_index);
92template <url_pattern_regex::regex_concept regex_prov
ider>
93bool constructor_string_parser<regex_provider>::is_non_special_pattern_char(
94 size_t index, std::string_view value) {
96 auto token = get_safe_token(index);
114template <url_pattern_regex::regex_concept regex_prov
ider>
115const token* constructor_string_parser<regex_provider>::get_safe_token(
119 if (index < token_list.size()) [[likely]] {
120 return &token_list[index];
131 return &token_list.back();
134template <url_pattern_regex::regex_concept regex_prov
ider>
141template <url_pattern_regex::regex_concept regex_prov
ider>
148template <url_pattern_regex::regex_concept regex_prov
ider>
152 if (!is_non_special_pattern_char(token_index + 1,
"/")) {
157 if (!is_non_special_pattern_char(token_index + 2,
"/")) {
163template <url_pattern_regex::regex_concept regex_prov
ider>
167 return is_non_special_pattern_char(token_index,
":");
170template <url_pattern_regex::regex_concept regex_prov
ider>
178 auto value = make_component_string();
182 result.protocol = value;
186 result.username = value;
190 result.password = value;
194 result.hostname = value;
202 result.pathname = value;
206 result.search = value;
229 result.hostname =
"";
240 if (protocol_matches_a_special_scheme_flag) {
241 result.pathname =
"/";
244 result.pathname =
"";
265 component_start = token_index;
270template <url_pattern_regex::regex_concept regex_prov
ider>
271std::string constructor_string_parser<regex_provider>::make_component_string() {
277 const auto end_index = token_list[token_index].index;
280 const auto component_start_token = get_safe_token(component_start);
283 const auto component_start_input_index = component_start_token->index;
286 return input.substr(component_start_input_index,
287 end_index - component_start_input_index);
290template <url_pattern_regex::regex_concept regex_prov
ider>
294 return is_non_special_pattern_char(token_index,
"@");
297template <url_pattern_regex::regex_concept regex_prov
ider>
301 return is_non_special_pattern_char(token_index,
"/");
304template <url_pattern_regex::regex_concept regex_prov
ider>
308 return is_non_special_pattern_char(token_index,
":");
311template <url_pattern_regex::regex_concept regex_prov
ider>
315 return is_non_special_pattern_char(token_index,
"[");
318template <url_pattern_regex::regex_concept regex_prov
ider>
322 return is_non_special_pattern_char(token_index,
"]");
325template <url_pattern_regex::regex_concept regex_prov
ider>
329 return is_non_special_pattern_char(token_index,
":");
333 ada_log(
"Tokenizer::get_next_code_point called with index=", next_index);
337 size_t number_bytes = 0;
338 unsigned char first_byte = input[next_index];
340 if ((first_byte & 0x80) == 0) {
343 code_point = first_byte;
344 ada_log(
"Tokenizer::get_next_code_point returning ASCII code point=",
345 uint32_t(code_point));
346 ada_log(
"Tokenizer::get_next_code_point next_index =", next_index,
347 " input.size()=", input.size());
350 ada_log(
"Tokenizer::get_next_code_point read first byte=",
351 uint32_t(first_byte));
352 if ((first_byte & 0xE0) == 0xC0) {
353 code_point = first_byte & 0x1F;
355 ada_log(
"Tokenizer::get_next_code_point two bytes");
356 }
else if ((first_byte & 0xF0) == 0xE0) {
357 code_point = first_byte & 0x0F;
359 ada_log(
"Tokenizer::get_next_code_point three bytes");
360 }
else if ((first_byte & 0xF8) == 0xF0) {
361 code_point = first_byte & 0x07;
363 ada_log(
"Tokenizer::get_next_code_point four bytes");
367 for (
size_t i = 1 + next_index; i < number_bytes + next_index; ++i) {
368 unsigned char byte = input[i];
369 ada_log(
"Tokenizer::get_next_code_point read byte=", uint32_t(
byte));
370 code_point = (code_point << 6) | (
byte & 0x3F);
372 ada_log(
"Tokenizer::get_next_code_point returning non-ASCII code point=",
373 uint32_t(code_point));
374 ada_log(
"Tokenizer::get_next_code_point next_index =", next_index,
375 " input.size()=", input.size());
376 next_index += number_bytes;
380 ada_log(
"Tokenizer::seek_and_get_next_code_point called with new_index=",
383 next_index = new_index;
389 size_t value_position,
size_t value_length) {
390 ada_log(
"Tokenizer::add_token called with type=",
to_string(type),
391 " next_position=", next_position,
" value_position=", value_position);
400 token_list.emplace_back(type, index,
401 input.substr(value_position, value_length));
403 index = next_position;
407 size_t next_position,
408 size_t value_position) {
410 auto computed_length = next_position - value_position;
413 add_token(type, next_position, value_position, computed_length);
417 ada_log(
"Tokenizer::add_token_with_defaults called with type=",
426 size_t value_position) {
429 ada_log(
"process_tokenizing_error failed with next_position=",
430 next_position,
" value_position=", value_position);
442template <url_pattern_encoding_callback F>
455template <url_pattern_encoding_callback F>
457 const token* name_token) {
463 if (!name_token && !
token) {
470template <url_pattern_encoding_callback F>
472 ada_log(
"url_pattern_parser::try_consume_token called with type=",
479 if (next_token.type != type)
return nullptr;
486template <url_pattern_encoding_callback F>
507template <url_pattern_encoding_callback F>
509 ada_log(
"url_pattern_parser::consume_required_token called with type=",
516template <url_pattern_encoding_callback F>
521 ada_log(
"pending_fixed_value is empty");
527 if (!encoded_value) {
529 return encoded_value.error();
537 std::move(*encoded_value),
542template <url_pattern_encoding_callback F>
544 std::string_view prefix,
token* name_token,
token* regexp_or_wildcard_token,
545 std::string_view suffix,
token* modifier_token) {
549 if (modifier_token) {
551 if (modifier_token->
value ==
"?") {
553 }
else if (modifier_token->
value ==
"*") {
557 }
else if (modifier_token->
value ==
"+") {
565 if (!name_token && !regexp_or_wildcard_token &&
576 if (!name_token && !regexp_or_wildcard_token) {
580 if (prefix.empty())
return std::nullopt;
584 if (!encoded_value) {
585 return encoded_value.error();
591 std::move(*encoded_value), modifier);
595 std::string regexp_value{};
598 if (!regexp_or_wildcard_token) {
606 regexp_value = regexp_or_wildcard_token->
value;
615 regexp_value.clear();
616 }
else if (regexp_value ==
".*") {
621 regexp_value.clear();
627 name = name_token->
value;
628 }
else if (regexp_or_wildcard_token) {
638 if (std::ranges::any_of(
639 parts, [&name](
const auto& part) {
return part.name == name; })) {
645 if (!encoded_prefix)
return encoded_prefix.error();
649 if (!encoded_suffix)
return encoded_suffix.error();
654 parts.emplace_back(type, std::move(regexp_value), modifier, std::move(name),
655 std::move(*encoded_prefix), std::move(*encoded_suffix));
659template <url_pattern_encoding_callback F>
662 F& encoding_callback) {
663 ada_log(
"parse_pattern_string input=", input);
672 if (!tokenize_result) {
673 ada_log(
"parse_pattern_string tokenize failed");
674 return tl::unexpected(tokenize_result.error());
676 parser.tokens = std::move(*tokenize_result);
679 while (
parser.can_continue()) {
688 auto regexp_or_wildcard_token =
689 parser.try_consume_regexp_or_wildcard_token(name_token);
691 if (name_token || regexp_or_wildcard_token) {
693 std::string prefix{};
695 if (char_token) prefix = char_token->value;
697 if (!prefix.empty() && prefix != options.
get_prefix()) {
699 parser.pending_fixed_value.append(prefix);
704 if (
auto error =
parser.maybe_add_part_from_the_pending_fixed_value()) {
705 ada_log(
"maybe_add_part_from_the_pending_fixed_value failed");
706 return tl::unexpected(*error);
710 auto modifier_token =
parser.try_consume_modifier_token();
714 parser.add_part(prefix, name_token, regexp_or_wildcard_token,
"",
716 ada_log(
"parser.add_part failed");
717 return tl::unexpected(*error);
724 auto fixed_token = char_token;
732 parser.pending_fixed_value.append(fixed_token->value);
742 auto prefix_ =
parser.consume_text();
748 regexp_or_wildcard_token =
749 parser.try_consume_regexp_or_wildcard_token(name_token);
751 auto suffix_ =
parser.consume_text();
754 ada_log(
"parser.consume_required_token failed");
759 auto modifier_token =
parser.try_consume_modifier_token();
763 parser.add_part(prefix_, name_token, regexp_or_wildcard_token,
764 suffix_, modifier_token)) {
765 return tl::unexpected(*error);
771 if (
auto error =
parser.maybe_add_part_from_the_pending_fixed_value()) {
772 ada_log(
"maybe_add_part_from_the_pending_fixed_value failed on line 992");
773 return tl::unexpected(*error);
780 ada_log(
"parser.parts size is: ",
parser.parts.size());
785template <url_pattern_regex::regex_concept regex_prov
ider>
789 auto& regex = component.
regexp;
790 return regex_provider::regex_match(
"http", regex) ||
791 regex_provider::regex_match(
"https", regex) ||
792 regex_provider::regex_match(
"ws", regex) ||
793 regex_provider::regex_match(
"wss", regex) ||
794 regex_provider::regex_match(
"ftp", regex);
797template <url_pattern_regex::regex_concept regex_prov
ider>
798inline std::optional<errors> constructor_string_parser<
801 "constructor_string_parser::compute_protocol_matches_special_scheme_"
805 auto protocol_string = make_component_string();
811 if (!protocol_component) {
812 ada_log(
"url_pattern_component::compile failed for protocol_string ",
814 return protocol_component.error();
820 protocol_matches_a_special_scheme_flag =
true;
825template <url_pattern_regex::regex_concept regex_prov
ider>
826tl::expected<url_pattern_init, errors>
828 ada_log(
"constructor_string_parser::parse input=", input);
833 return tl::unexpected(token_list.error());
840 parser.token_increment = 1;
850 if (
parser.is_hash_prefix()) {
852 }
else if (
parser.is_search_prefix()) {
884 if (
parser.is_group_open()) {
892 if (
parser.group_depth > 0) {
895 if (
parser.is_group_close()) {
908 if (
parser.is_protocol_suffix()) {
917 if (
parser.is_protocol_suffix()) {
919 if (
const auto error =
920 parser.compute_protocol_matches_special_scheme_flag()) {
921 ada_log(
"compute_protocol_matches_special_scheme_flag failed");
922 return tl::unexpected(*error);
930 if (
parser.next_is_authority_slashes()) {
935 }
else if (
parser.protocol_matches_a_special_scheme_flag) {
942 parser.change_state(next_state, skip);
949 if (
parser.is_an_identity_terminator()) {
952 }
else if (
parser.is_pathname_start() ||
parser.is_search_prefix() ||
953 parser.is_hash_prefix()) {
967 if (
parser.is_password_prefix()) {
969 }
else if (
parser.is_an_identity_terminator()) {
980 if (
parser.is_an_identity_terminator()) {
988 if (
parser.is_an_ipv6_open()) {
989 parser.hostname_ipv6_bracket_depth += 1;
990 }
else if (
parser.is_an_ipv6_close()) {
993 parser.hostname_ipv6_bracket_depth -= 1;
994 }
else if (
parser.is_port_prefix() &&
995 parser.hostname_ipv6_bracket_depth == 0) {
1000 }
else if (
parser.is_pathname_start()) {
1004 }
else if (
parser.is_search_prefix()) {
1008 }
else if (
parser.is_hash_prefix()) {
1019 if (
parser.is_pathname_start()) {
1021 }
else if (
parser.is_search_prefix()) {
1025 }
else if (
parser.is_hash_prefix()) {
1035 if (
parser.is_search_prefix()) {
1037 }
else if (
parser.is_hash_prefix()) {
1047 if (
parser.is_hash_prefix()) {
static tl::expected< url_pattern_component, errors > compile(std::string_view input, F &encoding_callback, url_pattern_compile_component_options &options)
regex_provider::regex_type regexp
void add_token_with_default_length(token_type type, size_t next_position, size_t value_position)
void add_token(token_type type, size_t next_position, size_t value_position, size_t value_length)
void seek_and_get_next_code_point(size_t index)
void get_next_code_point()
std::optional< errors > process_tokenizing_error(size_t next_position, size_t value_position) ada_warn_unused
void add_token_with_defaults(token_type type)
std::optional< errors > add_part(std::string_view prefix, token *name_token, token *regexp_or_wildcard_token, std::string_view suyffix, token *modifier_token) ada_warn_unused
bool consume_required_token(token_type type)
std::vector< token > tokens
token * try_consume_token(token_type type)
std::string pending_fixed_value
std::vector< url_pattern_part > parts
std::string consume_text()
token * try_consume_regexp_or_wildcard_token(const token *name_token)
std::optional< errors > maybe_add_part_from_the_pending_fixed_value() ada_warn_unused
token * try_consume_modifier_token()
std::string segment_wildcard_regexp
Common definitions for cross-platform compiler support.
#define ADA_ASSERT_TRUE(COND)
Definitions for user-facing functions for parsing a URL and its components.
Includes the definitions for supported parsers.
std::string to_string(token_type type)
bool protocol_component_matches_special_scheme(url_pattern_component< regex_provider > &component)
tl::expected< std::vector< token >, errors > tokenize(std::string_view input, token_policy policy)
std::string generate_segment_wildcard_regexp(url_pattern_compile_component_options options)
tl::expected< std::string, errors > canonicalize_protocol(std::string_view input)
tl::expected< std::vector< url_pattern_part >, errors > parse_pattern_string(std::string_view input, url_pattern_compile_component_options &options, F &encoding_callback)
tl::expected< result_type, ada::errors > result
std::string_view get_prefix() const ada_warn_unused
static url_pattern_compile_component_options DEFAULT
std::optional< errors > compute_protocol_matches_special_scheme_flag()
constructor_string_parser(std::string_view new_input, std::vector< token > &&new_token_list)
bool is_protocol_suffix()
bool is_an_identity_terminator()
static tl::expected< url_pattern_init, errors > parse(std::string_view input)
bool is_group_open() const
void change_state(State state, size_t skip)
bool is_group_close() const
bool is_password_prefix()
bool next_is_authority_slashes()
ada::url_pattern_regex::std_regex_provider regex_provider
Declaration for the URLPattern helpers.