Ada 3.2.1
Fast spec-compliant URL parser
Loading...
Searching...
No Matches
url_pattern_helpers-inl.h
Go to the documentation of this file.
1
5#ifndef ADA_URL_PATTERN_HELPERS_INL_H
6#define ADA_URL_PATTERN_HELPERS_INL_H
7
8#include <optional>
9#include <string_view>
10
11#include "ada/common_defs.h"
12#include "ada/expected.h"
14#include "ada/implementation.h"
15
17#ifdef ADA_TESTING
// Testing-only helper (compiled under ADA_TESTING): returns the upper-case
// name of a token_type; other functions in this file pass it to ada_log.
// NOTE(review): in this listing most `case` labels and the statement under
// `default:` are absent (the embedded numbering skips 20, 22, ... and 41);
// recover them from the original header before relying on this text.
18inline std::string to_string(token_type type) {
19 switch (type) {
21 return "INVALID_CHAR";
23 return "OPEN";
25 return "CLOSE";
27 return "REGEXP";
29 return "NAME";
31 return "CHAR";
33 return "ESCAPED_CHAR";
35 return "OTHER_MODIFIER";
37 return "ASTERISK";
38 case token_type::END:
39 return "END";
40 default:
42 }
43}
44#endif // ADA_TESTING
45
46template <url_pattern_regex::regex_concept regex_provider>
47constexpr void constructor_string_parser<regex_provider>::rewind() {
48 // Set parser's token index to parser's component start.
49 token_index = component_start;
50 // Set parser's token increment to 0.
51 token_increment = 0;
52}
53
54template <url_pattern_regex::regex_concept regex_provider>
55constexpr bool constructor_string_parser<regex_provider>::is_hash_prefix() {
56 // Return the result of running is a non-special pattern char given parser,
57 // parser's token index and "#".
58 return is_non_special_pattern_char(token_index, '#');
59}
60
61template <url_pattern_regex::regex_concept regex_provider>
62constexpr bool constructor_string_parser<regex_provider>::is_search_prefix() {
63 // If result of running is a non-special pattern char given parser, parser's
64 // token index and "?" is true, then return true.
65 if (is_non_special_pattern_char(token_index, '?')) {
66 return true;
67 }
68
69 // If parser's token list[parser's token index]'s value is not "?", then
70 // return false.
71 if (token_list[token_index].value != "?") {
72 return false;
73 }
74
75 // If previous index is less than 0, then return true.
76 if (token_index == 0) return true;
77 // Let previous index be parser's token index - 1.
78 auto previous_index = token_index - 1;
79 // Let previous token be the result of running get a safe token given parser
80 // and previous index.
81 auto previous_token = get_safe_token(previous_index);
82 ADA_ASSERT_TRUE(previous_token);
83 // If any of the following are true, then return false:
84 // - previous token's type is "name".
85 // - previous token's type is "regexp".
86 // - previous token's type is "close".
87 // - previous token's type is "asterisk".
88 return !(previous_token->type == token_type::NAME ||
89 previous_token->type == token_type::REGEXP ||
90 previous_token->type == token_type::CLOSE ||
91 previous_token->type == token_type::ASTERISK);
92}
93
// Tells whether the token at `index` (looked up with get_safe_token) carries
// the given value and has one of the types accepted by the final return
// expression.
// NOTE(review): the embedded numbering skips 100, 116 and 117, so one line
// after the token lookup and the tail of the final return expression are
// missing from this listing.
94template <url_pattern_regex::regex_concept regex_provider>
95constexpr bool
96constructor_string_parser<regex_provider>::is_non_special_pattern_char(
97 size_t index, uint32_t value) const {
98 // Let token be the result of running get a safe token given parser and index.
99 auto token = get_safe_token(index);
101
102 // If token's value is not value, then return false.
103 // TODO: Remove this once we make sure get_safe_token returns a non-empty
104 // string.
// Only the first element of the token's value is compared against `value`;
// an empty token value skips the comparison instead of returning false.
105 if (!token->value.empty() &&
106 static_cast<uint32_t>(token->value[0]) != value) {
107 return false;
108 }
109
110 // If any of the following are true:
111 // - token's type is "char";
112 // - token's type is "escaped-char"; or
113 // - token's type is "invalid-char",
114 // - then return true.
115 return token->type == token_type::CHAR ||
118}
119
120template <url_pattern_regex::regex_concept regex_provider>
121constexpr const token*
122constructor_string_parser<regex_provider>::get_safe_token(size_t index) const {
123 // If index is less than parser's token list's size, then return parser's
124 // token list[index].
125 if (index < token_list.size()) [[likely]] {
126 return &token_list[index];
127 }
128
129 // Assert: parser's token list's size is greater than or equal to 1.
130 ADA_ASSERT_TRUE(!token_list.empty());
131
132 // Let token be parser's token list[last index].
133 // Assert: token's type is "end".
134 ADA_ASSERT_TRUE(token_list.back().type == token_type::END);
135
136 // Return token.
137 return &token_list.back();
138}
139
140template <url_pattern_regex::regex_concept regex_provider>
141constexpr bool constructor_string_parser<regex_provider>::is_group_open()
142 const {
143 // If parser's token list[parser's token index]'s type is "open", then return
144 // true.
145 return token_list[token_index].type == token_type::OPEN;
146}
147
148template <url_pattern_regex::regex_concept regex_provider>
149constexpr bool constructor_string_parser<regex_provider>::is_group_close()
150 const {
151 // If parser's token list[parser's token index]'s type is "close", then return
152 // true.
153 return token_list[token_index].type == token_type::CLOSE;
154}
155
156template <url_pattern_regex::regex_concept regex_provider>
157constexpr bool
158constructor_string_parser<regex_provider>::next_is_authority_slashes() const {
159 // If the result of running is a non-special pattern char given parser,
160 // parser's token index + 1, and "/" is false, then return false.
161 if (!is_non_special_pattern_char(token_index + 1, '/')) {
162 return false;
163 }
164 // If the result of running is a non-special pattern char given parser,
165 // parser's token index + 2, and "/" is false, then return false.
166 if (!is_non_special_pattern_char(token_index + 2, '/')) {
167 return false;
168 }
169 return true;
170}
171
172template <url_pattern_regex::regex_concept regex_provider>
173constexpr bool constructor_string_parser<regex_provider>::is_protocol_suffix()
174 const {
175 // Return the result of running is a non-special pattern char given parser,
176 // parser's token index, and ":".
177 return is_non_special_pattern_char(token_index, ':');
178}
179
// Moves the parser from its current state to `new_state`.
// Before switching, the component string of the state being left is stored in
// the matching `result` field (skipped for the init, authority and done
// states), and hostname / pathname / search receive default values when the
// transition jumps over them while they are still unset. Afterwards the token
// index is advanced by `skip`, the component start is moved to that index and
// the token increment is reset to 0.
180template <url_pattern_regex::regex_concept regex_provider>
181void constructor_string_parser<regex_provider>::change_state(State new_state,
182 size_t skip) {
183 // If parser's state is not "init", not "authority", and not "done", then set
184 // parser's result[parser's state] to the result of running make a component
185 // string given parser.
186 if (state != State::INIT && state != State::AUTHORITY &&
187 state != State::DONE) {
188 auto value = make_component_string();
189 // TODO: Simplify this.
190 switch (state) {
191 case State::PROTOCOL: {
192 result.protocol = value;
193 break;
194 }
195 case State::USERNAME: {
196 result.username = value;
197 break;
198 }
199 case State::PASSWORD: {
200 result.password = value;
201 break;
202 }
203 case State::HOSTNAME: {
204 result.hostname = value;
205 break;
206 }
207 case State::PORT: {
208 result.port = value;
209 break;
210 }
211 case State::PATHNAME: {
212 result.pathname = value;
213 break;
214 }
215 case State::SEARCH: {
216 result.search = value;
217 break;
218 }
219 case State::HASH: {
220 result.hash = value;
221 break;
222 }
223 default:
// NOTE(review): the statement belonging to this branch (listing line 224) is
// missing from this listing.
225 }
226 }
227
228 // If parser's state is not "init" and new state is not "done", then:
229 if (state != State::INIT && new_state != State::DONE) {
230 // If parser's state is "protocol", "authority", "username", or "password";
231 // new state is "port", "pathname", "search", or "hash"; and parser's
232 // result["hostname"] does not exist, then set parser's result["hostname"]
233 // to the empty string.
234 if ((state == State::PROTOCOL || state == State::AUTHORITY ||
235 state == State::USERNAME || state == State::PASSWORD) &&
236 (new_state == State::PORT || new_state == State::PATHNAME ||
237 new_state == State::SEARCH || new_state == State::HASH) &&
238 !result.hostname)
239 result.hostname = "";
240 }
241
242 // If parser's state is "protocol", "authority", "username", "password",
243 // "hostname", or "port"; new state is "search" or "hash"; and parser's
244 // result["pathname"] does not exist, then:
// Unlike the hostname default above, this check and the search default below
// are not nested inside the `state != INIT && new_state != DONE` guard.
245 if ((state == State::PROTOCOL || state == State::AUTHORITY ||
246 state == State::USERNAME || state == State::PASSWORD ||
247 state == State::HOSTNAME || state == State::PORT) &&
248 (new_state == State::SEARCH || new_state == State::HASH) &&
249 !result.pathname) {
// When the special-scheme flag is set the default pathname is "/".
250 if (protocol_matches_a_special_scheme_flag) {
251 result.pathname = "/";
252 } else {
253 // Otherwise, set parser's result["pathname"] to the empty string.
254 result.pathname = "";
255 }
256 }
257
258 // If parser's state is "protocol", "authority", "username", "password",
259 // "hostname", "port", or "pathname"; new state is "hash"; and parser's
260 // result["search"] does not exist, then set parser's result["search"] to
261 // the empty string.
262 if ((state == State::PROTOCOL || state == State::AUTHORITY ||
263 state == State::USERNAME || state == State::PASSWORD ||
264 state == State::HOSTNAME || state == State::PORT ||
265 state == State::PATHNAME) &&
266 new_state == State::HASH && !result.search) {
267 result.search = "";
268 }
269
270 // Set parser's state to new state.
271 state = new_state;
272 // Increment parser's token index by skip.
273 token_index += skip;
274 // Set parser's component start to parser's token index.
275 component_start = token_index;
276 // Set parser's token increment to 0.
277 token_increment = 0;
278}
279
280template <url_pattern_regex::regex_concept regex_provider>
281std::string constructor_string_parser<regex_provider>::make_component_string() {
282 // Assert: parser's token index is less than parser's token list's size.
283 ADA_ASSERT_TRUE(token_index < token_list.size());
284
285 // Let token be parser's token list[parser's token index].
286 // Let end index be token's index.
287 const auto end_index = token_list[token_index].index;
288 // Let component start token be the result of running get a safe token given
289 // parser and parser's component start.
290 const auto component_start_token = get_safe_token(component_start);
291 ADA_ASSERT_TRUE(component_start_token);
292 // Let component start input index be component start token's index.
293 const auto component_start_input_index = component_start_token->index;
294 // Return the code point substring from component start input index to end
295 // index within parser's input.
296 return input.substr(component_start_input_index,
297 end_index - component_start_input_index);
298}
299
300template <url_pattern_regex::regex_concept regex_provider>
301constexpr bool
302constructor_string_parser<regex_provider>::is_an_identity_terminator() const {
303 // Return the result of running is a non-special pattern char given parser,
304 // parser's token index, and "@".
305 return is_non_special_pattern_char(token_index, '@');
306}
307
308template <url_pattern_regex::regex_concept regex_provider>
309constexpr bool constructor_string_parser<regex_provider>::is_pathname_start()
310 const {
311 // Return the result of running is a non-special pattern char given parser,
312 // parser's token index, and "/".
313 return is_non_special_pattern_char(token_index, '/');
314}
315
316template <url_pattern_regex::regex_concept regex_provider>
317constexpr bool constructor_string_parser<regex_provider>::is_password_prefix()
318 const {
319 // Return the result of running is a non-special pattern char given parser,
320 // parser's token index, and ":".
321 return is_non_special_pattern_char(token_index, ':');
322}
323
324template <url_pattern_regex::regex_concept regex_provider>
325constexpr bool constructor_string_parser<regex_provider>::is_an_ipv6_open()
326 const {
327 // Return the result of running is a non-special pattern char given parser,
328 // parser's token index, and "[".
329 return is_non_special_pattern_char(token_index, '[');
330}
331
332template <url_pattern_regex::regex_concept regex_provider>
333constexpr bool constructor_string_parser<regex_provider>::is_an_ipv6_close()
334 const {
335 // Return the result of running is a non-special pattern char given parser,
336 // parser's token index, and "]".
337 return is_non_special_pattern_char(token_index, ']');
338}
339
340template <url_pattern_regex::regex_concept regex_provider>
341constexpr bool constructor_string_parser<regex_provider>::is_port_prefix()
342 const {
343 // Return the result of running is a non-special pattern char given parser,
344 // parser's token index, and ":".
345 return is_non_special_pattern_char(token_index, ':');
346}
347
// Body of what the log messages call Tokenizer::get_next_code_point: decodes
// the UTF-8 sequence that starts at input[next_index] into `code_point` and
// advances `next_index` past it.
// NOTE(review): the signature line (listing line 348) is missing from this
// listing.
// NOTE(review): a lead byte with the high bit set that matches none of the
// 2-, 3- or 4-byte patterns leaves number_bytes at 0, so next_index is not
// advanced and code_point stays 0; continuation bytes are not validated
// either. Callers presumably guarantee well-formed UTF-8 — confirm.
349 ada_log("Tokenizer::get_next_code_point called with index=", next_index);
350 ADA_ASSERT_TRUE(next_index < input.size());
351 // this assumes that we have a valid, non-truncated UTF-8 stream.
352 code_point = 0;
353 size_t number_bytes = 0;
354 unsigned char first_byte = input[next_index];
355
356 if ((first_byte & 0x80) == 0) {
357 // 1-byte character (ASCII)
358 next_index++;
359 code_point = first_byte;
360 ada_log("Tokenizer::get_next_code_point returning ASCII code point=",
361 uint32_t(code_point));
362 ada_log("Tokenizer::get_next_code_point next_index =", next_index,
363 " input.size()=", input.size());
364 return;
365 }
366 ada_log("Tokenizer::get_next_code_point read first byte=",
367 uint32_t(first_byte));
// The lead byte's high bits select the sequence length; its remaining low
// bits seed the code point.
368 if ((first_byte & 0xE0) == 0xC0) {
369 code_point = first_byte & 0x1F;
370 number_bytes = 2;
371 ada_log("Tokenizer::get_next_code_point two bytes");
372 } else if ((first_byte & 0xF0) == 0xE0) {
373 code_point = first_byte & 0x0F;
374 number_bytes = 3;
375 ada_log("Tokenizer::get_next_code_point three bytes");
376 } else if ((first_byte & 0xF8) == 0xF0) {
377 code_point = first_byte & 0x07;
378 number_bytes = 4;
379 ada_log("Tokenizer::get_next_code_point four bytes");
380 }
381 ADA_ASSERT_TRUE(number_bytes + next_index <= input.size());
382
// Each following byte contributes its low six bits.
383 for (size_t i = 1 + next_index; i < number_bytes + next_index; ++i) {
384 unsigned char byte = input[i];
385 ada_log("Tokenizer::get_next_code_point read byte=", uint32_t(byte));
386 code_point = (code_point << 6) | (byte & 0x3F);
387 }
388 ada_log("Tokenizer::get_next_code_point returning non-ASCII code point=",
389 uint32_t(code_point));
390 ada_log("Tokenizer::get_next_code_point next_index =", next_index,
391 " input.size()=", input.size());
392 next_index += number_bytes;
393}
394
// Repositions the tokenizer at `new_index`; per the step comment below, the
// code point at that position is then read.
// NOTE(review): the statement that performs the read (listing line 401) is
// missing from this listing.
395constexpr void Tokenizer::seek_and_get_next_code_point(size_t new_index) {
396 ada_log("Tokenizer::seek_and_get_next_code_point called with new_index=",
397 new_index);
398 // Set tokenizer's next index to index.
399 next_index = new_index;
400 // Run get the next code point given tokenizer.
402}
403
404inline void Tokenizer::add_token(token_type type, size_t next_position,
405 size_t value_position, size_t value_length) {
406 ada_log("Tokenizer::add_token called with type=", to_string(type),
407 " next_position=", next_position, " value_position=", value_position);
408 ADA_ASSERT_TRUE(next_position >= value_position);
409
410 // Let token be a new token.
411 // Set token's type to type.
412 // Set token's index to tokenizer's index.
413 // Set token's value to the code point substring from value position with
414 // length value length within tokenizer's input.
415 // Append token to the back of tokenizer's token list.
416 token_list.emplace_back(type, index,
417 input.substr(value_position, value_length));
418 // Set tokenizer's index to next position.
419 index = next_position;
420}
421
// Tail of a definition that forwards to add_token with a value length of
// `next_position - value_position`.
// NOTE(review): the first line of the signature (listing line 422) is missing
// from this listing; the caller below refers to this function as
// add_token_with_default_length.
// NOTE(review): the subtraction is unsigned and this function itself does not
// check next_position >= value_position (add_token asserts it afterwards).
423 size_t next_position,
424 size_t value_position) {
425 // Let computed length be next position - value position.
426 auto computed_length = next_position - value_position;
427 // Run add a token given tokenizer, type, next position, value position, and
428 // computed length.
429 add_token(type, next_position, value_position, computed_length);
430}
431
// Body of what the log message calls Tokenizer::add_token_with_defaults: adds
// a token of the given type spanning from the tokenizer's `index` up to its
// `next_index`.
// NOTE(review): the signature line (listing line 432) is missing from this
// listing.
433 ada_log("Tokenizer::add_token_with_defaults called with type=",
434 to_string(type));
435 // Run add a token with default length given tokenizer, type, tokenizer's next
436 // index, and tokenizer's index.
437 add_token_with_default_length(type, next_index, index);
438}
439
// Handles a tokenizing error: under the strict policy it returns
// errors::type_error; otherwise it returns std::nullopt after the steps
// described in the comments below.
// NOTE(review): listing lines 441 (rest of the signature), 450 and 453 are
// missing from this listing, so the lenient-path statements are incomplete
// here. The name is taken from the log message below.
440inline ada_warn_unused std::optional<errors>
442 size_t value_position) {
443 // If tokenizer's policy is "strict", then throw a TypeError.
444 if (policy == token_policy::strict) {
445 ada_log("process_tokenizing_error failed with next_position=",
446 next_position, " value_position=", value_position);
447 return errors::type_error;
448 }
449 // Assert: tokenizer's policy is "lenient".
451 // Run add a token with default length given tokenizer, "invalid-char", next
452 // position, and value position.
454 value_position);
455 return std::nullopt;
456}
457
// Per the step comments: tries to consume an "other-modifier" token and, if
// that yields nothing, an "asterisk" token.
// NOTE(review): listing lines 459 (signature), 462 and 468 are missing from
// this listing; parse_pattern_string calls this as
// parser.try_consume_modifier_token().
458template <url_pattern_encoding_callback F>
460 // Let token be the result of running try to consume a token given parser and
461 // "other-modifier".
463 // If token is not null, then return token.
464 if (token) return token;
465 // Set token to the result of running try to consume a token given parser and
466 // "asterisk".
467 // Return token.
469}
470
// Per the step comments: tries to consume a "regexp" token; when that fails
// and no name token was supplied, falls back to an "asterisk" token.
// NOTE(review): listing lines 472 (signature), 476 and 480 are missing from
// this listing; parse_pattern_string calls this as
// parser.try_consume_regexp_or_wildcard_token(name_token).
471template <url_pattern_encoding_callback F>
473 const token* name_token) {
474 // Let token be the result of running try to consume a token given parser and
475 // "regexp".
477 // If name token is null and token is null, then set token to the result of
478 // running try to consume a token given parser and "asterisk".
479 if (!name_token && !token) {
481 }
482 // Return token.
483 return token;
484}
485
// Body of what the log message calls url_pattern_parser::try_consume_token:
// when the token at the parser's index has the requested type, advances the
// index by one and returns a pointer to that token; otherwise returns nullptr
// and leaves the index unchanged.
// NOTE(review): the signature line (listing line 487) is missing from this
// listing.
486template <url_pattern_encoding_callback F>
488 ada_log("url_pattern_parser::try_consume_token called with type=",
489 to_string(type));
490 // Assert: parser's index is less than parser's token list size.
491 ADA_ASSERT_TRUE(index < tokens.size());
492 // Let next token be parser's token list[parser's index].
493 auto& next_token = tokens[index];
494 // If next token's type is not type return null.
495 if (next_token.type != type) return nullptr;
496 // Increase parser's index by 1.
497 index++;
498 // Return next token.
499 return &next_token;
500}
501
// Per the step comments: concatenates the values of consecutive "char" /
// "escaped-char" tokens into a string and returns it once neither kind can be
// consumed.
// NOTE(review): listing lines 503 (signature), 510 and 513 are missing from
// this listing, so the statements that fetch `token` are absent here;
// parse_pattern_string calls this as parser.consume_text().
502template <url_pattern_encoding_callback F>
504 // Let result be the empty string.
505 std::string result{};
506 // While true:
507 while (true) {
508 // Let token be the result of running try to consume a token given parser
509 // and "char".
511 // If token is null, then set token to the result of running try to consume
512 // a token given parser and "escaped-char".
514 // If token is null, then break.
515 if (!token) break;
516 // Append token's value to the end of result.
517 result.append(token->value);
518 }
519 // Return result.
520 return result;
521}
522
// Body of what the log message calls url_pattern_parser::consume_required_token:
// returns true when a token of the requested type could be consumed and false
// otherwise, so the caller decides how to report the failure.
// NOTE(review): the signature line (listing line 524) is missing from this
// listing.
523template <url_pattern_encoding_callback F>
525 ada_log("url_pattern_parser::consume_required_token called with type=",
526 to_string(type));
527 // Let result be the result of running try to consume a token given parser and
528 // type.
529 return try_consume_token(type) != nullptr;
530}
531
// Flushes the parser's pending fixed value: returns std::nullopt immediately
// when it is empty; otherwise runs it through the encoding callback, returns
// the callback's error on failure, and on success clears the pending value
// and (per the step comments) appends a "fixed-text" part.
// NOTE(review): listing lines 534 (rest of the signature), 552 and 554 are
// missing from this listing, so the statement that appends the part is
// incomplete here. parse_pattern_string calls this as
// parser.maybe_add_part_from_the_pending_fixed_value().
532template <url_pattern_encoding_callback F>
533std::optional<errors>
535 // If parser's pending fixed value is the empty string, then return.
536 if (pending_fixed_value.empty()) {
537 ada_log("pending_fixed_value is empty");
538 return std::nullopt;
539 }
540 // Let encoded value be the result of running parser's encoding callback given
541 // parser's pending fixed value.
542 auto encoded_value = encoding_callback(pending_fixed_value);
543 if (!encoded_value) {
544 ada_log("failed to encode pending_fixed_value: ", pending_fixed_value);
545 return encoded_value.error();
546 }
547 // Set parser's pending fixed value to the empty string.
548 pending_fixed_value.clear();
549 // Let part be a new part whose type is "fixed-text", value is encoded value,
550 // and modifier is "none".
551 // Append part to parser's part list.
553 std::move(*encoded_value),
555 return std::nullopt;
556}
557
// Adds one part to the parser's part list from a prefix, an optional name
// token, an optional regexp-or-wildcard token, a suffix and an optional
// modifier token. Returns std::nullopt on success, errors::type_error when
// the part's name duplicates an existing part's name, or the encoding
// callback's error when encoding the prefix / suffix fails.
// NOTE(review): many lines are missing from this listing (559, 568, 572, 576,
// 582, 588, 606, 625, 629, 635, 649): the signature start, the modifier
// assignments, the tail of one condition, the pending-fixed-value call, the
// declaration and updates of `type`, and the numeric-name increment. The name
// add_part is taken from the calls in parse_pattern_string.
558template <url_pattern_encoding_callback F>
560 std::string_view prefix, token* name_token, token* regexp_or_wildcard_token,
561 std::string_view suffix, token* modifier_token) {
562 // Let modifier be "none".
563 auto modifier = url_pattern_part_modifier::none;
564 // If modifier token is not null:
565 if (modifier_token) {
566 // If modifier token's value is "?" then set modifier to "optional".
567 if (modifier_token->value == "?") {
569 } else if (modifier_token->value == "*") {
570 // Otherwise if modifier token's value is "*" then set modifier to
571 // "zero-or-more".
573 } else if (modifier_token->value == "+") {
574 // Otherwise if modifier token's value is "+" then set modifier to
575 // "one-or-more".
577 }
578 }
579 // If name token is null and regexp or wildcard token is null and modifier
580 // is "none":
581 if (!name_token && !regexp_or_wildcard_token &&
583 // Append prefix to the end of parser's pending fixed value.
584 pending_fixed_value.append(prefix);
585 return std::nullopt;
586 }
587 // Run maybe add a part from the pending fixed value given parser.
589 return *error;
590 }
591 // If name token is null and regexp or wildcard token is null:
592 if (!name_token && !regexp_or_wildcard_token) {
593 // Assert: suffix is the empty string.
594 ADA_ASSERT_TRUE(suffix.empty());
595 // If prefix is the empty string, then return.
596 if (prefix.empty()) return std::nullopt;
597 // Let encoded value be the result of running parser's encoding callback
598 // given prefix.
599 auto encoded_value = encoding_callback(prefix);
600 if (!encoded_value) {
601 return encoded_value.error();
602 }
603 // Let part be a new part whose type is "fixed-text", value is encoded
604 // value, and modifier is modifier.
605 // Append part to parser's part list.
607 std::move(*encoded_value), modifier);
608 return std::nullopt;
609 }
610 // Let regexp value be the empty string.
611 std::string regexp_value{};
612 // If regexp or wildcard token is null, then set regexp value to parser's
613 // segment wildcard regexp.
614 if (!regexp_or_wildcard_token) {
615 regexp_value = segment_wildcard_regexp;
616 } else if (regexp_or_wildcard_token->type == token_type::ASTERISK) {
617 // Otherwise if regexp or wildcard token's type is "asterisk", then set
618 // regexp value to the full wildcard regexp value.
619 regexp_value = ".*";
620 } else {
621 // Otherwise set regexp value to regexp or wildcard token's value.
622 regexp_value = regexp_or_wildcard_token->value;
623 }
624 // Let type be "regexp".
626 // If regexp value is parser's segment wildcard regexp:
627 if (regexp_value == segment_wildcard_regexp) {
628 // Set type to "segment-wildcard".
630 // Set regexp value to the empty string.
631 regexp_value.clear();
632 } else if (regexp_value == ".*") {
633 // Otherwise if regexp value is the full wildcard regexp value:
634 // Set type to "full-wildcard".
636 // Set regexp value to the empty string.
637 regexp_value.clear();
638 }
639 // Let name be the empty string.
640 std::string name{};
641 // If name token is not null, then set name to name token's value.
642 if (name_token) {
643 name = name_token->value;
644 } else if (regexp_or_wildcard_token != nullptr) {
645 // Otherwise if regexp or wildcard token is not null:
646 // Set name to parser's next numeric name, serialized.
647 name = std::to_string(next_numeric_name);
648 // Increment parser's next numeric name by 1.
650 }
651 // If the result of running is a duplicate name given parser and name is
652 // true, then throw a TypeError.
// The duplicate check is a linear scan over the parts collected so far.
653 if (std::ranges::any_of(
654 parts, [&name](const auto& part) { return part.name == name; })) {
655 return errors::type_error;
656 }
657 // Let encoded prefix be the result of running parser's encoding callback
658 // given prefix.
659 auto encoded_prefix = encoding_callback(prefix);
660 if (!encoded_prefix) return encoded_prefix.error();
661 // Let encoded suffix be the result of running parser's encoding callback
662 // given suffix.
663 auto encoded_suffix = encoding_callback(suffix);
664 if (!encoded_suffix) return encoded_suffix.error();
665 // Let part be a new part whose type is type, value is regexp value,
666 // modifier is modifier, name is name, prefix is encoded prefix, and suffix
667 // is encoded suffix.
668 // Append part to parser's part list.
669 parts.emplace_back(type, std::move(regexp_value), modifier, std::move(name),
670 std::move(*encoded_prefix), std::move(*encoded_suffix));
671 return std::nullopt;
672}
673
// Parses a pattern string into a list of url_pattern_part.
//   input             - the pattern string; it is tokenized with the strict
//                       policy.
//   options           - supplies the prefix code point (get_prefix) and is
//                       passed to generate_segment_wildcard_regexp.
//   encoding_callback - applied by the parser to fixed text, prefixes and
//                       suffixes.
// Returns the parser's part list on success, or tl::unexpected carrying the
// tokenizer / parser error (errors::type_error when a required "close" or
// "end" token is absent).
// NOTE(review): the line that declares `parser` (listing line 682) is missing
// from this listing.
674template <url_pattern_encoding_callback F>
675tl::expected<std::vector<url_pattern_part>, errors> parse_pattern_string(
676 std::string_view input, url_pattern_compile_component_options& options,
677 F& encoding_callback) {
678 ada_log("parse_pattern_string input=", input);
679 // Let parser be a new pattern parser whose encoding callback is encoding
680 // callback and segment wildcard regexp is the result of running generate a
681 // segment wildcard regexp given options.
683 encoding_callback, generate_segment_wildcard_regexp(options));
684 // Set parser's token list to the result of running tokenize given input and
685 // "strict".
686 auto tokenize_result = tokenize(input, token_policy::strict);
687 if (!tokenize_result) {
688 ada_log("parse_pattern_string tokenize failed");
689 return tl::unexpected(tokenize_result.error());
690 }
691 parser.tokens = std::move(*tokenize_result);
692
693 // While parser's index is less than parser's token list's size:
694 while (parser.can_continue()) {
695 // Let char token be the result of running try to consume a token given
696 // parser and "char".
697 auto char_token = parser.try_consume_token(token_type::CHAR);
698 // Let name token be the result of running try to consume a token given
699 // parser and "name".
700 auto name_token = parser.try_consume_token(token_type::NAME);
701 // Let regexp or wildcard token be the result of running try to consume a
702 // regexp or wildcard token given parser and name token.
703 auto regexp_or_wildcard_token =
704 parser.try_consume_regexp_or_wildcard_token(name_token);
705 // If name token is not null or regexp or wildcard token is not null:
706 if (name_token || regexp_or_wildcard_token) {
707 // Let prefix be the empty string.
708 std::string prefix{};
709 // If char token is not null then set prefix to char token's value.
710 if (char_token) prefix = char_token->value;
711 // If prefix is not the empty string and not options's prefix code point:
712 if (!prefix.empty() && prefix != options.get_prefix()) {
713 // Append prefix to the end of parser's pending fixed value.
714 parser.pending_fixed_value.append(prefix);
715 // Set prefix to the empty string.
716 prefix.clear();
717 }
718 // Run maybe add a part from the pending fixed value given parser.
719 if (auto error = parser.maybe_add_part_from_the_pending_fixed_value()) {
720 ada_log("maybe_add_part_from_the_pending_fixed_value failed");
721 return tl::unexpected(*error);
722 }
723 // Let modifier token be the result of running try to consume a modifier
724 // token given parser.
725 auto modifier_token = parser.try_consume_modifier_token();
726 // Run add a part given parser, prefix, name token, regexp or wildcard
727 // token, the empty string, and modifier token.
728 if (auto error =
729 parser.add_part(prefix, name_token, regexp_or_wildcard_token, "",
730 modifier_token)) {
731 ada_log("parser.add_part failed");
732 return tl::unexpected(*error);
733 }
734 // Continue.
735 continue;
736 }
737
738 // Let fixed token be char token.
739 auto fixed_token = char_token;
740 // If fixed token is null, then set fixed token to the result of running try
741 // to consume a token given parser and "escaped-char".
742 if (!fixed_token)
743 fixed_token = parser.try_consume_token(token_type::ESCAPED_CHAR);
744 // If fixed token is not null:
745 if (fixed_token) {
746 // Append fixed token's value to parser's pending fixed value.
747 parser.pending_fixed_value.append(fixed_token->value);
748 // Continue.
749 continue;
750 }
751 // Let open token be the result of running try to consume a token given
752 // parser and "open".
753 auto open_token = parser.try_consume_token(token_type::OPEN);
754 // If open token is not null:
755 if (open_token) {
756 // Set prefix be the result of running consume text given parser.
757 auto prefix_ = parser.consume_text();
758 // Set name token to the result of running try to consume a token given
759 // parser and "name".
760 name_token = parser.try_consume_token(token_type::NAME);
761 // Set regexp or wildcard token to the result of running try to consume a
762 // regexp or wildcard token given parser and name token.
763 regexp_or_wildcard_token =
764 parser.try_consume_regexp_or_wildcard_token(name_token);
765 // Let suffix be the result of running consume text given parser.
766 auto suffix_ = parser.consume_text();
767 // Run consume a required token given parser and "close".
768 if (!parser.consume_required_token(token_type::CLOSE)) {
769 ada_log("parser.consume_required_token failed");
770 return tl::unexpected(errors::type_error);
771 }
772 // Set modifier token to the result of running try to consume a modifier
773 // token given parser.
774 auto modifier_token = parser.try_consume_modifier_token();
775 // Run add a part given parser, prefix, name token, regexp or wildcard
776 // token, suffix, and modifier token.
777 if (auto error =
778 parser.add_part(prefix_, name_token, regexp_or_wildcard_token,
779 suffix_, modifier_token)) {
780 return tl::unexpected(*error);
781 }
782 // Continue.
783 continue;
784 }
785 // Run maybe add a part from the pending fixed value given parser.
// NOTE(review): the "line 992" in the log message below is a hard-coded
// number and does not match this line's position in the listing.
786 if (auto error = parser.maybe_add_part_from_the_pending_fixed_value()) {
787 ada_log("maybe_add_part_from_the_pending_fixed_value failed on line 992");
788 return tl::unexpected(*error);
789 }
790 // Run consume a required token given parser and "end".
791 if (!parser.consume_required_token(token_type::END)) {
792 return tl::unexpected(errors::type_error);
793 }
794 }
795 ada_log("parser.parts size is: ", parser.parts.size());
796 // Return parser's part list.
// NOTE(review): `parser.parts` is a member expression, so this return copies
// the vector rather than moving it — consider std::move(parser.parts).
797 return parser.parts;
798}
799
// Returns true when the component's regexp matches any of the schemes
// "http", "https", "ws", "wss" or "ftp"; evaluation stops at the first match.
// NOTE(review): the signature lines (listing lines 801-802) are missing from
// this listing; the name is taken from the call in
// compute_protocol_matches_special_scheme_flag.
800template <url_pattern_regex::regex_concept regex_provider>
803 // let's avoid unnecessary copy here.
804 auto& regex = component.regexp;
805 return regex_provider::regex_match("http", regex) ||
806 regex_provider::regex_match("https", regex) ||
807 regex_provider::regex_match("ws", regex) ||
808 regex_provider::regex_match("wss", regex) ||
809 regex_provider::regex_match("ftp", regex);
810}
811
// Compiles the current component string as a protocol component and sets
// protocol_matches_a_special_scheme_flag to true when it matches a special
// scheme. The flag is never reset to false here. Returns the compile error on
// failure and std::nullopt otherwise.
// NOTE(review): listing lines 814 (rest of the signature) and 825 (last
// argument of the compile call) are missing from this listing; the name is
// taken from the log message below.
812template <url_pattern_regex::regex_concept regex_provider>
813inline std::optional<errors> constructor_string_parser<
815 ada_log(
816 "constructor_string_parser::compute_protocol_matches_special_scheme_"
817 "flag");
818 // Let protocol string be the result of running make a component string given
819 // parser.
820 auto protocol_string = make_component_string();
821 // Let protocol component be the result of compiling a component given
822 // protocol string, canonicalize a protocol, and default options.
823 auto protocol_component = url_pattern_component<regex_provider>::compile(
824 protocol_string, canonicalize_protocol,
826 if (!protocol_component) {
827 ada_log("url_pattern_component::compile failed for protocol_string ",
828 protocol_string);
829 return protocol_component.error();
830 }
831 // If the result of running protocol component matches a special scheme given
832 // protocol component is true, then set parser's protocol matches a special
833 // scheme flag to true.
834 if (protocol_component_matches_special_scheme(*protocol_component)) {
835 protocol_matches_a_special_scheme_flag = true;
836 }
837 return std::nullopt;
838}
839
840template <url_pattern_regex::regex_concept regex_provider>
841tl::expected<url_pattern_init, errors>
843 ada_log("constructor_string_parser::parse input=", input);
844 // Let parser be a new constructor string parser whose input is input and
845 // token list is the result of running tokenize given input and "lenient".
846 auto token_list = tokenize(input, token_policy::lenient);
847 if (!token_list) {
848 return tl::unexpected(token_list.error());
849 }
850 auto parser = constructor_string_parser(input, std::move(*token_list));
851
852 // While parser's token index is less than parser's token list size:
853 while (parser.token_index < parser.token_list.size()) {
854 // Set parser's token increment to 1.
855 parser.token_increment = 1;
856
857 // If parser's token list[parser's token index]'s type is "end" then:
858 if (parser.token_list[parser.token_index].type == token_type::END) {
859 // If parser's state is "init":
860 if (parser.state == State::INIT) {
861 // Run rewind given parser.
862 parser.rewind();
863 // If the result of running is a hash prefix given parser is true, then
864 // run change state given parser, "hash" and 1.
865 if (parser.is_hash_prefix()) {
866 parser.change_state(State::HASH, 1);
867 } else if (parser.is_search_prefix()) {
868 // Otherwise if the result of running is a search prefix given parser
869 // is true: Run change state given parser, "search" and 1.
870 parser.change_state(State::SEARCH, 1);
871 } else {
872 // Run change state given parser, "pathname" and 0.
873 parser.change_state(State::PATHNAME, 0);
874 }
875 // Increment parser's token index by parser's token increment.
876 parser.token_index += parser.token_increment;
877 // Continue.
878 continue;
879 }
880
881 if (parser.state == State::AUTHORITY) {
882 // If parser's state is "authority":
883 // Run rewind and set state given parser, and "hostname".
884 parser.rewind();
885 parser.change_state(State::HOSTNAME, 0);
886 // Increment parser's token index by parser's token increment.
887 parser.token_index += parser.token_increment;
888 // Continue.
889 continue;
890 }
891
892 // Run change state given parser, "done" and 0.
893 parser.change_state(State::DONE, 0);
894 // Break.
895 break;
896 }
897
898 // If the result of running is a group open given parser is true:
899 if (parser.is_group_open()) {
900 // Increment parser's group depth by 1.
901 parser.group_depth += 1;
902 // Increment parser's token index by parser's token increment.
903 parser.token_index += parser.token_increment;
904 }
905
906 // If parser's group depth is greater than 0:
907 if (parser.group_depth > 0) {
908 // If the result of running is a group close given parser is true, then
909 // decrement parser's group depth by 1.
910 if (parser.is_group_close()) {
911 parser.group_depth -= 1;
912 } else {
913 // Increment parser's token index by parser's token increment.
914 parser.token_index += parser.token_increment;
915 continue;
916 }
917 }
918
919 // Switch on parser's state and run the associated steps:
920 switch (parser.state) {
921 case State::INIT: {
922 // If the result of running is a protocol suffix given parser is true:
923 if (parser.is_protocol_suffix()) {
924 // Run rewind and set state given parser and "protocol".
925 parser.rewind();
926 parser.change_state(State::PROTOCOL, 0);
927 }
928 break;
929 }
930 case State::PROTOCOL: {
931 // If the result of running is a protocol suffix given parser is true:
932 if (parser.is_protocol_suffix()) {
933 // Run compute protocol matches a special scheme flag given parser.
934 if (const auto error =
935 parser.compute_protocol_matches_special_scheme_flag()) {
936 ada_log("compute_protocol_matches_special_scheme_flag failed");
937 return tl::unexpected(*error);
938 }
939 // Let next state be "pathname".
940 auto next_state = State::PATHNAME;
941 // Let skip be 1.
942 auto skip = 1;
943 // If the result of running next is authority slashes given parser is
944 // true:
945 if (parser.next_is_authority_slashes()) {
946 // Set next state to "authority".
947 next_state = State::AUTHORITY;
948 // Set skip to 3.
949 skip = 3;
950 } else if (parser.protocol_matches_a_special_scheme_flag) {
951 // Otherwise if parser's protocol matches a special scheme flag is
952 // true, then set next state to "authority".
953 next_state = State::AUTHORITY;
954 }
955
956 // Run change state given parser, next state, and skip.
957 parser.change_state(next_state, skip);
958 }
959 break;
960 }
961 case State::AUTHORITY: {
962 // If the result of running is an identity terminator given parser is
963 // true, then run rewind and set state given parser and "username".
964 if (parser.is_an_identity_terminator()) {
965 parser.rewind();
966 parser.change_state(State::USERNAME, 0);
967 } else if (parser.is_pathname_start() || parser.is_search_prefix() ||
968 parser.is_hash_prefix()) {
969 // Otherwise if any of the following are true:
970 // - the result of running is a pathname start given parser;
971 // - the result of running is a search prefix given parser; or
972 // - the result of running is a hash prefix given parser,
973 // then run rewind and set state given parser and "hostname".
974 parser.rewind();
975 parser.change_state(State::HOSTNAME, 0);
976 }
977 break;
978 }
979 case State::USERNAME: {
980 // If the result of running is a password prefix given parser is true,
981 // then run change state given parser, "password", and 1.
982 if (parser.is_password_prefix()) {
983 parser.change_state(State::PASSWORD, 1);
984 } else if (parser.is_an_identity_terminator()) {
985 // Otherwise if the result of running is an identity terminator given
986 // parser is true, then run change state given parser, "hostname",
987 // and 1.
988 parser.change_state(State::HOSTNAME, 1);
989 }
990 break;
991 }
992 case State::PASSWORD: {
993 // If the result of running is an identity terminator given parser is
994 // true, then run change state given parser, "hostname", and 1.
995 if (parser.is_an_identity_terminator()) {
996 parser.change_state(State::HOSTNAME, 1);
997 }
998 break;
999 }
1000 case State::HOSTNAME: {
1001 // If the result of running is an IPv6 open given parser is true, then
1002 // increment parser's hostname IPv6 bracket depth by 1.
1003 if (parser.is_an_ipv6_open()) {
1004 parser.hostname_ipv6_bracket_depth += 1;
1005 } else if (parser.is_an_ipv6_close()) {
1006 // Otherwise if the result of running is an IPv6 close given parser is
1007 // true, then decrement parser's hostname IPv6 bracket depth by 1.
1008 parser.hostname_ipv6_bracket_depth -= 1;
1009 } else if (parser.is_port_prefix() &&
1010 parser.hostname_ipv6_bracket_depth == 0) {
1011 // Otherwise if the result of running is a port prefix given parser is
1012 // true and parser's hostname IPv6 bracket depth is zero, then run
1013 // change state given parser, "port", and 1.
1014 parser.change_state(State::PORT, 1);
1015 } else if (parser.is_pathname_start()) {
1016 // Otherwise if the result of running is a pathname start given parser
1017 // is true, then run change state given parser, "pathname", and 0.
1018 parser.change_state(State::PATHNAME, 0);
1019 } else if (parser.is_search_prefix()) {
1020 // Otherwise if the result of running is a search prefix given parser
1021 // is true, then run change state given parser, "search", and 1.
1022 parser.change_state(State::SEARCH, 1);
1023 } else if (parser.is_hash_prefix()) {
1024 // Otherwise if the result of running is a hash prefix given parser is
1025 // true, then run change state given parser, "hash", and 1.
1026 parser.change_state(State::HASH, 1);
1027 }
1028
1029 break;
1030 }
1031 case State::PORT: {
1032 // If the result of running is a pathname start given parser is true,
1033 // then run change state given parser, "pathname", and 0.
1034 if (parser.is_pathname_start()) {
1035 parser.change_state(State::PATHNAME, 0);
1036 } else if (parser.is_search_prefix()) {
1037 // Otherwise if the result of running is a search prefix given parser
1038 // is true, then run change state given parser, "search", and 1.
1039 parser.change_state(State::SEARCH, 1);
1040 } else if (parser.is_hash_prefix()) {
1041 // Otherwise if the result of running is a hash prefix given parser is
1042 // true, then run change state given parser, "hash", and 1.
1043 parser.change_state(State::HASH, 1);
1044 }
1045 break;
1046 }
1047 case State::PATHNAME: {
1048 // If the result of running is a search prefix given parser is true,
1049 // then run change state given parser, "search", and 1.
1050 if (parser.is_search_prefix()) {
1051 parser.change_state(State::SEARCH, 1);
1052 } else if (parser.is_hash_prefix()) {
1053 // Otherwise if the result of running is a hash prefix given parser is
1054 // true, then run change state given parser, "hash", and 1.
1055 parser.change_state(State::HASH, 1);
1056 }
1057 break;
1058 }
1059 case State::SEARCH: {
1060 // If the result of running is a hash prefix given parser is true, then
1061 // run change state given parser, "hash", and 1.
1062 if (parser.is_hash_prefix()) {
1063 parser.change_state(State::HASH, 1);
1064 }
1065 break;
1066 }
1067 case State::HASH: {
1068 // Do nothing
1069 break;
1070 }
1071 default: {
1072 // Assert: This step is never reached.
1073 unreachable();
1074 }
1075 }
1076
1077 // Increment parser's token index by parser's token increment.
1078 parser.token_index += parser.token_increment;
1079 }
1080
1081 // If parser's result contains "hostname" and not "port", then set parser's
1082 // result["port"] to the empty string.
1083 if (parser.result.hostname && !parser.result.port) {
1084 parser.result.port = "";
1085 }
1086
1087 // Return parser's result.
1088 return parser.result;
1089}
1090
1091} // namespace ada::url_pattern_helpers
1092
1093#endif
static tl::expected< url_pattern_component, errors > compile(std::string_view input, F &encoding_callback, url_pattern_compile_component_options &options)
regex_provider::regex_type regexp
void add_token_with_default_length(token_type type, size_t next_position, size_t value_position)
void add_token(token_type type, size_t next_position, size_t value_position, size_t value_length)
constexpr void seek_and_get_next_code_point(size_t index)
std::optional< errors > process_tokenizing_error(size_t next_position, size_t value_position) ada_warn_unused
std::optional< errors > add_part(std::string_view prefix, token *name_token, token *regexp_or_wildcard_token, std::string_view suyffix, token *modifier_token) ada_warn_unused
token * try_consume_regexp_or_wildcard_token(const token *name_token)
std::optional< errors > maybe_add_part_from_the_pending_fixed_value() ada_warn_unused
Common definitions for cross-platform compiler support.
#define ADA_ASSERT_TRUE(COND)
#define ada_warn_unused
Definition common_defs.h:85
Definitions for user-facing functions for parsing a URL and its components.
Includes the definitions for supported parsers.
Definition parser-inl.h:18
bool protocol_component_matches_special_scheme(url_pattern_component< regex_provider > &component)
tl::expected< std::vector< token >, errors > tokenize(std::string_view input, token_policy policy)
std::string generate_segment_wildcard_regexp(url_pattern_compile_component_options options)
tl::expected< std::string, errors > canonicalize_protocol(std::string_view input)
tl::expected< std::vector< url_pattern_part >, errors > parse_pattern_string(std::string_view input, url_pattern_compile_component_options &options, F &encoding_callback)
ada_warn_unused std::string to_string(encoding_type type)
errors
Definition errors.h:10
@ type_error
Definition errors.h:10
state
Definition state.h:17
void unreachable()
tl::expected< result_type, ada::errors > result
std::string_view get_prefix() const ada_warn_unused
static url_pattern_compile_component_options DEFAULT
constructor_string_parser(std::string_view new_input, std::vector< token > &&new_token_list)
static tl::expected< url_pattern_init, errors > parse(std::string_view input)
ada::url_pattern_regex::std_regex_provider regex_provider
Definition url_pattern.cc:9
Declaration for the URLPattern helpers.