Ada 3.0.1
Fast spec-compliant URL parser
url_pattern_helpers-inl.h
1/**
2 * @file url_pattern_helpers-inl.h
3 * @brief Declaration for the URLPattern helpers.
4 */
5#ifndef ADA_URL_PATTERN_HELPERS_INL_H
6#define ADA_URL_PATTERN_HELPERS_INL_H
7
8#include <optional>
9#include <string_view>
10
11#include "ada/common_defs.h"
12#include "ada/expected.h"
13#include "ada/url_pattern_helpers.h"
14#include "ada/implementation.h"
15
16namespace ada::url_pattern_helpers {
17inline std::string to_string(token_type type) {
18 switch (type) {
19 case token_type::INVALID_CHAR:
20 return "INVALID_CHAR";
21 case token_type::OPEN:
22 return "OPEN";
23 case token_type::CLOSE:
24 return "CLOSE";
25 case token_type::REGEXP:
26 return "REGEXP";
27 case token_type::NAME:
28 return "NAME";
29 case token_type::CHAR:
30 return "CHAR";
31 case token_type::ESCAPED_CHAR:
32 return "ESCAPED_CHAR";
33 case token_type::OTHER_MODIFIER:
34 return "OTHER_MODIFIER";
35 case token_type::ASTERISK:
36 return "ASTERISK";
37 case token_type::END:
38 return "END";
39 default:
40 ada::unreachable();
41 }
42}
43
44template <url_pattern_regex::regex_concept regex_provider>
45void constructor_string_parser<regex_provider>::rewind() {
46 // Set parser’s token index to parser’s component start.
47 token_index = component_start;
48 // Set parser’s token increment to 0.
49 token_increment = 0;
50}
51
52template <url_pattern_regex::regex_concept regex_provider>
53bool constructor_string_parser<regex_provider>::is_hash_prefix() {
54 // Return the result of running is a non-special pattern char given parser,
55 // parser’s token index and "#".
56 return is_non_special_pattern_char(token_index, "#");
57}
58
59template <url_pattern_regex::regex_concept regex_provider>
60bool constructor_string_parser<regex_provider>::is_search_prefix() {
61 // If result of running is a non-special pattern char given parser, parser’s
62 // token index and "?" is true, then return true.
63 if (is_non_special_pattern_char(token_index, "?")) {
64 return true;
65 }
66
67 // If parser’s token list[parser’s token index]'s value is not "?", then
68 // return false.
69 if (token_list[token_index].value != "?") {
70 return false;
71 }
72
73 // If previous index is less than 0, then return true.
74 if (token_index == 0) return true;
75 // Let previous index be parser’s token index − 1.
76 auto previous_index = token_index - 1;
77 // Let previous token be the result of running get a safe token given parser
78 // and previous index.
79 auto previous_token = get_safe_token(previous_index);
80 ADA_ASSERT_TRUE(previous_token);
81 // If any of the following are true, then return false:
82 // - previous token’s type is "name".
83 // - previous token’s type is "regexp".
84 // - previous token’s type is "close".
85 // - previous token’s type is "asterisk".
86 return !(previous_token->type == token_type::NAME ||
87 previous_token->type == token_type::REGEXP ||
88 previous_token->type == token_type::CLOSE ||
89 previous_token->type == token_type::ASTERISK);
90}
91
92template <url_pattern_regex::regex_concept regex_provider>
93bool constructor_string_parser<regex_provider>::is_non_special_pattern_char(
94 size_t index, std::string_view value) {
95 // Let token be the result of running get a safe token given parser and index.
96 auto token = get_safe_token(index);
97 ADA_ASSERT_TRUE(token);
98
99 // If token’s value is not value, then return false.
100 if (token->value != value) {
101 return false;
102 }
103
104 // If any of the following are true:
105 // - token’s type is "char";
106 // - token’s type is "escaped-char"; or
107 // - token’s type is "invalid-char",
108 // - then return true.
109 return token->type == token_type::CHAR ||
110 token->type == token_type::ESCAPED_CHAR ||
111 token->type == token_type::INVALID_CHAR;
112}
113
114template <url_pattern_regex::regex_concept regex_provider>
115const token* constructor_string_parser<regex_provider>::get_safe_token(
116 size_t index) {
117 // If index is less than parser’s token list's size, then return parser’s
118 // token list[index].
119 if (index < token_list.size()) [[likely]] {
120 return &token_list[index];
121 }
122
123 // Assert: parser’s token list's size is greater than or equal to 1.
124 ADA_ASSERT_TRUE(!token_list.empty());
125
126 // Let token be parser’s token list[last index].
127 // Assert: token’s type is "end".
128 ADA_ASSERT_TRUE(token_list.back().type == token_type::END);
129
130 // Return token.
131 return &token_list.back();
132}
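// Editor's note (illustrative, not part of the original source): get a safe
// token clamps any out-of-range index to the trailing "end" token, so the
// lookahead helpers in this file never read past the token list. For a
// hypothetical token list built from the input "a":
//
//   // token_list = { CHAR "a", END "" }
//   // get_safe_token(0) -> &token_list[0]  (the CHAR token)
//   // get_safe_token(9) -> &token_list[1]  (the END token, not out of bounds)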
133
134template <url_pattern_regex::regex_concept regex_provider>
135bool constructor_string_parser<regex_provider>::is_group_open() const {
136 // If parser’s token list[parser’s token index]'s type is "open", then return
137 // true.
138 return token_list[token_index].type == token_type::OPEN;
139}
140
141template <url_pattern_regex::regex_concept regex_provider>
142bool constructor_string_parser<regex_provider>::is_group_close() const {
143 // If parser’s token list[parser’s token index]'s type is "close", then return
144 // true.
145 return token_list[token_index].type == token_type::CLOSE;
146}
147
148template <url_pattern_regex::regex_concept regex_provider>
149bool constructor_string_parser<regex_provider>::next_is_authority_slashes() {
150 // If the result of running is a non-special pattern char given parser,
151 // parser’s token index + 1, and "/" is false, then return false.
152 if (!is_non_special_pattern_char(token_index + 1, "/")) {
153 return false;
154 }
155 // If the result of running is a non-special pattern char given parser,
156 // parser’s token index + 2, and "/" is false, then return false.
157 if (!is_non_special_pattern_char(token_index + 2, "/")) {
158 return false;
159 }
160 return true;
161}
162
163template <url_pattern_regex::regex_concept regex_provider>
164bool constructor_string_parser<regex_provider>::is_protocol_suffix() {
165 // Return the result of running is a non-special pattern char given parser,
166 // parser’s token index, and ":".
167 return is_non_special_pattern_char(token_index, ":");
168}
169
170template <url_pattern_regex::regex_concept regex_provider>
171void constructor_string_parser<regex_provider>::change_state(State new_state,
172 size_t skip) {
173 // If parser’s state is not "init", not "authority", and not "done", then set
174 // parser’s result[parser’s state] to the result of running make a component
175 // string given parser.
176 if (state != State::INIT && state != State::AUTHORITY &&
177 state != State::DONE) {
178 auto value = make_component_string();
179 // TODO: Simplify this.
180 switch (state) {
181 case State::PROTOCOL: {
182 result.protocol = value;
183 break;
184 }
185 case State::USERNAME: {
186 result.username = value;
187 break;
188 }
189 case State::PASSWORD: {
190 result.password = value;
191 break;
192 }
193 case State::HOSTNAME: {
194 result.hostname = value;
195 break;
196 }
197 case State::PORT: {
198 result.port = value;
199 break;
200 }
201 case State::PATHNAME: {
202 result.pathname = value;
203 break;
204 }
205 case State::SEARCH: {
206 result.search = value;
207 break;
208 }
209 case State::HASH: {
210 result.hash = value;
211 break;
212 }
213 default:
214 ada::unreachable();
215 }
216 }
217
218 // If parser’s state is not "init" and new state is not "done", then:
219 if (state != State::INIT && new_state != State::DONE) {
220 // If parser’s state is "protocol", "authority", "username", or "password";
221 // new state is "port", "pathname", "search", or "hash"; and parser’s
222 // result["hostname"] does not exist, then set parser’s result["hostname"]
223 // to the empty string.
224 if ((state == State::PROTOCOL || state == State::AUTHORITY ||
225 state == State::USERNAME || state == State::PASSWORD) &&
226 (new_state == State::PORT || new_state == State::PATHNAME ||
227 new_state == State::SEARCH || new_state == State::HASH) &&
228 !result.hostname)
229 result.hostname = "";
230 }
231
232 // If parser’s state is "protocol", "authority", "username", "password",
233 // "hostname", or "port"; new state is "search" or "hash"; and parser’s
234 // result["pathname"] does not exist, then:
235 if ((state == State::PROTOCOL || state == State::AUTHORITY ||
236 state == State::USERNAME || state == State::PASSWORD ||
237 state == State::HOSTNAME || state == State::PORT) &&
238 (new_state == State::SEARCH || new_state == State::HASH) &&
239 !result.pathname) {
240 if (protocol_matches_a_special_scheme_flag) {
241 result.pathname = "/";
242 } else {
243 // Otherwise, set parser’s result["pathname"] to the empty string.
244 result.pathname = "";
245 }
246 }
247
248 // If parser’s state is "protocol", "authority", "username", "password",
249 // "hostname", "port", or "pathname"; new state is "hash"; and parser’s
250 // result["search"] does not exist, then set parser’s result["search"] to
251 // the empty string.
252 if ((state == State::PROTOCOL || state == State::AUTHORITY ||
253 state == State::USERNAME || state == State::PASSWORD ||
254 state == State::HOSTNAME || state == State::PORT ||
255 state == State::PATHNAME) &&
256 new_state == State::HASH && !result.search) {
257 result.search = "";
258 }
259
260 // Set parser’s state to new state.
261 state = new_state;
262 // Increment parser’s token index by skip.
263 token_index += skip;
264 // Set parser’s component start to parser’s token index.
265 component_start = token_index;
266 // Set parser’s token increment to 0.
267 token_increment = 0;
268}
269
270template <url_pattern_regex::regex_concept regex_provider>
271std::string constructor_string_parser<regex_provider>::make_component_string() {
272 // Assert: parser’s token index is less than parser’s token list's size.
273 ADA_ASSERT_TRUE(token_index < token_list.size());
274
275 // Let token be parser’s token list[parser’s token index].
276 // Let end index be token’s index.
277 const auto end_index = token_list[token_index].index;
278 // Let component start token be the result of running get a safe token given
279 // parser and parser’s component start.
280 const auto component_start_token = get_safe_token(component_start);
281 ADA_ASSERT_TRUE(component_start_token);
282 // Let component start input index be component start token’s index.
283 const auto component_start_input_index = component_start_token->index;
284 // Return the code point substring from component start input index to end
285 // index within parser’s input.
286 return input.substr(component_start_input_index,
287 end_index - component_start_input_index);
288}
289
290template <url_pattern_regex::regex_concept regex_provider>
291bool constructor_string_parser<regex_provider>::is_an_identity_terminator() {
292 // Return the result of running is a non-special pattern char given parser,
293 // parser’s token index, and "@".
294 return is_non_special_pattern_char(token_index, "@");
295}
296
297template <url_pattern_regex::regex_concept regex_provider>
298bool constructor_string_parser<regex_provider>::is_pathname_start() {
299 // Return the result of running is a non-special pattern char given parser,
300 // parser’s token index, and "/".
301 return is_non_special_pattern_char(token_index, "/");
302}
303
304template <url_pattern_regex::regex_concept regex_provider>
305bool constructor_string_parser<regex_provider>::is_password_prefix() {
306 // Return the result of running is a non-special pattern char given parser,
307 // parser’s token index, and ":".
308 return is_non_special_pattern_char(token_index, ":");
309}
310
311template <url_pattern_regex::regex_concept regex_provider>
312bool constructor_string_parser<regex_provider>::is_an_ipv6_open() {
313 // Return the result of running is a non-special pattern char given parser,
314 // parser’s token index, and "[".
315 return is_non_special_pattern_char(token_index, "[");
316}
317
318template <url_pattern_regex::regex_concept regex_provider>
319bool constructor_string_parser<regex_provider>::is_an_ipv6_close() {
320 // Return the result of running is a non-special pattern char given parser,
321 // parser’s token index, and "]".
322 return is_non_special_pattern_char(token_index, "]");
323}
324
325template <url_pattern_regex::regex_concept regex_provider>
326bool constructor_string_parser<regex_provider>::is_port_prefix() {
327 // Return the result of running is a non-special pattern char given parser,
328 // parser’s token index, and ":".
329 return is_non_special_pattern_char(token_index, ":");
330}
331
332inline void Tokenizer::get_next_code_point() {
333 ada_log("Tokenizer::get_next_code_point called with index=", next_index);
334 ADA_ASSERT_TRUE(next_index < input.size());
335 // this assumes that we have a valid, non-truncated UTF-8 stream.
336 code_point = 0;
337 size_t number_bytes = 0;
338 unsigned char first_byte = input[next_index];
339
340 if ((first_byte & 0x80) == 0) {
341 // 1-byte character (ASCII)
342 next_index++;
343 code_point = first_byte;
344 ada_log("Tokenizer::get_next_code_point returning ASCII code point=",
345 uint32_t(code_point));
346 ada_log("Tokenizer::get_next_code_point next_index =", next_index,
347 " input.size()=", input.size());
348 return;
349 }
350 ada_log("Tokenizer::get_next_code_point read first byte=",
351 uint32_t(first_byte));
352 if ((first_byte & 0xE0) == 0xC0) {
353 code_point = first_byte & 0x1F;
354 number_bytes = 2;
355 ada_log("Tokenizer::get_next_code_point two bytes");
356 } else if ((first_byte & 0xF0) == 0xE0) {
357 code_point = first_byte & 0x0F;
358 number_bytes = 3;
359 ada_log("Tokenizer::get_next_code_point three bytes");
360 } else if ((first_byte & 0xF8) == 0xF0) {
361 code_point = first_byte & 0x07;
362 number_bytes = 4;
363 ada_log("Tokenizer::get_next_code_point four bytes");
364 }
365 ADA_ASSERT_TRUE(number_bytes + next_index <= input.size());
366
367 for (size_t i = 1 + next_index; i < number_bytes + next_index; ++i) {
368 unsigned char byte = input[i];
369 ada_log("Tokenizer::get_next_code_point read byte=", uint32_t(byte));
370 code_point = (code_point << 6) | (byte & 0x3F);
371 }
372 ada_log("Tokenizer::get_next_code_point returning non-ASCII code point=",
373 uint32_t(code_point));
374 ada_log("Tokenizer::get_next_code_point next_index =", next_index,
375 " input.size()=", input.size());
376 next_index += number_bytes;
377}
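// Editor's note (worked example, not part of the original source): decoding
// the two-byte UTF-8 sequence 0xC3 0xA9 ("é", U+00E9) with the logic above:
//
//   // first_byte = 0xC3, (0xC3 & 0xE0) == 0xC0     -> number_bytes = 2
//   // code_point = 0xC3 & 0x1F                     -> 0x03
//   // continuation 0xA9: (0x03 << 6) | (0xA9 & 0x3F) = 0xE9
//   // next_index advances by 2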
378
379inline void Tokenizer::seek_and_get_next_code_point(size_t new_index) {
380 ada_log("Tokenizer::seek_and_get_next_code_point called with new_index=",
381 new_index);
382 // Set tokenizer’s next index to index.
383 next_index = new_index;
384 // Run get the next code point given tokenizer.
385 get_next_code_point();
386}
387
388inline void Tokenizer::add_token(token_type type, size_t next_position,
389 size_t value_position, size_t value_length) {
390 ada_log("Tokenizer::add_token called with type=", to_string(type),
391 " next_position=", next_position, " value_position=", value_position);
392 ADA_ASSERT_TRUE(next_position >= value_position);
393
394 // Let token be a new token.
395 // Set token’s type to type.
396 // Set token’s index to tokenizer’s index.
397 // Set token’s value to the code point substring from value position with
398 // length value length within tokenizer’s input.
399 // Append token to the back of tokenizer’s token list.
400 token_list.emplace_back(type, index,
401 input.substr(value_position, value_length));
402 // Set tokenizer’s index to next position.
403 index = next_position;
404}
405
406inline void Tokenizer::add_token_with_default_length(token_type type,
407 size_t next_position,
408 size_t value_position) {
409 // Let computed length be next position − value position.
410 auto computed_length = next_position - value_position;
411 // Run add a token given tokenizer, type, next position, value position, and
412 // computed length.
413 add_token(type, next_position, value_position, computed_length);
414}
415
416inline void Tokenizer::add_token_with_defaults(token_type type) {
417 ada_log("Tokenizer::add_token_with_defaults called with type=",
418 to_string(type));
419 // Run add a token with default length given tokenizer, type, tokenizer’s next
420 // index, and tokenizer’s index.
421 add_token_with_default_length(type, next_index, index);
422}
423
424inline ada_warn_unused std::optional<errors>
425Tokenizer::process_tokenizing_error(size_t next_position,
426 size_t value_position) {
427 // If tokenizer’s policy is "strict", then throw a TypeError.
428 if (policy == token_policy::strict) {
429 ada_log("process_tokenizing_error failed with next_position=",
430 next_position, " value_position=", value_position);
431 return errors::type_error;
432 }
433 // Assert: tokenizer’s policy is "lenient".
434 ADA_ASSERT_TRUE(policy == token_policy::lenient);
435 // Run add a token with default length given tokenizer, "invalid-char", next
436 // position, and value position.
437 add_token_with_default_length(token_type::INVALID_CHAR, next_position,
438 value_position);
439 return std::nullopt;
440}
441
442template <url_pattern_encoding_callback F>
443token* url_pattern_parser<F>::try_consume_modifier_token() {
444 // Let token be the result of running try to consume a token given parser and
445 // "other-modifier".
446 auto token = try_consume_token(token_type::OTHER_MODIFIER);
447 // If token is not null, then return token.
448 if (token) return token;
449 // Set token to the result of running try to consume a token given parser and
450 // "asterisk".
451 // Return token.
452 return try_consume_token(token_type::ASTERISK);
453}
454
455template <url_pattern_encoding_callback F>
456token* url_pattern_parser<F>::try_consume_regexp_or_wildcard_token(
457 const token* name_token) {
458 // Let token be the result of running try to consume a token given parser and
459 // "regexp".
460 auto token = try_consume_token(token_type::REGEXP);
461 // If name token is null and token is null, then set token to the result of
462 // running try to consume a token given parser and "asterisk".
463 if (!name_token && !token) {
464 token = try_consume_token(token_type::ASTERISK);
465 }
466 // Return token.
467 return token;
468}
469
470template <url_pattern_encoding_callback F>
471token* url_pattern_parser<F>::try_consume_token(token_type type) {
472 ada_log("url_pattern_parser::try_consume_token called with type=",
473 to_string(type));
474 // Assert: parser’s index is less than parser’s token list size.
475 ADA_ASSERT_TRUE(index < tokens.size());
476 // Let next token be parser’s token list[parser’s index].
477 auto& next_token = tokens[index];
478 // If next token’s type is not type return null.
479 if (next_token.type != type) return nullptr;
480 // Increase parser’s index by 1.
481 index++;
482 // Return next token.
483 return &next_token;
484}
485
486template <url_pattern_encoding_callback F>
487std::string url_pattern_parser<F>::consume_text() {
488 // Let result be the empty string.
489 std::string result{};
490 // While true:
491 while (true) {
492 // Let token be the result of running try to consume a token given parser
493 // and "char".
494 auto token = try_consume_token(token_type::CHAR);
495 // If token is null, then set token to the result of running try to consume
496 // a token given parser and "escaped-char".
497 if (!token) token = try_consume_token(token_type::ESCAPED_CHAR);
498 // If token is null, then break.
499 if (!token) break;
500 // Append token’s value to the end of result.
501 result.append(token->value);
502 }
503 // Return result.
504 return result;
505}
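// Editor's note (illustrative sketch, not part of the original source): for a
// hypothetical token stream CHAR "f", ESCAPED_CHAR "o", CHAR "o", NAME "id",
// consume_text() appends "f", "o", "o", stops at the NAME token, and returns
// "foo", leaving parser's index pointing at the NAME token.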
506
507template <url_pattern_encoding_callback F>
508bool url_pattern_parser<F>::consume_required_token(token_type type) {
509 ada_log("url_pattern_parser::consume_required_token called with type=",
510 to_string(type));
511 // Let result be the result of running try to consume a token given parser and
512 // type.
513 return try_consume_token(type) != nullptr;
514}
515
516template <url_pattern_encoding_callback F>
517std::optional<errors>
518url_pattern_parser<F>::maybe_add_part_from_the_pending_fixed_value() {
519 // If parser’s pending fixed value is the empty string, then return.
520 if (pending_fixed_value.empty()) {
521 ada_log("pending_fixed_value is empty");
522 return std::nullopt;
523 }
524 // Let encoded value be the result of running parser’s encoding callback given
525 // parser’s pending fixed value.
526 auto encoded_value = encoding_callback(pending_fixed_value);
527 if (!encoded_value) {
528 ada_log("failed to encode pending_fixed_value: ", pending_fixed_value);
529 return encoded_value.error();
530 }
531 // Set parser’s pending fixed value to the empty string.
532 pending_fixed_value.clear();
533 // Let part be a new part whose type is "fixed-text", value is encoded value,
534 // and modifier is "none".
535 // Append part to parser’s part list.
536 parts.emplace_back(url_pattern_part_type::FIXED_TEXT,
537 std::move(*encoded_value),
538 url_pattern_part_modifier::none);
539 return std::nullopt;
540}
541
542template <url_pattern_encoding_callback F>
543std::optional<errors> url_pattern_parser<F>::add_part(
544 std::string_view prefix, token* name_token, token* regexp_or_wildcard_token,
545 std::string_view suffix, token* modifier_token) {
546 // Let modifier be "none".
547 auto modifier = url_pattern_part_modifier::none;
548 // If modifier token is not null:
549 if (modifier_token) {
550 // If modifier token’s value is "?" then set modifier to "optional".
551 if (modifier_token->value == "?") {
552 modifier = url_pattern_part_modifier::optional;
553 } else if (modifier_token->value == "*") {
554 // Otherwise if modifier token’s value is "*" then set modifier to
555 // "zero-or-more".
556 modifier = url_pattern_part_modifier::zero_or_more;
557 } else if (modifier_token->value == "+") {
558 // Otherwise if modifier token’s value is "+" then set modifier to
559 // "one-or-more".
560 modifier = url_pattern_part_modifier::one_or_more;
561 }
562 }
563 // If name token is null and regexp or wildcard token is null and modifier
564 // is "none":
565 if (!name_token && !regexp_or_wildcard_token &&
566 modifier == url_pattern_part_modifier::none) {
567 // Append prefix to the end of parser’s pending fixed value.
568 pending_fixed_value.append(prefix);
569 return std::nullopt;
570 }
571 // Run maybe add a part from the pending fixed value given parser.
572 if (auto error = maybe_add_part_from_the_pending_fixed_value()) {
573 return *error;
574 }
575 // If name token is null and regexp or wildcard token is null:
576 if (!name_token && !regexp_or_wildcard_token) {
577 // Assert: suffix is the empty string.
578 ADA_ASSERT_TRUE(suffix.empty());
579 // If prefix is the empty string, then return.
580 if (prefix.empty()) return std::nullopt;
581 // Let encoded value be the result of running parser’s encoding callback
582 // given prefix.
583 auto encoded_value = encoding_callback(prefix);
584 if (!encoded_value) {
585 return encoded_value.error();
586 }
587 // Let part be a new part whose type is "fixed-text", value is encoded
588 // value, and modifier is modifier.
589 // Append part to parser’s part list.
590 parts.emplace_back(url_pattern_part_type::FIXED_TEXT,
591 std::move(*encoded_value), modifier);
592 return std::nullopt;
593 }
594 // Let regexp value be the empty string.
595 std::string regexp_value{};
596 // If regexp or wildcard token is null, then set regexp value to parser’s
597 // segment wildcard regexp.
598 if (!regexp_or_wildcard_token) {
599 regexp_value = segment_wildcard_regexp;
600 } else if (regexp_or_wildcard_token->type == token_type::ASTERISK) {
601 // Otherwise if regexp or wildcard token’s type is "asterisk", then set
602 // regexp value to the full wildcard regexp value.
603 regexp_value = ".*";
604 } else {
605 // Otherwise set regexp value to regexp or wildcard token’s value.
606 regexp_value = regexp_or_wildcard_token->value;
607 }
608 // Let type be "regexp".
609 auto type = url_pattern_part_type::REGEXP;
610 // If regexp value is parser’s segment wildcard regexp:
611 if (regexp_value == segment_wildcard_regexp) {
612 // Set type to "segment-wildcard".
613 type = url_pattern_part_type::SEGMENT_WILDCARD;
614 // Set regexp value to the empty string.
615 regexp_value.clear();
616 } else if (regexp_value == ".*") {
617 // Otherwise if regexp value is the full wildcard regexp value:
618 // Set type to "full-wildcard".
619 type = url_pattern_part_type::FULL_WILDCARD;
620 // Set regexp value to the empty string.
621 regexp_value.clear();
622 }
623 // Let name be the empty string.
624 std::string name{};
625 // If name token is not null, then set name to name token’s value.
626 if (name_token) {
627 name = name_token->value;
628 } else if (regexp_or_wildcard_token) {
629 // Otherwise if regexp or wildcard token is not null:
630 // Set name to parser’s next numeric name, serialized.
631 // TODO: Make sure this is correct.
632 name = std::to_string(next_numeric_name);
633 // Increment parser’s next numeric name by 1.
634 next_numeric_name++;
635 }
636 // If the result of running is a duplicate name given parser and name is
637 // true, then throw a TypeError.
638 if (std::ranges::any_of(
639 parts, [&name](const auto& part) { return part.name == name; })) {
640 return errors::type_error;
641 }
642 // Let encoded prefix be the result of running parser’s encoding callback
643 // given prefix.
644 auto encoded_prefix = encoding_callback(prefix);
645 if (!encoded_prefix) return encoded_prefix.error();
646 // Let encoded suffix be the result of running parser’s encoding callback
647 // given suffix.
648 auto encoded_suffix = encoding_callback(suffix);
649 if (!encoded_suffix) return encoded_suffix.error();
650 // Let part be a new part whose type is type, value is regexp value,
651 // modifier is modifier, name is name, prefix is encoded prefix, and suffix
652 // is encoded suffix.
653 // Append part to parser’s part list.
654 parts.emplace_back(type, std::move(regexp_value), modifier, std::move(name),
655 std::move(*encoded_prefix), std::move(*encoded_suffix));
656 return std::nullopt;
657}
658
659template <url_pattern_encoding_callback F>
660tl::expected<std::vector<url_pattern_part>, errors> parse_pattern_string(
661 std::string_view input, url_pattern_compile_component_options& options,
662 F& encoding_callback) {
663 ada_log("parse_pattern_string input=", input);
664 // Let parser be a new pattern parser whose encoding callback is encoding
665 // callback and segment wildcard regexp is the result of running generate a
666 // segment wildcard regexp given options.
667 auto parser = url_pattern_parser<F>(
668 encoding_callback, generate_segment_wildcard_regexp(options));
669 // Set parser’s token list to the result of running tokenize given input and
670 // "strict".
671 auto tokenize_result = tokenize(input, token_policy::strict);
672 if (!tokenize_result) {
673 ada_log("parse_pattern_string tokenize failed");
674 return tl::unexpected(tokenize_result.error());
675 }
676 parser.tokens = std::move(*tokenize_result);
677
678 // While parser’s index is less than parser’s token list's size:
679 while (parser.can_continue()) {
680 // Let char token be the result of running try to consume a token given
681 // parser and "char".
682 auto char_token = parser.try_consume_token(token_type::CHAR);
683 // Let name token be the result of running try to consume a token given
684 // parser and "name".
685 auto name_token = parser.try_consume_token(token_type::NAME);
686 // Let regexp or wildcard token be the result of running try to consume a
687 // regexp or wildcard token given parser and name token.
688 auto regexp_or_wildcard_token =
689 parser.try_consume_regexp_or_wildcard_token(name_token);
690 // If name token is not null or regexp or wildcard token is not null:
691 if (name_token || regexp_or_wildcard_token) {
692 // Let prefix be the empty string.
693 std::string prefix{};
694 // If char token is not null then set prefix to char token’s value.
695 if (char_token) prefix = char_token->value;
696 // If prefix is not the empty string and not options’s prefix code point:
697 if (!prefix.empty() && prefix != options.get_prefix()) {
698 // Append prefix to the end of parser’s pending fixed value.
699 parser.pending_fixed_value.append(prefix);
700 // Set prefix to the empty string.
701 prefix.clear();
702 }
703 // Run maybe add a part from the pending fixed value given parser.
704 if (auto error = parser.maybe_add_part_from_the_pending_fixed_value()) {
705 ada_log("maybe_add_part_from_the_pending_fixed_value failed");
706 return tl::unexpected(*error);
707 }
708 // Let modifier token be the result of running try to consume a modifier
709 // token given parser.
710 auto modifier_token = parser.try_consume_modifier_token();
711 // Run add a part given parser, prefix, name token, regexp or wildcard
712 // token, the empty string, and modifier token.
713 if (auto error =
714 parser.add_part(prefix, name_token, regexp_or_wildcard_token, "",
715 modifier_token)) {
716 ada_log("parser.add_part failed");
717 return tl::unexpected(*error);
718 }
719 // Continue.
720 continue;
721 }
722
723 // Let fixed token be char token.
724 auto fixed_token = char_token;
725 // If fixed token is null, then set fixed token to the result of running try
726 // to consume a token given parser and "escaped-char".
727 if (!fixed_token)
728 fixed_token = parser.try_consume_token(token_type::ESCAPED_CHAR);
729 // If fixed token is not null:
730 if (fixed_token) {
731 // Append fixed token’s value to parser’s pending fixed value.
732 parser.pending_fixed_value.append(fixed_token->value);
733 // Continue.
734 continue;
735 }
736 // Let open token be the result of running try to consume a token given
737 // parser and "open".
738 auto open_token = parser.try_consume_token(token_type::OPEN);
739 // If open token is not null:
740 if (open_token) {
741 // Set prefix be the result of running consume text given parser.
742 auto prefix_ = parser.consume_text();
743 // Set name token to the result of running try to consume a token given
744 // parser and "name".
745 name_token = parser.try_consume_token(token_type::NAME);
746 // Set regexp or wildcard token to the result of running try to consume a
747 // regexp or wildcard token given parser and name token.
748 regexp_or_wildcard_token =
749 parser.try_consume_regexp_or_wildcard_token(name_token);
750 // Let suffix be the result of running consume text given parser.
751 auto suffix_ = parser.consume_text();
752 // Run consume a required token given parser and "close".
753 if (!parser.consume_required_token(token_type::CLOSE)) {
754 ada_log("parser.consume_required_token failed");
755 return tl::unexpected(errors::type_error);
756 }
757 // Set modifier token to the result of running try to consume a modifier
758 // token given parser.
759 auto modifier_token = parser.try_consume_modifier_token();
760 // Run add a part given parser, prefix, name token, regexp or wildcard
761 // token, suffix, and modifier token.
762 if (auto error =
763 parser.add_part(prefix_, name_token, regexp_or_wildcard_token,
764 suffix_, modifier_token)) {
765 return tl::unexpected(*error);
766 }
767 // Continue.
768 continue;
769 }
770 // Run maybe add a part from the pending fixed value given parser.
771 if (auto error = parser.maybe_add_part_from_the_pending_fixed_value()) {
772 ada_log("maybe_add_part_from_the_pending_fixed_value failed on line 992");
773 return tl::unexpected(*error);
774 }
775 // Run consume a required token given parser and "end".
776 if (!parser.consume_required_token(token_type::END)) {
777 return tl::unexpected(errors::type_error);
778 }
779 }
780 ada_log("parser.parts size is: ", parser.parts.size());
781 // Return parser’s part list.
782 return parser.parts;
783}
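// Editor's note (usage sketch, not part of the original source; assumes
// pathname component options whose prefix code point is "/"): a pattern such
// as "/books/:id" is expected to yield, roughly,
//
//   // { FIXED_TEXT, value "/books" }
//   // { SEGMENT_WILDCARD, name "id", prefix "/", modifier none }
//
// and an unnamed group such as "(\d+)" would instead receive the serialized
// numeric name "0" from parser's next numeric name.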
784
785template <url_pattern_regex::regex_concept regex_provider>
786inline bool protocol_component_matches_special_scheme(
787 url_pattern_component<regex_provider>& component) {
788 // let's avoid unnecessary copy here.
789 auto& regex = component.regexp;
790 return regex_provider::regex_match("http", regex) ||
791 regex_provider::regex_match("https", regex) ||
792 regex_provider::regex_match("ws", regex) ||
793 regex_provider::regex_match("wss", regex) ||
794 regex_provider::regex_match("ftp", regex);
795}
796
797template <url_pattern_regex::regex_concept regex_provider>
798inline std::optional<errors> constructor_string_parser<
799 regex_provider>::compute_protocol_matches_special_scheme_flag() {
800 ada_log(
801 "constructor_string_parser::compute_protocol_matches_special_scheme_"
802 "flag");
803 // Let protocol string be the result of running make a component string given
804 // parser.
805 auto protocol_string = make_component_string();
806 // Let protocol component be the result of compiling a component given
807 // protocol string, canonicalize a protocol, and default options.
808 auto protocol_component = url_pattern_component<regex_provider>::compile(
809 protocol_string, canonicalize_protocol,
810 url_pattern_compile_component_options::DEFAULT);
811 if (!protocol_component) {
812 ada_log("url_pattern_component::compile failed for protocol_string ",
813 protocol_string);
814 return protocol_component.error();
815 }
816 // If the result of running protocol component matches a special scheme given
817 // protocol component is true, then set parser’s protocol matches a special
818 // scheme flag to true.
819 if (protocol_component_matches_special_scheme(*protocol_component)) {
820 protocol_matches_a_special_scheme_flag = true;
821 }
822 return std::nullopt;
823}
824
825template <url_pattern_regex::regex_concept regex_provider>
826tl::expected<url_pattern_init, errors>
827constructor_string_parser<regex_provider>::parse(std::string_view input) {
828 ada_log("constructor_string_parser::parse input=", input);
829 // Let parser be a new constructor string parser whose input is input and
830 // token list is the result of running tokenize given input and "lenient".
831 auto token_list = tokenize(input, token_policy::lenient);
832 if (!token_list) {
833 return tl::unexpected(token_list.error());
834 }
835 auto parser = constructor_string_parser(input, std::move(*token_list));
836
837 // While parser’s token index is less than parser’s token list size:
838 while (parser.token_index < parser.token_list.size()) {
839 // Set parser’s token increment to 1.
840 parser.token_increment = 1;
841
842 // If parser’s token list[parser’s token index]'s type is "end" then:
843 if (parser.token_list[parser.token_index].type == token_type::END) {
844 // If parser’s state is "init":
845 if (parser.state == State::INIT) {
846 // Run rewind given parser.
847 parser.rewind();
848 // If the result of running is a hash prefix given parser is true, then
849 // run change state given parser, "hash" and 1.
850 if (parser.is_hash_prefix()) {
851 parser.change_state(State::HASH, 1);
852 } else if (parser.is_search_prefix()) {
853 // Otherwise if the result of running is a search prefix given parser
854 // is true: Run change state given parser, "search" and 1.
855 parser.change_state(State::SEARCH, 1);
856 } else {
857 // Run change state given parser, "pathname" and 0.
858 parser.change_state(State::PATHNAME, 0);
859 }
860 // Increment parser’s token index by parser’s token increment.
861 parser.token_index += parser.token_increment;
862 // Continue.
863 continue;
864 }
865
866 if (parser.state == State::AUTHORITY) {
867 // If parser’s state is "authority":
868 // Run rewind and set state given parser, and "hostname".
869 parser.rewind();
870 parser.change_state(State::HOSTNAME, 0);
871 // Increment parser’s token index by parser’s token increment.
872 parser.token_index += parser.token_increment;
873 // Continue.
874 continue;
875 }
876
877 // Run change state given parser, "done" and 0.
878 parser.change_state(State::DONE, 0);
879 // Break.
880 break;
881 }
882
883 // If the result of running is a group open given parser is true:
884 if (parser.is_group_open()) {
885 // Increment parser’s group depth by 1.
886 parser.group_depth += 1;
887 // Increment parser’s token index by parser’s token increment.
888 parser.token_index += parser.token_increment;
889 }
890
891 // If parser’s group depth is greater than 0:
892 if (parser.group_depth > 0) {
893 // If the result of running is a group close given parser is true, then
894 // decrement parser’s group depth by 1.
895 if (parser.is_group_close()) {
896 parser.group_depth -= 1;
897 } else {
898 // Increment parser’s token index by parser’s token increment.
899 parser.token_index += parser.token_increment;
900 continue;
901 }
902 }
903
904 // Switch on parser’s state and run the associated steps:
905 switch (parser.state) {
906 case State::INIT: {
907 // If the result of running is a protocol suffix given parser is true:
908 if (parser.is_protocol_suffix()) {
909 // Run rewind and set state given parser and "protocol".
910 parser.rewind();
911 parser.change_state(State::PROTOCOL, 0);
912 }
913 break;
914 }
915 case State::PROTOCOL: {
916 // If the result of running is a protocol suffix given parser is true:
917 if (parser.is_protocol_suffix()) {
918 // Run compute protocol matches a special scheme flag given parser.
919 if (const auto error =
920 parser.compute_protocol_matches_special_scheme_flag()) {
921 ada_log("compute_protocol_matches_special_scheme_flag failed");
922 return tl::unexpected(*error);
923 }
924 // Let next state be "pathname".
925 auto next_state = State::PATHNAME;
926 // Let skip be 1.
927 auto skip = 1;
928 // If the result of running next is authority slashes given parser is
929 // true:
930 if (parser.next_is_authority_slashes()) {
931 // Set next state to "authority".
932 next_state = State::AUTHORITY;
933 // Set skip to 3.
934 skip = 3;
935 } else if (parser.protocol_matches_a_special_scheme_flag) {
936 // Otherwise if parser’s protocol matches a special scheme flag is
937 // true, then set next state to "authority".
938 next_state = State::AUTHORITY;
939 }
940
941 // Run change state given parser, next state, and skip.
942 parser.change_state(next_state, skip);
943 }
944 break;
945 }
946 case State::AUTHORITY: {
947 // If the result of running is an identity terminator given parser is
948 // true, then run rewind and set state given parser and "username".
949 if (parser.is_an_identity_terminator()) {
950 parser.rewind();
951 parser.change_state(State::USERNAME, 0);
952 } else if (parser.is_pathname_start() || parser.is_search_prefix() ||
953 parser.is_hash_prefix()) {
954 // Otherwise if any of the following are true:
955 // - the result of running is a pathname start given parser;
956 // - the result of running is a search prefix given parser; or
957 // - the result of running is a hash prefix given parser,
958 // then run rewind and set state given parser and "hostname".
959 parser.rewind();
960 parser.change_state(State::HOSTNAME, 0);
961 }
962 break;
963 }
964 case State::USERNAME: {
965 // If the result of running is a password prefix given parser is true,
966 // then run change state given parser, "password", and 1.
967 if (parser.is_password_prefix()) {
968 parser.change_state(State::PASSWORD, 1);
969 } else if (parser.is_an_identity_terminator()) {
970 // Otherwise if the result of running is an identity terminator given
971 // parser is true, then run change state given parser, "hostname",
972 // and 1.
973 parser.change_state(State::HOSTNAME, 1);
974 }
975 break;
976 }
977 case State::PASSWORD: {
978 // If the result of running is an identity terminator given parser is
979 // true, then run change state given parser, "hostname", and 1.
980 if (parser.is_an_identity_terminator()) {
981 parser.change_state(State::HOSTNAME, 1);
982 }
983 break;
984 }
985 case State::HOSTNAME: {
986 // If the result of running is an IPv6 open given parser is true, then
987 // increment parser’s hostname IPv6 bracket depth by 1.
988 if (parser.is_an_ipv6_open()) {
989 parser.hostname_ipv6_bracket_depth += 1;
990 } else if (parser.is_an_ipv6_close()) {
991 // Otherwise if the result of running is an IPv6 close given parser is
992 // true, then decrement parser’s hostname IPv6 bracket depth by 1.
993 parser.hostname_ipv6_bracket_depth -= 1;
994 } else if (parser.is_port_prefix() &&
995 parser.hostname_ipv6_bracket_depth == 0) {
996 // Otherwise if the result of running is a port prefix given parser is
997 // true and parser’s hostname IPv6 bracket depth is zero, then run
998 // change state given parser, "port", and 1.
999 parser.change_state(State::PORT, 1);
1000 } else if (parser.is_pathname_start()) {
1001 // Otherwise if the result of running is a pathname start given parser
1002 // is true, then run change state given parser, "pathname", and 0.
1003 parser.change_state(State::PATHNAME, 0);
1004 } else if (parser.is_search_prefix()) {
1005 // Otherwise if the result of running is a search prefix given parser
1006 // is true, then run change state given parser, "search", and 1.
1007 parser.change_state(State::SEARCH, 1);
1008 } else if (parser.is_hash_prefix()) {
1009 // Otherwise if the result of running is a hash prefix given parser is
1010 // true, then run change state given parser, "hash", and 1.
1011 parser.change_state(State::HASH, 1);
1012 }
1013
1014 break;
1015 }
1016 case State::PORT: {
1017 // If the result of running is a pathname start given parser is true,
1018 // then run change state given parser, "pathname", and 0.
1019 if (parser.is_pathname_start()) {
1020 parser.change_state(State::PATHNAME, 0);
1021 } else if (parser.is_search_prefix()) {
1022 // Otherwise if the result of running is a search prefix given parser
1023 // is true, then run change state given parser, "search", and 1.
1024 parser.change_state(State::SEARCH, 1);
1025 } else if (parser.is_hash_prefix()) {
1026 // Otherwise if the result of running is a hash prefix given parser is
1027 // true, then run change state given parser, "hash", and 1.
1028 parser.change_state(State::HASH, 1);
1029 }
1030 break;
1031 }
1032 case State::PATHNAME: {
1033 // If the result of running is a search prefix given parser is true,
1034 // then run change state given parser, "search", and 1.
1035 if (parser.is_search_prefix()) {
1036 parser.change_state(State::SEARCH, 1);
1037 } else if (parser.is_hash_prefix()) {
1038 // Otherwise if the result of running is a hash prefix given parser is
1039 // true, then run change state given parser, "hash", and 1.
1040 parser.change_state(State::HASH, 1);
1041 }
1042 break;
1043 }
1044 case State::SEARCH: {
1045 // If the result of running is a hash prefix given parser is true, then
1046 // run change state given parser, "hash", and 1.
1047 if (parser.is_hash_prefix()) {
1048 parser.change_state(State::HASH, 1);
1049 }
1050 }
1051 case State::HASH: {
1052 // Do nothing
1053 break;
1054 }
1055 default: {
1056 // Assert: This step is never reached.
1057 unreachable();
1058 }
1059 }
1060
1061 // Increment parser’s token index by parser’s token increment.
1062 parser.token_index += parser.token_increment;
1063 }
1064
1065 // If parser’s result contains "hostname" and not "port", then set parser’s
1066 // result["port"] to the empty string.
1067 if (parser.result.hostname && !parser.result.port) {
1068 parser.result.port = "";
1069 }
1070
1071 // Return parser’s result.
1072 return parser.result;
1073}
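// Editor's note (usage sketch, not part of the original source): the
// constructor string parser splits a pattern string into a url_pattern_init
// dictionary of raw component strings, e.g. (illustrative only)
//
//   // auto init = constructor_string_parser<regex_provider>::parse(
//   //     "https://example.com:8080/books/:id");
//   // if (init) {
//   //   // init->protocol == "https", init->hostname == "example.com",
//   //   // init->port == "8080",     init->pathname == "/books/:id"
//   // }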
1074
1075} // namespace ada::url_pattern_helpers
1076
1077#endif