Ada 2.9.2
Fast spec-compliant URL parser
Loading...
Searching...
No Matches
url.cpp
Go to the documentation of this file.
1#include "ada.h"
2#include "ada/scheme.h"
3#include "ada/log.h"
4
5#include <numeric>
6#include <algorithm>
7#include <string>
8#include <string_view>
9
10namespace ada {
11
12bool url::parse_opaque_host(std::string_view input) {
13 ada_log("parse_opaque_host ", input, " [", input.size(), " bytes]");
14 if (std::ranges::any_of(input.begin(), input.end(),
15 ada::unicode::is_forbidden_host_code_point)) {
16 return is_valid = false;
17 }
18
19 // Return the result of running UTF-8 percent-encode on input using the C0
20 // control percent-encode set.
21 host = ada::unicode::percent_encode(
23 return true;
24}
25
26bool url::parse_ipv4(std::string_view input) {
27 ada_log("parse_ipv4 ", input, " [", input.size(), " bytes]");
28 if (input.back() == '.') {
29 input.remove_suffix(1);
30 }
31 size_t digit_count{0};
32 int pure_decimal_count = 0; // entries that are decimal
33 std::string_view original_input =
34 input; // we might use this if pure_decimal_count == 4.
35 uint64_t ipv4{0};
36 // we could unroll for better performance?
37 for (; (digit_count < 4) && !(input.empty()); digit_count++) {
38 uint32_t
39 segment_result{}; // If any number exceeds 32 bits, we have an error.
40 bool is_hex = checkers::has_hex_prefix(input);
41 if (is_hex && ((input.length() == 2) ||
42 ((input.length() > 2) && (input[2] == '.')))) {
43 // special case
44 segment_result = 0;
45 input.remove_prefix(2);
46 } else {
47 std::from_chars_result r{};
48 if (is_hex) {
49 r = std::from_chars(input.data() + 2, input.data() + input.size(),
50 segment_result, 16);
51 } else if ((input.length() >= 2) && input[0] == '0' &&
52 checkers::is_digit(input[1])) {
53 r = std::from_chars(input.data() + 1, input.data() + input.size(),
54 segment_result, 8);
55 } else {
56 pure_decimal_count++;
57 r = std::from_chars(input.data(), input.data() + input.size(),
58 segment_result, 10);
59 }
60 if (r.ec != std::errc()) {
61 return is_valid = false;
62 }
63 input.remove_prefix(r.ptr - input.data());
64 }
65 if (input.empty()) {
66 // We have the last value.
67 // At this stage, ipv4 contains digit_count*8 bits.
68 // So we have 32-digit_count*8 bits left.
69 if (segment_result >= (uint64_t(1) << (32 - digit_count * 8))) {
70 return is_valid = false;
71 }
72 ipv4 <<= (32 - digit_count * 8);
73 ipv4 |= segment_result;
74 goto final;
75 } else {
76 // There is more, so that the value must no be larger than 255
77 // and we must have a '.'.
78 if ((segment_result > 255) || (input[0] != '.')) {
79 return is_valid = false;
80 }
81 ipv4 <<= 8;
82 ipv4 |= segment_result;
83 input.remove_prefix(1); // remove '.'
84 }
85 }
86 if ((digit_count != 4) || (!input.empty())) {
87 return is_valid = false;
88 }
89final:
90 // We could also check r.ptr to see where the parsing ended.
91 if (pure_decimal_count == 4) {
92 host = original_input; // The original input was already all decimal and we
93 // validated it.
94 } else {
95 host = ada::serializers::ipv4(ipv4); // We have to reserialize the address.
96 }
98 return true;
99}
100
101bool url::parse_ipv6(std::string_view input) {
102 ada_log("parse_ipv6 ", input, " [", input.size(), " bytes]");
103
104 if (input.empty()) {
105 return is_valid = false;
106 }
107 // Let address be a new IPv6 address whose IPv6 pieces are all 0.
108 std::array<uint16_t, 8> address{};
109
110 // Let pieceIndex be 0.
111 int piece_index = 0;
112
113 // Let compress be null.
114 std::optional<int> compress{};
115
116 // Let pointer be a pointer for input.
117 std::string_view::iterator pointer = input.begin();
118
119 // If c is U+003A (:), then:
120 if (input[0] == ':') {
121 // If remaining does not start with U+003A (:), validation error, return
122 // failure.
123 if (input.size() == 1 || input[1] != ':') {
124 ada_log("parse_ipv6 starts with : but the rest does not start with :");
125 return is_valid = false;
126 }
127
128 // Increase pointer by 2.
129 pointer += 2;
130
131 // Increase pieceIndex by 1 and then set compress to pieceIndex.
132 compress = ++piece_index;
133 }
134
135 // While c is not the EOF code point:
136 while (pointer != input.end()) {
137 // If pieceIndex is 8, validation error, return failure.
138 if (piece_index == 8) {
139 ada_log("parse_ipv6 piece_index == 8");
140 return is_valid = false;
141 }
142
143 // If c is U+003A (:), then:
144 if (*pointer == ':') {
145 // If compress is non-null, validation error, return failure.
146 if (compress.has_value()) {
147 ada_log("parse_ipv6 compress is non-null");
148 return is_valid = false;
149 }
150
151 // Increase pointer and pieceIndex by 1, set compress to pieceIndex, and
152 // then continue.
153 pointer++;
154 compress = ++piece_index;
155 continue;
156 }
157
158 // Let value and length be 0.
159 uint16_t value = 0, length = 0;
160
161 // While length is less than 4 and c is an ASCII hex digit,
162 // set value to value times 0x10 + c interpreted as hexadecimal number, and
163 // increase pointer and length by 1.
164 while (length < 4 && pointer != input.end() &&
165 unicode::is_ascii_hex_digit(*pointer)) {
166 // https://stackoverflow.com/questions/39060852/why-does-the-addition-of-two-shorts-return-an-int
167 value = uint16_t(value * 0x10 + unicode::convert_hex_to_binary(*pointer));
168 pointer++;
169 length++;
170 }
171
172 // If c is U+002E (.), then:
173 if (pointer != input.end() && *pointer == '.') {
174 // If length is 0, validation error, return failure.
175 if (length == 0) {
176 ada_log("parse_ipv6 length is 0");
177 return is_valid = false;
178 }
179
180 // Decrease pointer by length.
181 pointer -= length;
182
183 // If pieceIndex is greater than 6, validation error, return failure.
184 if (piece_index > 6) {
185 ada_log("parse_ipv6 piece_index > 6");
186 return is_valid = false;
187 }
188
189 // Let numbersSeen be 0.
190 int numbers_seen = 0;
191
192 // While c is not the EOF code point:
193 while (pointer != input.end()) {
194 // Let ipv4Piece be null.
195 std::optional<uint16_t> ipv4_piece{};
196
197 // If numbersSeen is greater than 0, then:
198 if (numbers_seen > 0) {
199 // If c is a U+002E (.) and numbersSeen is less than 4, then increase
200 // pointer by 1.
201 if (*pointer == '.' && numbers_seen < 4) {
202 pointer++;
203 }
204 // Otherwise, validation error, return failure.
205 else {
206 ada_log("parse_ipv6 Otherwise, validation error, return failure");
207 return is_valid = false;
208 }
209 }
210
211 // If c is not an ASCII digit, validation error, return failure.
212 if (pointer == input.end() || !checkers::is_digit(*pointer)) {
213 ada_log(
214 "parse_ipv6 If c is not an ASCII digit, validation error, return "
215 "failure");
216 return is_valid = false;
217 }
218
219 // While c is an ASCII digit:
220 while (pointer != input.end() && checkers::is_digit(*pointer)) {
221 // Let number be c interpreted as decimal number.
222 int number = *pointer - '0';
223
224 // If ipv4Piece is null, then set ipv4Piece to number.
225 if (!ipv4_piece.has_value()) {
226 ipv4_piece = number;
227 }
228 // Otherwise, if ipv4Piece is 0, validation error, return failure.
229 else if (ipv4_piece == 0) {
230 ada_log("parse_ipv6 if ipv4Piece is 0, validation error");
231 return is_valid = false;
232 }
233 // Otherwise, set ipv4Piece to ipv4Piece times 10 + number.
234 else {
235 ipv4_piece = *ipv4_piece * 10 + number;
236 }
237
238 // If ipv4Piece is greater than 255, validation error, return failure.
239 if (ipv4_piece > 255) {
240 ada_log("parse_ipv6 ipv4_piece > 255");
241 return is_valid = false;
242 }
243
244 // Increase pointer by 1.
245 pointer++;
246 }
247
248 // Set address[pieceIndex] to address[pieceIndex] times 0x100 +
249 // ipv4Piece.
250 // https://stackoverflow.com/questions/39060852/why-does-the-addition-of-two-shorts-return-an-int
251 address[piece_index] =
252 uint16_t(address[piece_index] * 0x100 + *ipv4_piece);
253
254 // Increase numbersSeen by 1.
255 numbers_seen++;
256
257 // If numbersSeen is 2 or 4, then increase pieceIndex by 1.
258 if (numbers_seen == 2 || numbers_seen == 4) {
259 piece_index++;
260 }
261 }
262
263 // If numbersSeen is not 4, validation error, return failure.
264 if (numbers_seen != 4) {
265 return is_valid = false;
266 }
267
268 // Break.
269 break;
270 }
271 // Otherwise, if c is U+003A (:):
272 else if ((pointer != input.end()) && (*pointer == ':')) {
273 // Increase pointer by 1.
274 pointer++;
275
276 // If c is the EOF code point, validation error, return failure.
277 if (pointer == input.end()) {
278 ada_log(
279 "parse_ipv6 If c is the EOF code point, validation error, return "
280 "failure");
281 return is_valid = false;
282 }
283 }
284 // Otherwise, if c is not the EOF code point, validation error, return
285 // failure.
286 else if (pointer != input.end()) {
287 ada_log(
288 "parse_ipv6 Otherwise, if c is not the EOF code point, validation "
289 "error, return failure");
290 return is_valid = false;
291 }
292
293 // Set address[pieceIndex] to value.
294 address[piece_index] = value;
295
296 // Increase pieceIndex by 1.
297 piece_index++;
298 }
299
300 // If compress is non-null, then:
301 if (compress.has_value()) {
302 // Let swaps be pieceIndex - compress.
303 int swaps = piece_index - *compress;
304
305 // Set pieceIndex to 7.
306 piece_index = 7;
307
308 // While pieceIndex is not 0 and swaps is greater than 0,
309 // swap address[pieceIndex] with address[compress + swaps - 1], and then
310 // decrease both pieceIndex and swaps by 1.
311 while (piece_index != 0 && swaps > 0) {
312 std::swap(address[piece_index], address[*compress + swaps - 1]);
313 piece_index--;
314 swaps--;
315 }
316 }
317 // Otherwise, if compress is null and pieceIndex is not 8, validation error,
318 // return failure.
319 else if (piece_index != 8) {
320 ada_log(
321 "parse_ipv6 if compress is null and pieceIndex is not 8, validation "
322 "error, return failure");
323 return is_valid = false;
324 }
325 host = ada::serializers::ipv6(address);
326 ada_log("parse_ipv6 ", *host);
327 host_type = IPV6;
328 return true;
329}
330
331template <bool has_state_override>
332ada_really_inline bool url::parse_scheme(const std::string_view input) {
333 auto parsed_type = ada::scheme::get_scheme_type(input);
334 bool is_input_special = (parsed_type != ada::scheme::NOT_SPECIAL);
339 if (is_input_special) { // fast path!!!
340 if constexpr (has_state_override) {
341 // If url's scheme is not a special scheme and buffer is a special scheme,
342 // then return.
343 if (is_special() != is_input_special) {
344 return false;
345 }
346
347 // If url includes credentials or has a non-null port, and buffer is
348 // "file", then return.
349 if ((has_credentials() || port.has_value()) &&
350 parsed_type == ada::scheme::type::FILE) {
351 return false;
352 }
353
354 // If url's scheme is "file" and its host is an empty host, then return.
355 // An empty host is the empty string.
356 if (type == ada::scheme::type::FILE && host.has_value() &&
357 host.value().empty()) {
358 return false;
359 }
360 }
361
362 type = parsed_type;
363
364 if constexpr (has_state_override) {
365 // This is uncommon.
366 uint16_t urls_scheme_port = get_special_port();
367
368 if (urls_scheme_port) {
369 // If url's port is url's scheme's default port, then set url's port to
370 // null.
371 if (port.has_value() && *port == urls_scheme_port) {
372 port = std::nullopt;
373 }
374 }
375 }
376 } else { // slow path
377 std::string _buffer(input);
378 // Next function is only valid if the input is ASCII and returns false
379 // otherwise, but it seems that we always have ascii content so we do not
380 // need to check the return value.
381 // bool is_ascii =
382 unicode::to_lower_ascii(_buffer.data(), _buffer.size());
383
384 if constexpr (has_state_override) {
385 // If url's scheme is a special scheme and buffer is not a special scheme,
386 // then return. If url's scheme is not a special scheme and buffer is a
387 // special scheme, then return.
388 if (is_special() != ada::scheme::is_special(_buffer)) {
389 return true;
390 }
391
392 // If url includes credentials or has a non-null port, and buffer is
393 // "file", then return.
394 if ((has_credentials() || port.has_value()) && _buffer == "file") {
395 return true;
396 }
397
398 // If url's scheme is "file" and its host is an empty host, then return.
399 // An empty host is the empty string.
400 if (type == ada::scheme::type::FILE && host.has_value() &&
401 host.value().empty()) {
402 return true;
403 }
404 }
405
406 set_scheme(std::move(_buffer));
407
408 if constexpr (has_state_override) {
409 // This is uncommon.
410 uint16_t urls_scheme_port = get_special_port();
411
412 if (urls_scheme_port) {
413 // If url's port is url's scheme's default port, then set url's port to
414 // null.
415 if (port.has_value() && *port == urls_scheme_port) {
416 port = std::nullopt;
417 }
418 }
419 }
420 }
421
422 return true;
423}
424
425ada_really_inline bool url::parse_host(std::string_view input) {
426 ada_log("parse_host ", input, " [", input.size(), " bytes]");
427 if (input.empty()) {
428 return is_valid = false;
429 } // technically unnecessary.
430 // If input starts with U+005B ([), then:
431 if (input[0] == '[') {
432 // If input does not end with U+005D (]), validation error, return failure.
433 if (input.back() != ']') {
434 return is_valid = false;
435 }
436 ada_log("parse_host ipv6");
437
438 // Return the result of IPv6 parsing input with its leading U+005B ([) and
439 // trailing U+005D (]) removed.
440 input.remove_prefix(1);
441 input.remove_suffix(1);
442 return parse_ipv6(input);
443 }
444
445 // If isNotSpecial is true, then return the result of opaque-host parsing
446 // input.
447 if (!is_special()) {
448 return parse_opaque_host(input);
449 }
450 // Let domain be the result of running UTF-8 decode without BOM on the
451 // percent-decoding of input. Let asciiDomain be the result of running domain
452 // to ASCII with domain and false. The most common case is an ASCII input, in
453 // which case we do not need to call the expensive 'to_ascii' if a few
454 // conditions are met: no '%' and no 'xn-' subsequence.
455 std::string buffer = std::string(input);
456 // This next function checks that the result is ascii, but we are going to
457 // to check anyhow with is_forbidden.
458 // bool is_ascii =
459 unicode::to_lower_ascii(buffer.data(), buffer.size());
460 bool is_forbidden = unicode::contains_forbidden_domain_code_point(
461 buffer.data(), buffer.size());
462 if (is_forbidden == 0 && buffer.find("xn-") == std::string_view::npos) {
463 // fast path
464 host = std::move(buffer);
465 if (checkers::is_ipv4(host.value())) {
466 ada_log("parse_host fast path ipv4");
467 return parse_ipv4(host.value());
468 }
469 ada_log("parse_host fast path ", *host);
470 return true;
471 }
472 ada_log("parse_host calling to_ascii");
473 is_valid = ada::unicode::to_ascii(host, input, input.find('%'));
474 if (!is_valid) {
475 ada_log("parse_host to_ascii returns false");
476 return is_valid = false;
477 }
478 ada_log("parse_host to_ascii succeeded ", *host, " [", host->size(),
479 " bytes]");
480
481 if (std::any_of(host.value().begin(), host.value().end(),
482 ada::unicode::is_forbidden_domain_code_point)) {
483 host = std::nullopt;
484 return is_valid = false;
485 }
486
487 // If asciiDomain ends in a number, then return the result of IPv4 parsing
488 // asciiDomain.
489 if (checkers::is_ipv4(host.value())) {
490 ada_log("parse_host got ipv4 ", *host);
491 return parse_ipv4(host.value());
492 }
493
494 return true;
495}
496
497ada_really_inline void url::parse_path(std::string_view input) {
498 ada_log("parse_path ", input);
499 std::string tmp_buffer;
500 std::string_view internal_input;
501 if (unicode::has_tabs_or_newline(input)) {
502 tmp_buffer = input;
503 // Optimization opportunity: Instead of copying and then pruning, we could
504 // just directly build the string from user_input.
505 helpers::remove_ascii_tab_or_newline(tmp_buffer);
506 internal_input = tmp_buffer;
507 } else {
508 internal_input = input;
509 }
510
511 // If url is special, then:
512 if (is_special()) {
513 if (internal_input.empty()) {
514 path = "/";
515 } else if ((internal_input[0] == '/') || (internal_input[0] == '\\')) {
516 helpers::parse_prepared_path(internal_input.substr(1), type, path);
517 } else {
518 helpers::parse_prepared_path(internal_input, type, path);
519 }
520 } else if (!internal_input.empty()) {
521 if (internal_input[0] == '/') {
522 helpers::parse_prepared_path(internal_input.substr(1), type, path);
523 } else {
524 helpers::parse_prepared_path(internal_input, type, path);
525 }
526 } else {
527 if (!host.has_value()) {
528 path = "/";
529 }
530 }
531}
532
533[[nodiscard]] std::string url::to_string() const {
534 if (!is_valid) {
535 return "null";
536 }
537 std::string answer;
538 auto back = std::back_insert_iterator(answer);
539 answer.append("{\n");
540 answer.append("\t\"protocol\":\"");
541 helpers::encode_json(get_protocol(), back);
542 answer.append("\",\n");
543 if (has_credentials()) {
544 answer.append("\t\"username\":\"");
545 helpers::encode_json(username, back);
546 answer.append("\",\n");
547 answer.append("\t\"password\":\"");
548 helpers::encode_json(password, back);
549 answer.append("\",\n");
550 }
551 if (host.has_value()) {
552 answer.append("\t\"host\":\"");
553 helpers::encode_json(host.value(), back);
554 answer.append("\",\n");
555 }
556 if (port.has_value()) {
557 answer.append("\t\"port\":\"");
558 answer.append(std::to_string(port.value()));
559 answer.append("\",\n");
560 }
561 answer.append("\t\"path\":\"");
562 helpers::encode_json(path, back);
563 answer.append("\",\n");
564 answer.append("\t\"opaque path\":");
565 answer.append((has_opaque_path ? "true" : "false"));
566 if (has_search()) {
567 answer.append(",\n");
568 answer.append("\t\"query\":\"");
569 helpers::encode_json(query.value(), back);
570 answer.append("\"");
571 }
572 if (hash.has_value()) {
573 answer.append(",\n");
574 answer.append("\t\"hash\":\"");
575 helpers::encode_json(hash.value(), back);
576 answer.append("\"");
577 }
578 answer.append("\n}");
579 return answer;
580}
581
582[[nodiscard]] bool url::has_valid_domain() const noexcept {
583 if (!host.has_value()) {
584 return false;
585 }
586 return checkers::verify_dns_length(host.value());
587}
588
589[[nodiscard]] std::string url::get_origin() const noexcept {
590 if (is_special()) {
591 // Return a new opaque origin.
592 if (type == scheme::FILE) {
593 return "null";
594 }
595 return ada::helpers::concat(get_protocol(), "//", get_host());
596 }
597
598 if (non_special_scheme == "blob") {
599 if (!path.empty()) {
600 auto result = ada::parse<ada::url>(path);
601 if (result &&
602 (result->type == scheme::HTTP || result->type == scheme::HTTPS)) {
603 // If pathURL's scheme is not "http" and not "https", then return a
604 // new opaque origin.
605 return ada::helpers::concat(result->get_protocol(), "//",
606 result->get_host());
607 }
608 }
609 }
610
611 // Return a new opaque origin.
612 return "null";
613}
614
615[[nodiscard]] std::string url::get_protocol() const noexcept {
616 if (is_special()) {
617 return helpers::concat(ada::scheme::details::is_special_list[type], ":");
618 }
619 // We only move the 'scheme' if it is non-special.
620 return helpers::concat(non_special_scheme, ":");
621}
622
623[[nodiscard]] std::string url::get_host() const noexcept {
624 // If url's host is null, then return the empty string.
625 // If url's port is null, return url's host, serialized.
626 // Return url's host, serialized, followed by U+003A (:) and url's port,
627 // serialized.
628 if (!host.has_value()) {
629 return "";
630 }
631 if (port.has_value()) {
632 return host.value() + ":" + get_port();
633 }
634 return host.value();
635}
636
637[[nodiscard]] std::string url::get_hostname() const noexcept {
638 return host.value_or("");
639}
640
641[[nodiscard]] std::string url::get_search() const noexcept {
642 // If this's URL's query is either null or the empty string, then return the
643 // empty string. Return U+003F (?), followed by this's URL's query.
644 return (!query.has_value() || (query.value().empty())) ? ""
645 : "?" + query.value();
646}
647
648[[nodiscard]] const std::string& url::get_username() const noexcept {
649 return username;
650}
651
652[[nodiscard]] const std::string& url::get_password() const noexcept {
653 return password;
654}
655
656[[nodiscard]] std::string url::get_port() const noexcept {
657 return port.has_value() ? std::to_string(port.value()) : "";
658}
659
660[[nodiscard]] std::string url::get_hash() const noexcept {
661 // If this's URL's fragment is either null or the empty string, then return
662 // the empty string. Return U+0023 (#), followed by this's URL's fragment.
663 return (!hash.has_value() || (hash.value().empty())) ? ""
664 : "#" + hash.value();
665}
666
667template <bool override_hostname>
668bool url::set_host_or_hostname(const std::string_view input) {
669 if (has_opaque_path) {
670 return false;
671 }
672
673 std::optional<std::string> previous_host = host;
674 std::optional<uint16_t> previous_port = port;
675
676 size_t host_end_pos = input.find('#');
677 std::string _host(input.data(), host_end_pos != std::string_view::npos
678 ? host_end_pos
679 : input.size());
680 helpers::remove_ascii_tab_or_newline(_host);
681 std::string_view new_host(_host);
682
683 // If url's scheme is "file", then set state to file host state, instead of
684 // host state.
685 if (type != ada::scheme::type::FILE) {
686 std::string_view host_view(_host.data(), _host.length());
687 auto [location, found_colon] =
688 helpers::get_host_delimiter_location(is_special(), host_view);
689
690 // Otherwise, if c is U+003A (:) and insideBrackets is false, then:
691 // Note: the 'found_colon' value is true if and only if a colon was
692 // encountered while not inside brackets.
693 if (found_colon) {
694 if constexpr (override_hostname) {
695 return false;
696 }
697 std::string_view buffer = new_host.substr(location + 1);
698 if (!buffer.empty()) {
699 set_port(buffer);
700 }
701 }
702 // If url is special and host_view is the empty string, validation error,
703 // return failure. Otherwise, if state override is given, host_view is the
704 // empty string, and either url includes credentials or url's port is
705 // non-null, return.
706 else if (host_view.empty() &&
707 (is_special() || has_credentials() || port.has_value())) {
708 return false;
709 }
710
711 // Let host be the result of host parsing host_view with url is not special.
712 if (host_view.empty() && !is_special()) {
713 host = "";
714 return true;
715 }
716
717 bool succeeded = parse_host(host_view);
718 if (!succeeded) {
719 host = previous_host;
720 update_base_port(previous_port);
721 }
722 return succeeded;
723 }
724
725 size_t location = new_host.find_first_of("/\\?");
726 if (location != std::string_view::npos) {
727 new_host.remove_suffix(new_host.length() - location);
728 }
729
730 if (new_host.empty()) {
731 // Set url's host to the empty string.
732 host = "";
733 } else {
734 // Let host be the result of host parsing buffer with url is not special.
735 if (!parse_host(new_host)) {
736 host = previous_host;
737 update_base_port(previous_port);
738 return false;
739 }
740
741 // If host is "localhost", then set host to the empty string.
742 if (host.has_value() && host.value() == "localhost") {
743 host = "";
744 }
745 }
746 return true;
747}
748
749bool url::set_host(const std::string_view input) {
750 return set_host_or_hostname<false>(input);
751}
752
753bool url::set_hostname(const std::string_view input) {
754 return set_host_or_hostname<true>(input);
755}
756
757bool url::set_username(const std::string_view input) {
758 if (cannot_have_credentials_or_port()) {
759 return false;
760 }
761 username = ada::unicode::percent_encode(
763 return true;
764}
765
766bool url::set_password(const std::string_view input) {
767 if (cannot_have_credentials_or_port()) {
768 return false;
769 }
770 password = ada::unicode::percent_encode(
772 return true;
773}
774
775bool url::set_port(const std::string_view input) {
776 if (cannot_have_credentials_or_port()) {
777 return false;
778 }
779 std::string trimmed(input);
780 helpers::remove_ascii_tab_or_newline(trimmed);
781 if (trimmed.empty()) {
782 port = std::nullopt;
783 return true;
784 }
785 // Input should not start with control characters.
786 if (ada::unicode::is_c0_control_or_space(trimmed.front())) {
787 return false;
788 }
789 // Input should contain at least one ascii digit.
790 if (input.find_first_of("0123456789") == std::string_view::npos) {
791 return false;
792 }
793
794 // Revert changes if parse_port fails.
795 std::optional<uint16_t> previous_port = port;
796 parse_port(trimmed);
797 if (is_valid) {
798 return true;
799 }
800 port = previous_port;
801 is_valid = true;
802 return false;
803}
804
805void url::set_hash(const std::string_view input) {
806 if (input.empty()) {
807 hash = std::nullopt;
808 helpers::strip_trailing_spaces_from_opaque_path(*this);
809 return;
810 }
811
812 std::string new_value;
813 new_value = input[0] == '#' ? input.substr(1) : input;
814 helpers::remove_ascii_tab_or_newline(new_value);
815 hash = unicode::percent_encode(new_value,
817}
818
819void url::set_search(const std::string_view input) {
820 if (input.empty()) {
821 query = std::nullopt;
822 helpers::strip_trailing_spaces_from_opaque_path(*this);
823 return;
824 }
825
826 std::string new_value;
827 new_value = input[0] == '?' ? input.substr(1) : input;
828 helpers::remove_ascii_tab_or_newline(new_value);
829
830 auto query_percent_encode_set =
833
834 query = ada::unicode::percent_encode(new_value, query_percent_encode_set);
835}
836
837bool url::set_pathname(const std::string_view input) {
838 if (has_opaque_path) {
839 return false;
840 }
841 path = "";
842 parse_path(input);
843 return true;
844}
845
846bool url::set_protocol(const std::string_view input) {
847 std::string view(input);
848 helpers::remove_ascii_tab_or_newline(view);
849 if (view.empty()) {
850 return true;
851 }
852
853 // Schemes should start with alpha values.
854 if (!checkers::is_alpha(view[0])) {
855 return false;
856 }
857
858 view.append(":");
859
860 std::string::iterator pointer =
861 std::ranges::find_if_not(view, unicode::is_alnum_plus);
862
863 if (pointer != view.end() && *pointer == ':') {
864 return parse_scheme<true>(
865 std::string_view(view.data(), pointer - view.begin()));
866 }
867 return false;
868}
869
870bool url::set_href(const std::string_view input) {
872
873 if (out) {
874 *this = *out;
875 }
876
877 return out.has_value();
878}
879
880} // namespace ada
Includes all definitions for Ada.
#define ada_really_inline
Definition common_defs.h:77
constexpr uint8_t QUERY_PERCENT_ENCODE[32]
constexpr uint8_t SPECIAL_QUERY_PERCENT_ENCODE[32]
constexpr uint8_t C0_CONTROL_PERCENT_ENCODE[32]
constexpr uint8_t USERINFO_PERCENT_ENCODE[32]
constexpr uint8_t FRAGMENT_PERCENT_ENCODE[32]
constexpr bool has_hex_prefix(std::string_view input)
constexpr bool is_alpha(char x) noexcept
constexpr bool is_digit(char x) noexcept
constexpr std::string_view is_special_list[]
Definition scheme-inl.h:19
constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept
Definition scheme-inl.h:72
@ NOT_SPECIAL
Definition scheme.h:31
constexpr uint16_t get_special_port(std::string_view scheme) noexcept
Definition scheme-inl.h:57
std::string ipv6(const std::array< uint16_t, 8 > &address) noexcept
std::string ipv4(uint64_t address) noexcept
Definition ada_idna.h:13
@ IPV6
Definition url_base.h:32
@ IPV4
Definition url_base.h:27
tl::expected< result_type, ada::errors > result
ada_warn_unused ada::result< result_type > parse(std::string_view input, const result_type *base_url=nullptr)
Declarations for the URL scheme.
ada_really_inline constexpr bool is_special() const noexcept
url_host_type host_type
Definition url_base.h:60
bool is_valid
Definition url_base.h:50
bool has_opaque_path
Definition url_base.h:55
void set_hash(std::string_view input)
Definition url.cpp:805
std::string get_search() const noexcept
Definition url.cpp:641
bool set_hostname(std::string_view input)
Definition url.cpp:753
bool set_host(std::string_view input)
Definition url.cpp:749
ada_really_inline bool has_credentials() const noexcept
Definition url-inl.h:19
bool set_password(std::string_view input)
Definition url.cpp:766
void set_search(std::string_view input)
Definition url.cpp:819
bool set_href(std::string_view input)
Definition url.cpp:870
bool set_username(std::string_view input)
Definition url.cpp:757
std::string get_host() const noexcept
Definition url.cpp:623
std::string get_hash() const noexcept
Definition url.cpp:660
bool set_pathname(std::string_view input)
Definition url.cpp:837
std::string get_origin() const noexcept override
Definition url.cpp:589
std::string get_hostname() const noexcept
Definition url.cpp:637
const std::string & get_password() const noexcept
Definition url.cpp:652
bool set_protocol(std::string_view input)
Definition url.cpp:846
std::string get_port() const noexcept
Definition url.cpp:656
const std::string & get_username() const noexcept
Definition url.cpp:648
bool set_port(std::string_view input)
Definition url.cpp:775
constexpr bool has_search() const noexcept override
Definition url-inl.h:163
std::string to_string() const override
Definition url.cpp:533
std::string get_protocol() const noexcept
Definition url.cpp:615
bool has_valid_domain() const noexcept override
Definition url.cpp:582