Ada 3.4.0
Fast spec-compliant URL parser
Loading...
Searching...
No Matches
url.cpp
Go to the documentation of this file.
1#include "ada/scheme-inl.h"
2#include "ada/log.h"
3#include "ada/unicode-inl.h"
4
5#include <numeric>
6#include <algorithm>
7#include <iterator>
8#include <ranges>
9#include <string>
10#include <string_view>
11
12namespace ada {
13
14bool url::parse_opaque_host(std::string_view input) {
15 ada_log("parse_opaque_host ", input, " [", input.size(), " bytes]");
16 if (std::ranges::any_of(input, ada::unicode::is_forbidden_host_code_point)) {
17 return is_valid = false;
18 }
19
20 // Return the result of running UTF-8 percent-encode on input using the C0
21 // control percent-encode set.
22 host = ada::unicode::percent_encode(
24 return true;
25}
26
27bool url::parse_ipv4(std::string_view input) {
28 ada_log("parse_ipv4 ", input, " [", input.size(), " bytes]");
29 if (input.back() == '.') {
30 input.remove_suffix(1);
31 }
32 size_t digit_count{0};
33 int pure_decimal_count = 0; // entries that are decimal
34 std::string_view original_input =
35 input; // we might use this if pure_decimal_count == 4.
36 uint64_t ipv4{0};
37 // we could unroll for better performance?
38 for (; (digit_count < 4) && !(input.empty()); digit_count++) {
39 uint32_t
40 segment_result{}; // If any number exceeds 32 bits, we have an error.
41 bool is_hex = checkers::has_hex_prefix(input);
42 if (is_hex && ((input.length() == 2) ||
43 ((input.length() > 2) && (input[2] == '.')))) {
44 // special case
45 segment_result = 0;
46 input.remove_prefix(2);
47 } else {
48 std::from_chars_result r{};
49 if (is_hex) {
50 r = std::from_chars(input.data() + 2, input.data() + input.size(),
51 segment_result, 16);
52 } else if ((input.length() >= 2) && input[0] == '0' &&
53 checkers::is_digit(input[1])) {
54 r = std::from_chars(input.data() + 1, input.data() + input.size(),
55 segment_result, 8);
56 } else {
57 pure_decimal_count++;
58 r = std::from_chars(input.data(), input.data() + input.size(),
59 segment_result, 10);
60 }
61 if (r.ec != std::errc()) {
62 return is_valid = false;
63 }
64 input.remove_prefix(r.ptr - input.data());
65 }
66 if (input.empty()) {
67 // We have the last value.
68 // At this stage, ipv4 contains digit_count*8 bits.
69 // So we have 32-digit_count*8 bits left.
70 if (segment_result >= (uint64_t(1) << (32 - digit_count * 8))) {
71 return is_valid = false;
72 }
73 ipv4 <<= (32 - digit_count * 8);
74 ipv4 |= segment_result;
75 goto final;
76 } else {
77 // There is more, so that the value must no be larger than 255
78 // and we must have a '.'.
79 if ((segment_result > 255) || (input[0] != '.')) {
80 return is_valid = false;
81 }
82 ipv4 <<= 8;
83 ipv4 |= segment_result;
84 input.remove_prefix(1); // remove '.'
85 }
86 }
87 if ((digit_count != 4) || (!input.empty())) {
88 return is_valid = false;
89 }
90final:
91 // We could also check r.ptr to see where the parsing ended.
92 if (pure_decimal_count == 4) {
93 host = original_input; // The original input was already all decimal and we
94 // validated it.
95 } else {
96 host = ada::serializers::ipv4(ipv4); // We have to reserialize the address.
97 }
99 return true;
100}
101
// Parses the textual form of an IPv6 address into eight 16-bit pieces.
//
// parse_host() calls this with the surrounding '[' and ']' already removed.
// The grammar handled below: hexadecimal pieces of up to four digits separated
// by ':', at most one "::" compression marker, and an optional trailing
// dotted-decimal IPv4 part that fills two pieces.
//
// On success the address is serialized into `host`, `host_type` is set to IPV6
// and true is returned. On any validation error `is_valid` is set to false and
// false is returned.
//
// NOTE(review): the step comments below read like quotations of the URL
// standard's IPv6 parser steps -- confirm against the spec before reordering
// anything.
102bool url::parse_ipv6(std::string_view input) {
103 ada_log("parse_ipv6 ", input, " [", input.size(), " bytes]");
104
105 if (input.empty()) {
106 return is_valid = false;
107 }
108 // Let address be a new IPv6 address whose IPv6 pieces are all 0.
109 std::array<uint16_t, 8> address{};
110
111 // Let pieceIndex be 0.
112 int piece_index = 0;
113
114 // Let compress be null.
115 std::optional<int> compress{};
116
117 // Let pointer be a pointer for input.
118 std::string_view::iterator pointer = input.begin();
119
120 // If c is U+003A (:), then:
121 if (input[0] == ':') {
122 // If remaining does not start with U+003A (:), validation error, return
123 // failure.
124 if (input.size() == 1 || input[1] != ':') {
125 ada_log("parse_ipv6 starts with : but the rest does not start with :");
126 return is_valid = false;
127 }
128
129 // Increase pointer by 2.
130 pointer += 2;
131
132 // Increase pieceIndex by 1 and then set compress to pieceIndex.
133 compress = ++piece_index;
134 }
135
136 // While c is not the EOF code point:
137 while (pointer != input.end()) {
138 // If pieceIndex is 8, validation error, return failure.
139 if (piece_index == 8) {
140 ada_log("parse_ipv6 piece_index == 8");
141 return is_valid = false;
142 }
143
144 // If c is U+003A (:), then:
// A ':' at the start of a piece is the second colon of a "::" marker.
145 if (*pointer == ':') {
146 // If compress is non-null, validation error, return failure.
147 if (compress.has_value()) {
148 ada_log("parse_ipv6 compress is non-null");
149 return is_valid = false;
150 }
151
152 // Increase pointer and pieceIndex by 1, set compress to pieceIndex, and
153 // then continue.
154 pointer++;
155 compress = ++piece_index;
156 continue;
157 }
158
159 // Let value and length be 0.
160 uint16_t value = 0, length = 0;
161
162 // While length is less than 4 and c is an ASCII hex digit,
163 // set value to value times 0x10 + c interpreted as hexadecimal number, and
164 // increase pointer and length by 1.
165 while (length < 4 && pointer != input.end() &&
166 unicode::is_ascii_hex_digit(*pointer)) {
167 // https://stackoverflow.com/questions/39060852/why-does-the-addition-of-two-shorts-return-an-int
168 value = uint16_t(value * 0x10 + unicode::convert_hex_to_binary(*pointer));
169 pointer++;
170 length++;
171 }
172
173 // If c is U+002E (.), then:
// A '.' right after the digits means they start an embedded IPv4 address.
174 if (pointer != input.end() && *pointer == '.') {
175 // If length is 0, validation error, return failure.
176 if (length == 0) {
177 ada_log("parse_ipv6 length is 0");
178 return is_valid = false;
179 }
180
181 // Decrease pointer by length.
// Rewind so the digits just consumed as hex are re-read as decimal below.
182 pointer -= length;
183
184 // If pieceIndex is greater than 6, validation error, return failure.
// The IPv4 part fills two pieces, so it must start at index 6 or lower.
185 if (piece_index > 6) {
186 ada_log("parse_ipv6 piece_index > 6");
187 return is_valid = false;
188 }
189
190 // Let numbersSeen be 0.
191 int numbers_seen = 0;
192
193 // While c is not the EOF code point:
194 while (pointer != input.end()) {
195 // Let ipv4Piece be null.
196 std::optional<uint16_t> ipv4_piece{};
197
198 // If numbersSeen is greater than 0, then:
199 if (numbers_seen > 0) {
200 // If c is a U+002E (.) and numbersSeen is less than 4, then increase
201 // pointer by 1.
202 if (*pointer == '.' && numbers_seen < 4) {
203 pointer++;
204 }
205 // Otherwise, validation error, return failure.
206 else {
207 ada_log("parse_ipv6 Otherwise, validation error, return failure");
208 return is_valid = false;
209 }
210 }
211
212 // If c is not an ASCII digit, validation error, return failure.
213 if (pointer == input.end() || !checkers::is_digit(*pointer)) {
214 ada_log(
215 "parse_ipv6 If c is not an ASCII digit, validation error, return "
216 "failure");
217 return is_valid = false;
218 }
219
220 // While c is an ASCII digit:
221 while (pointer != input.end() && checkers::is_digit(*pointer)) {
222 // Let number be c interpreted as decimal number.
223 int number = *pointer - '0';
224
225 // If ipv4Piece is null, then set ipv4Piece to number.
226 if (!ipv4_piece.has_value()) {
227 ipv4_piece = number;
228 }
229 // Otherwise, if ipv4Piece is 0, validation error, return failure.
// (This rejects numbers written with a leading zero, such as "01".)
230 else if (ipv4_piece == 0) {
231 ada_log("parse_ipv6 if ipv4Piece is 0, validation error");
232 return is_valid = false;
233 }
234 // Otherwise, set ipv4Piece to ipv4Piece times 10 + number.
235 else {
236 ipv4_piece = *ipv4_piece * 10 + number;
237 }
238
239 // If ipv4Piece is greater than 255, validation error, return failure.
240 if (ipv4_piece > 255) {
241 ada_log("parse_ipv6 ipv4_piece > 255");
242 return is_valid = false;
243 }
244
245 // Increase pointer by 1.
246 pointer++;
247 }
248
249 // Set address[pieceIndex] to address[pieceIndex] times 0x100 +
250 // ipv4Piece.
251 // https://stackoverflow.com/questions/39060852/why-does-the-addition-of-two-shorts-return-an-int
252 address[piece_index] =
253 uint16_t(address[piece_index] * 0x100 + *ipv4_piece);
254
255 // Increase numbersSeen by 1.
256 numbers_seen++;
257
258 // If numbersSeen is 2 or 4, then increase pieceIndex by 1.
// Two 8-bit numbers are packed into each 16-bit piece.
259 if (numbers_seen == 2 || numbers_seen == 4) {
260 piece_index++;
261 }
262 }
263
264 // If numbersSeen is not 4, validation error, return failure.
265 if (numbers_seen != 4) {
266 return is_valid = false;
267 }
268
269 // Break.
270 break;
271 }
272 // Otherwise, if c is U+003A (:):
273 else if ((pointer != input.end()) && (*pointer == ':')) {
274 // Increase pointer by 1.
275 pointer++;
276
277 // If c is the EOF code point, validation error, return failure.
278 if (pointer == input.end()) {
279 ada_log(
280 "parse_ipv6 If c is the EOF code point, validation error, return "
281 "failure");
282 return is_valid = false;
283 }
284 }
285 // Otherwise, if c is not the EOF code point, validation error, return
286 // failure.
287 else if (pointer != input.end()) {
288 ada_log(
289 "parse_ipv6 Otherwise, if c is not the EOF code point, validation "
290 "error, return failure");
291 return is_valid = false;
292 }
293
294 // Set address[pieceIndex] to value.
295 address[piece_index] = value;
296
297 // Increase pieceIndex by 1.
298 piece_index++;
299 }
300
301 // If compress is non-null, then:
// Move the pieces that were parsed after the "::" marker to the end of the
// array; the zero-initialized entries end up in the gap.
302 if (compress.has_value()) {
303 // Let swaps be pieceIndex - compress.
304 int swaps = piece_index - *compress;
305
306 // Set pieceIndex to 7.
307 piece_index = 7;
308
309 // While pieceIndex is not 0 and swaps is greater than 0,
310 // swap address[pieceIndex] with address[compress + swaps - 1], and then
311 // decrease both pieceIndex and swaps by 1.
312 while (piece_index != 0 && swaps > 0) {
313 std::swap(address[piece_index], address[*compress + swaps - 1]);
314 piece_index--;
315 swaps--;
316 }
317 }
318 // Otherwise, if compress is null and pieceIndex is not 8, validation error,
319 // return failure.
320 else if (piece_index != 8) {
321 ada_log(
322 "parse_ipv6 if compress is null and pieceIndex is not 8, validation "
323 "error, return failure");
324 return is_valid = false;
325 }
326 host = ada::serializers::ipv6(address);
327 ada_log("parse_ipv6 ", *host);
328 host_type = IPV6;
329 return true;
330}
331
332template <bool has_state_override>
333ada_really_inline bool url::parse_scheme(const std::string_view input) {
334 auto parsed_type = ada::scheme::get_scheme_type(input);
335 bool is_input_special = (parsed_type != ada::scheme::NOT_SPECIAL);
340 if (is_input_special) { // fast path!!!
341 if constexpr (has_state_override) {
342 // If url's scheme is not a special scheme and buffer is a special scheme,
343 // then return.
344 if (is_special() != is_input_special) {
345 return false;
346 }
347
348 // If url includes credentials or has a non-null port, and buffer is
349 // "file", then return.
350 if ((has_credentials() || port.has_value()) &&
351 parsed_type == ada::scheme::type::FILE) {
352 return false;
353 }
354
355 // If url's scheme is "file" and its host is an empty host, then return.
356 // An empty host is the empty string.
357 if (type == ada::scheme::type::FILE && host.has_value() &&
358 host.value().empty()) {
359 return false;
360 }
361 }
362
363 type = parsed_type;
364
365 if constexpr (has_state_override) {
366 // This is uncommon.
367 uint16_t urls_scheme_port = get_special_port();
368
369 if (urls_scheme_port) {
370 // If url's port is url's scheme's default port, then set url's port to
371 // null.
372 if (port.has_value() && *port == urls_scheme_port) {
373 port = std::nullopt;
374 }
375 }
376 }
377 } else { // slow path
378 std::string _buffer(input);
379 // Next function is only valid if the input is ASCII and returns false
380 // otherwise, but it seems that we always have ascii content so we do not
381 // need to check the return value.
382 // bool is_ascii =
383 unicode::to_lower_ascii(_buffer.data(), _buffer.size());
384
385 if constexpr (has_state_override) {
386 // If url's scheme is a special scheme and buffer is not a special scheme,
387 // then return. If url's scheme is not a special scheme and buffer is a
388 // special scheme, then return.
389 if (is_special() != ada::scheme::is_special(_buffer)) {
390 return true;
391 }
392
393 // If url includes credentials or has a non-null port, and buffer is
394 // "file", then return.
395 if ((has_credentials() || port.has_value()) && _buffer == "file") {
396 return true;
397 }
398
399 // If url's scheme is "file" and its host is an empty host, then return.
400 // An empty host is the empty string.
401 if (type == ada::scheme::type::FILE && host.has_value() &&
402 host.value().empty()) {
403 return true;
404 }
405 }
406
407 set_scheme(std::move(_buffer));
408
409 if constexpr (has_state_override) {
410 // This is uncommon.
411 uint16_t urls_scheme_port = get_special_port();
412
413 if (urls_scheme_port) {
414 // If url's port is url's scheme's default port, then set url's port to
415 // null.
416 if (port.has_value() && *port == urls_scheme_port) {
417 port = std::nullopt;
418 }
419 }
420 }
421 }
422
423 return true;
424}
425
426ada_really_inline bool url::parse_host(std::string_view input) {
427 ada_log("parse_host ", input, " [", input.size(), " bytes]");
428 if (input.empty()) {
429 return is_valid = false;
430 } // technically unnecessary.
431 // If input starts with U+005B ([), then:
432 if (input[0] == '[') {
433 // If input does not end with U+005D (]), validation error, return failure.
434 if (input.back() != ']') {
435 return is_valid = false;
436 }
437 ada_log("parse_host ipv6");
438
439 // Return the result of IPv6 parsing input with its leading U+005B ([) and
440 // trailing U+005D (]) removed.
441 input.remove_prefix(1);
442 input.remove_suffix(1);
443 return parse_ipv6(input);
444 }
445
446 // If isNotSpecial is true, then return the result of opaque-host parsing
447 // input.
448 if (!is_special()) {
449 return parse_opaque_host(input);
450 }
451
452 // Fast path: try to parse as pure decimal IPv4(a.b.c.d) first.
453 const uint64_t fast_result = checkers::try_parse_ipv4_fast(input);
454 if (fast_result < checkers::ipv4_fast_fail) {
455 // Fast path succeeded - input is pure decimal IPv4
456 if (!input.empty() && input.back() == '.') {
457 host = input.substr(0, input.size() - 1);
458 } else {
459 host = input;
460 }
461 host_type = IPV4;
462 ada_log("parse_host fast path decimal ipv4");
463 return true;
464 }
465 // Let domain be the result of running UTF-8 decode without BOM on the
466 // percent-decoding of input. Let asciiDomain be the result of running domain
467 // to ASCII with domain and false. The most common case is an ASCII input, in
468 // which case we do not need to call the expensive 'to_ascii' if a few
469 // conditions are met: no '%' and no 'xn-' subsequence.
470 std::string buffer = std::string(input);
471 // This next function checks that the result is ascii, but we are going to
472 // to check anyhow with is_forbidden.
473 // bool is_ascii =
474 unicode::to_lower_ascii(buffer.data(), buffer.size());
475 bool is_forbidden = unicode::contains_forbidden_domain_code_point(
476 buffer.data(), buffer.size());
477 if (is_forbidden == 0 && buffer.find("xn-") == std::string_view::npos) {
478 // fast path
479 host = std::move(buffer);
480
481 // Check for other IPv4 formats (hex, octal, etc.)
482 if (checkers::is_ipv4(host.value())) {
483 ada_log("parse_host fast path ipv4");
484 return parse_ipv4(host.value());
485 }
486 ada_log("parse_host fast path ", *host);
487 return true;
488 }
489 ada_log("parse_host calling to_ascii");
490 is_valid = ada::unicode::to_ascii(host, input, input.find('%'));
491 if (!is_valid) {
492 ada_log("parse_host to_ascii returns false");
493 return is_valid = false;
494 }
495 ada_log("parse_host to_ascii succeeded ", *host, " [", host->size(),
496 " bytes]");
497
498 if (std::any_of(host.value().begin(), host.value().end(),
499 ada::unicode::is_forbidden_domain_code_point)) {
500 host = std::nullopt;
501 return is_valid = false;
502 }
503
504 // If asciiDomain ends in a number, then return the result of IPv4 parsing
505 // asciiDomain.
506 if (checkers::is_ipv4(host.value())) {
507 ada_log("parse_host got ipv4 ", *host);
508 return parse_ipv4(host.value());
509 }
510
511 return true;
512}
513
514ada_really_inline void url::parse_path(std::string_view input) {
515 ada_log("parse_path ", input);
516 std::string tmp_buffer;
517 std::string_view internal_input;
518 if (unicode::has_tabs_or_newline(input)) {
519 tmp_buffer = input;
520 // Optimization opportunity: Instead of copying and then pruning, we could
521 // just directly build the string from user_input.
522 helpers::remove_ascii_tab_or_newline(tmp_buffer);
523 internal_input = tmp_buffer;
524 } else {
525 internal_input = input;
526 }
527
528 // If url is special, then:
529 if (is_special()) {
530 if (internal_input.empty()) {
531 path = "/";
532 } else if ((internal_input[0] == '/') || (internal_input[0] == '\\')) {
533 helpers::parse_prepared_path(internal_input.substr(1), type, path);
534 } else {
535 helpers::parse_prepared_path(internal_input, type, path);
536 }
537 } else if (!internal_input.empty()) {
538 if (internal_input[0] == '/') {
539 helpers::parse_prepared_path(internal_input.substr(1), type, path);
540 } else {
541 helpers::parse_prepared_path(internal_input, type, path);
542 }
543 } else {
544 if (!host.has_value()) {
545 path = "/";
546 }
547 }
548}
549
550[[nodiscard]] std::string url::to_string() const {
551 if (!is_valid) {
552 return "null";
553 }
554 std::string answer;
555 auto back = std::back_insert_iterator(answer);
556 answer.append("{\n");
557 answer.append("\t\"protocol\":\"");
558 helpers::encode_json(get_protocol(), back);
559 answer.append("\",\n");
560 if (has_credentials()) {
561 answer.append("\t\"username\":\"");
562 helpers::encode_json(username, back);
563 answer.append("\",\n");
564 answer.append("\t\"password\":\"");
565 helpers::encode_json(password, back);
566 answer.append("\",\n");
567 }
568 if (host.has_value()) {
569 answer.append("\t\"host\":\"");
570 helpers::encode_json(host.value(), back);
571 answer.append("\",\n");
572 }
573 if (port.has_value()) {
574 answer.append("\t\"port\":\"");
575 answer.append(std::to_string(port.value()));
576 answer.append("\",\n");
577 }
578 answer.append("\t\"path\":\"");
579 helpers::encode_json(path, back);
580 answer.append("\",\n");
581 answer.append("\t\"opaque path\":");
582 answer.append((has_opaque_path ? "true" : "false"));
583 if (has_search()) {
584 answer.append(",\n");
585 answer.append("\t\"query\":\"");
586 // NOLINTNEXTLINE(bugprone-unchecked-optional-access)
587 helpers::encode_json(query.value(), back);
588 answer.append("\"");
589 }
590 if (hash.has_value()) {
591 answer.append(",\n");
592 answer.append("\t\"hash\":\"");
593 helpers::encode_json(hash.value(), back);
594 answer.append("\"");
595 }
596 answer.append("\n}");
597 return answer;
598}
599
600[[nodiscard]] bool url::has_valid_domain() const noexcept {
601 if (!host.has_value()) {
602 return false;
603 }
604 return checkers::verify_dns_length(host.value());
605}
606
607[[nodiscard]] std::string url::get_origin() const noexcept {
608 if (is_special()) {
609 // Return a new opaque origin.
610 if (type == scheme::FILE) {
611 return "null";
612 }
613 return ada::helpers::concat(get_protocol(), "//", get_host());
614 }
615
616 if (non_special_scheme == "blob") {
617 if (!path.empty()) {
618 auto result = ada::parse<ada::url>(path);
619 if (result &&
620 (result->type == scheme::HTTP || result->type == scheme::HTTPS)) {
621 // If pathURL's scheme is not "http" and not "https", then return a
622 // new opaque origin.
623 return ada::helpers::concat(result->get_protocol(), "//",
624 result->get_host());
625 }
626 }
627 }
628
629 // Return a new opaque origin.
630 return "null";
631}
632
633[[nodiscard]] std::string url::get_protocol() const noexcept {
634 if (is_special()) {
635 return helpers::concat(ada::scheme::details::is_special_list[type], ":");
636 }
637 // We only move the 'scheme' if it is non-special.
638 return helpers::concat(non_special_scheme, ":");
639}
640
641[[nodiscard]] std::string url::get_host() const noexcept {
642 // If url's host is null, then return the empty string.
643 // If url's port is null, return url's host, serialized.
644 // Return url's host, serialized, followed by U+003A (:) and url's port,
645 // serialized.
646 if (!host.has_value()) {
647 return "";
648 }
649 if (port.has_value()) {
650 return host.value() + ":" + get_port();
651 }
652 return host.value();
653}
654
655[[nodiscard]] std::string url::get_hostname() const noexcept {
656 return host.value_or("");
657}
658
659[[nodiscard]] std::string url::get_search() const noexcept {
660 // If this's URL's query is either null or the empty string, then return the
661 // empty string. Return U+003F (?), followed by this's URL's query.
662 return (!query.has_value() || (query.value().empty())) ? ""
663 : "?" + query.value();
664}
665
666[[nodiscard]] const std::string& url::get_username() const noexcept {
667 return username;
668}
669
670[[nodiscard]] const std::string& url::get_password() const noexcept {
671 return password;
672}
673
674[[nodiscard]] std::string url::get_port() const noexcept {
675 return port.has_value() ? std::to_string(port.value()) : "";
676}
677
678[[nodiscard]] std::string url::get_hash() const noexcept {
679 // If this's URL's fragment is either null or the empty string, then return
680 // the empty string. Return U+0023 (#), followed by this's URL's fragment.
681 return (!hash.has_value() || (hash.value().empty())) ? ""
682 : "#" + hash.value();
683}
684
685template <bool override_hostname>
686bool url::set_host_or_hostname(const std::string_view input) {
687 if (has_opaque_path) {
688 return false;
689 }
690
691 std::optional<std::string> previous_host = host;
692 std::optional<uint16_t> previous_port = port;
693
694 size_t host_end_pos = input.find('#');
695 std::string _host(input.data(), host_end_pos != std::string_view::npos
696 ? host_end_pos
697 : input.size());
698 helpers::remove_ascii_tab_or_newline(_host);
699 std::string_view new_host(_host);
700
701 // If url's scheme is "file", then set state to file host state, instead of
702 // host state.
703 if (type != ada::scheme::type::FILE) {
704 std::string_view host_view(_host.data(), _host.length());
705 auto [location, found_colon] =
706 helpers::get_host_delimiter_location(is_special(), host_view);
707
708 // Otherwise, if c is U+003A (:) and insideBrackets is false, then:
709 // Note: the 'found_colon' value is true if and only if a colon was
710 // encountered while not inside brackets.
711 if (found_colon) {
712 // If buffer is the empty string, host-missing validation error, return
713 // failure.
714 std::string_view buffer = host_view.substr(0, location);
715 if (buffer.empty()) {
716 return false;
717 }
718
719 // If state override is given and state override is hostname state, then
720 // return failure.
721 if constexpr (override_hostname) {
722 return false;
723 }
724
725 // Let host be the result of host parsing buffer with url is not special.
726 bool succeeded = parse_host(buffer);
727 if (!succeeded) {
728 host = std::move(previous_host);
729 update_base_port(previous_port);
730 return false;
731 }
732
733 // Set url's host to host, buffer to the empty string, and state to port
734 // state.
735 std::string_view port_buffer = new_host.substr(location + 1);
736 if (!port_buffer.empty()) {
737 set_port(port_buffer);
738 }
739 return true;
740 }
741 // Otherwise, if one of the following is true:
742 // - c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#)
743 // - url is special and c is U+005C (\‍)
744 else {
745 // If url is special and host_view is the empty string, host-missing
746 // validation error, return failure.
747 if (host_view.empty() && is_special()) {
748 return false;
749 }
750
751 // Otherwise, if state override is given, host_view is the empty string,
752 // and either url includes credentials or url's port is non-null, then
753 // return failure.
754 if (host_view.empty() && (has_credentials() || port.has_value())) {
755 return false;
756 }
757
758 // Let host be the result of host parsing host_view with url is not
759 // special.
760 if (host_view.empty() && !is_special()) {
761 host = "";
762 return true;
763 }
764
765 bool succeeded = parse_host(host_view);
766 if (!succeeded) {
767 host = std::move(previous_host);
768 update_base_port(previous_port);
769 return false;
770 }
771 return true;
772 }
773 }
774
775 size_t location = new_host.find_first_of("/\\?");
776 if (location != std::string_view::npos) {
777 new_host.remove_suffix(new_host.length() - location);
778 }
779
780 if (new_host.empty()) {
781 // Set url's host to the empty string.
782 host = "";
783 } else {
784 // Let host be the result of host parsing buffer with url is not special.
785 if (!parse_host(new_host)) {
786 host = std::move(previous_host);
787 update_base_port(previous_port);
788 return false;
789 }
790
791 // If host is "localhost", then set host to the empty string.
792 if (host == "localhost") {
793 host = "";
794 }
795 }
796 return true;
797}
798
799bool url::set_host(const std::string_view input) {
800 return set_host_or_hostname<false>(input);
801}
802
803bool url::set_hostname(const std::string_view input) {
804 return set_host_or_hostname<true>(input);
805}
806
807bool url::set_username(const std::string_view input) {
808 if (cannot_have_credentials_or_port()) {
809 return false;
810 }
811 username = ada::unicode::percent_encode(
813 return true;
814}
815
816bool url::set_password(const std::string_view input) {
817 if (cannot_have_credentials_or_port()) {
818 return false;
819 }
820 password = ada::unicode::percent_encode(
822 return true;
823}
824
825bool url::set_port(const std::string_view input) {
826 if (cannot_have_credentials_or_port()) {
827 return false;
828 }
829
830 if (input.empty()) {
831 port = std::nullopt;
832 return true;
833 }
834
835 std::string trimmed(input);
836 helpers::remove_ascii_tab_or_newline(trimmed);
837
838 if (trimmed.empty()) {
839 return true;
840 }
841
842 // Input should not start with a non-digit character.
843 if (!ada::unicode::is_ascii_digit(trimmed.front())) {
844 return false;
845 }
846
847 // Find the first non-digit character to determine the length of digits
848 auto first_non_digit =
849 std::ranges::find_if_not(trimmed, ada::unicode::is_ascii_digit);
850 std::string_view digits_to_parse =
851 std::string_view(trimmed.data(), first_non_digit - trimmed.begin());
852
853 // Revert changes if parse_port fails.
854 std::optional<uint16_t> previous_port = port;
855 parse_port(digits_to_parse);
856 if (is_valid) {
857 return true;
858 }
859 port = std::move(previous_port);
860 is_valid = true;
861 return false;
862}
863
864void url::set_hash(const std::string_view input) {
865 if (input.empty()) {
866 hash = std::nullopt;
867 helpers::strip_trailing_spaces_from_opaque_path(*this);
868 return;
869 }
870
871 std::string new_value;
872 new_value = input[0] == '#' ? input.substr(1) : input;
873 helpers::remove_ascii_tab_or_newline(new_value);
874 hash = unicode::percent_encode(new_value,
876}
877
878void url::set_search(const std::string_view input) {
879 if (input.empty()) {
880 query = std::nullopt;
881 helpers::strip_trailing_spaces_from_opaque_path(*this);
882 return;
883 }
884
885 std::string new_value;
886 new_value = input[0] == '?' ? input.substr(1) : input;
887 helpers::remove_ascii_tab_or_newline(new_value);
888
889 auto query_percent_encode_set =
892
893 query = ada::unicode::percent_encode(new_value, query_percent_encode_set);
894}
895
896bool url::set_pathname(const std::string_view input) {
897 if (has_opaque_path) {
898 return false;
899 }
900 path.clear();
901 parse_path(input);
902 return true;
903}
904
905bool url::set_protocol(const std::string_view input) {
906 std::string view(input);
907 helpers::remove_ascii_tab_or_newline(view);
908 if (view.empty()) {
909 return true;
910 }
911
912 // Schemes should start with alpha values.
913 if (!checkers::is_alpha(view[0])) {
914 return false;
915 }
916
917 view.append(":");
918
919 std::string::iterator pointer =
920 std::ranges::find_if_not(view, unicode::is_alnum_plus);
921
922 if (pointer != view.end() && *pointer == ':') {
923 return parse_scheme<true>(
924 std::string_view(view.data(), pointer - view.begin()));
925 }
926 return false;
927}
928
929bool url::set_href(const std::string_view input) {
931
932 if (out) {
933 *this = *out;
934 }
935
936 return out.has_value();
937}
938
939} // namespace ada
#define ada_really_inline
Definition common_defs.h:85
constexpr uint8_t QUERY_PERCENT_ENCODE[32]
constexpr uint8_t SPECIAL_QUERY_PERCENT_ENCODE[32]
constexpr uint8_t C0_CONTROL_PERCENT_ENCODE[32]
constexpr uint8_t USERINFO_PERCENT_ENCODE[32]
constexpr uint8_t FRAGMENT_PERCENT_ENCODE[32]
constexpr bool has_hex_prefix(std::string_view input)
constexpr uint64_t ipv4_fast_fail
Definition checkers.h:129
constexpr bool is_alpha(char x) noexcept
constexpr bool is_digit(char x) noexcept
ada_really_inline constexpr uint64_t try_parse_ipv4_fast(std::string_view input) noexcept
constexpr std::string_view is_special_list[]
Definition scheme-inl.h:19
constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept
Definition scheme-inl.h:72
@ NOT_SPECIAL
Definition scheme.h:43
std::string ipv6(const std::array< uint16_t, 8 > &address) noexcept
std::string ipv4(uint64_t address) noexcept
Definition ada_idna.h:13
@ IPV6
Definition url_base.h:32
@ IPV4
Definition url_base.h:30
tl::expected< result_type, ada::errors > result
ada_warn_unused ada::result< result_type > parse(std::string_view input, const result_type *base_url=nullptr)
Definitions for the URL scheme.
ada_really_inline constexpr bool is_special() const noexcept
url_host_type host_type
Definition url_base.h:67
bool is_valid
Definition url_base.h:56
bool has_opaque_path
Definition url_base.h:62
void set_hash(std::string_view input)
Definition url.cpp:864
std::string get_search() const noexcept
Definition url.cpp:659
bool set_hostname(std::string_view input)
Definition url.cpp:803
bool set_host(std::string_view input)
Definition url.cpp:799
ada_really_inline bool has_credentials() const noexcept
Definition url-inl.h:19
bool set_password(std::string_view input)
Definition url.cpp:816
void set_search(std::string_view input)
Definition url.cpp:878
bool set_href(std::string_view input)
Definition url.cpp:929
bool set_username(std::string_view input)
Definition url.cpp:807
std::string get_host() const noexcept
Definition url.cpp:641
std::string get_hash() const noexcept
Definition url.cpp:678
bool set_pathname(std::string_view input)
Definition url.cpp:896
std::string get_origin() const noexcept override
Definition url.cpp:607
std::string get_hostname() const noexcept
Definition url.cpp:655
const std::string & get_password() const noexcept
Definition url.cpp:670
bool set_protocol(std::string_view input)
Definition url.cpp:905
std::string get_port() const noexcept
Definition url.cpp:674
const std::string & get_username() const noexcept
Definition url.cpp:666
bool set_port(std::string_view input)
Definition url.cpp:825
constexpr bool has_search() const noexcept override
Definition url-inl.h:164
std::string to_string() const override
Definition url.cpp:550
std::string get_protocol() const noexcept
Definition url.cpp:633
bool has_valid_domain() const noexcept override
Definition url.cpp:600
Definitions for unicode operations.