Ada 2.9.2
Fast spec-compliant URL parser
Loading...
Searching...
No Matches
checkers.cpp
Go to the documentation of this file.
1#include "ada/checkers.h"
2
3#include <algorithm>
4
5namespace ada::checkers {
6
7ada_really_inline constexpr bool is_ipv4(std::string_view view) noexcept {
8 // The string is not empty and does not contain upper case ASCII characters.
9 //
10 // Optimization. To be considered as a possible ipv4, the string must end
11 // with 'x' or a lowercase hex character.
12 // Most of the time, this will be false so this simple check will save a lot
13 // of effort.
14 // If the address ends with a dot, we need to prune it (special case).
15 if (view.ends_with('.')) {
16 view.remove_suffix(1);
17 if (view.empty()) {
18 return false;
19 }
20 }
21 char last_char = view.back();
22 bool possible_ipv4 = (last_char >= '0' && last_char <= '9') ||
23 (last_char >= 'a' && last_char <= 'f') ||
24 last_char == 'x';
25 if (!possible_ipv4) {
26 return false;
27 }
28 // From the last character, find the last dot.
29 size_t last_dot = view.rfind('.');
30 if (last_dot != std::string_view::npos) {
31 // We have at least one dot.
32 view = view.substr(last_dot + 1);
33 }
37 if (std::ranges::all_of(view, ada::checkers::is_digit)) {
38 return true;
39 }
40 // It could be hex (0x), but not if there is a single character.
41 if (view.size() == 1) {
42 return false;
43 }
44 // It must start with 0x.
45 if (!view.starts_with("0x")) {
46 return false;
47 }
48 // We must allow "0x".
49 if (view.size() == 2) {
50 return true;
51 }
52 // We have 0x followed by some characters, we need to check that they are
53 // hexadecimals.
54 return std::all_of(view.begin() + 2, view.end(),
55 ada::unicode::is_lowercase_hex);
56}
57
58// for use with path_signature, we include all characters that need percent
59// encoding.
60static constexpr std::array<uint8_t, 256> path_signature_table =
61 []() consteval {
62 std::array<uint8_t, 256> result{};
63 for (size_t i = 0; i < 256; i++) {
64 if (i <= 0x20 || i == 0x22 || i == 0x23 || i == 0x3c || i == 0x3e ||
65 i == 0x3f || i == 0x60 || i == 0x7b || i == 0x7d || i > 0x7e) {
66 result[i] = 1;
67 } else if (i == 0x25) {
68 result[i] = 8;
69 } else if (i == 0x2e) {
70 result[i] = 4;
71 } else if (i == 0x5c) {
72 result[i] = 2;
73 } else {
74 result[i] = 0;
75 }
76 }
77 return result;
78 }();
79
80ada_really_inline constexpr uint8_t path_signature(
81 std::string_view input) noexcept {
82 // The path percent-encode set is the query percent-encode set and U+003F (?),
83 // U+0060 (`), U+007B ({), and U+007D (}). The query percent-encode set is the
84 // C0 control percent-encode set and U+0020 SPACE, U+0022 ("), U+0023 (#),
85 // U+003C (<), and U+003E (>). The C0 control percent-encode set are the C0
86 // controls and all code points greater than U+007E (~).
87 size_t i = 0;
88 uint8_t accumulator{};
89 for (; i + 7 < input.size(); i += 8) {
90 accumulator |= uint8_t(path_signature_table[uint8_t(input[i])] |
91 path_signature_table[uint8_t(input[i + 1])] |
92 path_signature_table[uint8_t(input[i + 2])] |
93 path_signature_table[uint8_t(input[i + 3])] |
94 path_signature_table[uint8_t(input[i + 4])] |
95 path_signature_table[uint8_t(input[i + 5])] |
96 path_signature_table[uint8_t(input[i + 6])] |
97 path_signature_table[uint8_t(input[i + 7])]);
98 }
99 for (; i < input.size(); i++) {
100 accumulator |= uint8_t(path_signature_table[uint8_t(input[i])]);
101 }
102 return accumulator;
103}
104
/**
 * Verifies the length constraints of a dot-separated domain name.
 *
 * The name is accepted when:
 *  - it is not empty,
 *  - its total size is at most 253 characters, or at most 254 when it ends
 *    with a dot,
 *  - every label (the text between dots) is between 1 and 63 characters
 *    long. A single trailing dot does not introduce an extra label.
 *
 * @param input the domain name to check.
 * @return true if all constraints hold, false otherwise.
 */
ada_really_inline constexpr bool verify_dns_length(
    std::string_view input) noexcept {
  // An empty name has no label at all. Rejecting it up front also keeps the
  // back() call below well-defined (calling it on an empty view is undefined
  // behavior).
  if (input.empty()) return false;

  // A trailing dot buys one extra character on the overall limit.
  const size_t max_size = (input.back() == '.') ? 254 : 253;
  if (input.size() > max_size) return false;

  size_t start = 0;
  while (start < input.size()) {
    auto dot_location = input.find('.', start);
    // No further dot: the current label runs to the end of the input.
    if (dot_location == std::string_view::npos) dot_location = input.size();

    const auto label_size = dot_location - start;
    if (label_size > 63 || label_size == 0) return false;

    // Resume right after the dot. When the dot was the last character, start
    // becomes input.size() and the loop ends without seeing an empty label.
    start = dot_location + 1;
  }

  return true;
}
126} // namespace ada::checkers
Declarations for URL specific checkers used within Ada.
#define ada_really_inline
Definition common_defs.h:84
Includes the definitions for validation functions.
static constexpr std::array< uint8_t, 256 > path_signature_table
Definition checkers.cpp:60
constexpr bool is_digit(char x) noexcept
tl::expected< result_type, ada::errors > result