Ada 2.9.0
Fast spec-compliant URL parser
Loading...
Searching...
No Matches
checkers.cpp
Go to the documentation of this file.
1#include "ada/checkers.h"
2
3#include <algorithm>
4
5namespace ada::checkers {
6
7ada_really_inline ada_constexpr bool is_ipv4(std::string_view view) noexcept {
8 // The string is not empty and does not contain upper case ASCII characters.
9 //
10 // Optimization. To be considered as a possible ipv4, the string must end
11 // with 'x' or a lowercase hex character.
12 // Most of the time, this will be false so this simple check will save a lot
13 // of effort.
14 char last_char = view.back();
15 // If the address ends with a dot, we need to prune it (special case).
16 if (last_char == '.') {
17 view.remove_suffix(1);
18 if (view.empty()) {
19 return false;
20 }
21 last_char = view.back();
22 }
23 bool possible_ipv4 = (last_char >= '0' && last_char <= '9') ||
24 (last_char >= 'a' && last_char <= 'f') ||
25 last_char == 'x';
26 if (!possible_ipv4) {
27 return false;
28 }
29 // From the last character, find the last dot.
30 size_t last_dot = view.rfind('.');
31 if (last_dot != std::string_view::npos) {
32 // We have at least one dot.
33 view = view.substr(last_dot + 1);
34 }
38 if (std::all_of(view.begin(), view.end(), ada::checkers::is_digit)) {
39 return true;
40 }
41 // It could be hex (0x), but not if there is a single character.
42 if (view.size() == 1) {
43 return false;
44 }
45 // It must start with 0x.
46 if (!std::equal(view.begin(), view.begin() + 2, "0x")) {
47 return false;
48 }
49 // We must allow "0x".
50 if (view.size() == 2) {
51 return true;
52 }
53 // We have 0x followed by some characters, we need to check that they are
54 // hexadecimals.
55 return std::all_of(view.begin() + 2, view.end(),
56 ada::unicode::is_lowercase_hex);
57}
58
// Lookup table used by path_signature: one flag byte per possible input byte.
//   1 -> byte needs percent encoding (<= 0x20, '"', '#', '<', '>', '?', '`',
//        '{', '}', or > 0x7e)
//   2 -> backslash (0x5c)
//   4 -> dot (0x2e)
//   8 -> percent sign (0x25)
//   0 -> every other byte
static constexpr std::array<uint8_t, 256> path_signature_table =
    []() constexpr {
      std::array<uint8_t, 256> table{};
      for (size_t code = 0; code < table.size(); ++code) {
        uint8_t flag = 0;
        switch (code) {
          case 0x25:
            flag = 8;
            break;
          case 0x2e:
            flag = 4;
            break;
          case 0x5c:
            flag = 2;
            break;
          case 0x22:
          case 0x23:
          case 0x3c:
          case 0x3e:
          case 0x3f:
          case 0x60:
          case 0x7b:
          case 0x7d:
            flag = 1;
            break;
          default:
            // Controls, space, and everything above '~' also need encoding.
            flag = uint8_t((code <= 0x20 || code > 0x7e) ? 1 : 0);
            break;
        }
        table[code] = flag;
      }
      return table;
    }();
80
81ada_really_inline constexpr uint8_t path_signature(
82 std::string_view input) noexcept {
83 // The path percent-encode set is the query percent-encode set and U+003F (?),
84 // U+0060 (`), U+007B ({), and U+007D (}). The query percent-encode set is the
85 // C0 control percent-encode set and U+0020 SPACE, U+0022 ("), U+0023 (#),
86 // U+003C (<), and U+003E (>). The C0 control percent-encode set are the C0
87 // controls and all code points greater than U+007E (~).
88 size_t i = 0;
89 uint8_t accumulator{};
90 for (; i + 7 < input.size(); i += 8) {
91 accumulator |= uint8_t(path_signature_table[uint8_t(input[i])] |
92 path_signature_table[uint8_t(input[i + 1])] |
93 path_signature_table[uint8_t(input[i + 2])] |
94 path_signature_table[uint8_t(input[i + 3])] |
95 path_signature_table[uint8_t(input[i + 4])] |
96 path_signature_table[uint8_t(input[i + 5])] |
97 path_signature_table[uint8_t(input[i + 6])] |
98 path_signature_table[uint8_t(input[i + 7])]);
99 }
100 for (; i < input.size(); i++) {
101 accumulator |= uint8_t(path_signature_table[uint8_t(input[i])]);
102 }
103 return accumulator;
104}
105
106ada_really_inline constexpr bool verify_dns_length(
107 std::string_view input) noexcept {
108 if (input.back() == '.') {
109 if (input.size() > 254) return false;
110 } else if (input.size() > 253)
111 return false;
112
113 size_t start = 0;
114 while (start < input.size()) {
115 auto dot_location = input.find('.', start);
116 // If not found, it's likely the end of the domain
117 if (dot_location == std::string_view::npos) dot_location = input.size();
118
119 auto label_size = dot_location - start;
120 if (label_size > 63 || label_size == 0) return false;
121
122 start = dot_location + 1;
123 }
124
125 return true;
126}
127} // namespace ada::checkers
Declarations for URL specific checkers used within Ada.
#define ada_constexpr
#define ada_really_inline
Definition common_defs.h:84
Includes the definitions for validation functions.
static constexpr std::array< uint8_t, 256 > path_signature_table
Definition checkers.cpp:61
constexpr bool is_digit(char x) noexcept
tl::expected< result_type, ada::errors > result