Ada 3.1.0
Fast spec-compliant URL parser
Loading...
Searching...
No Matches
checkers.cpp
Go to the documentation of this file.
1#include "ada/checkers-inl.h"
2#include "ada/checkers.h"
3#include "ada/unicode-inl.h"
4#include "ada/common_defs.h"
5
6#include <algorithm>
7#include <array>
8#include <string_view>
9
10namespace ada::checkers {
11
12ada_really_inline constexpr bool is_ipv4(std::string_view view) noexcept {
13 // The string is not empty and does not contain upper case ASCII characters.
14 //
15 // Optimization. To be considered as a possible ipv4, the string must end
16 // with 'x' or a lowercase hex character.
17 // Most of the time, this will be false so this simple check will save a lot
18 // of effort.
19 // If the address ends with a dot, we need to prune it (special case).
20 if (view.ends_with('.')) {
21 view.remove_suffix(1);
22 if (view.empty()) {
23 return false;
24 }
25 }
26 char last_char = view.back();
27 bool possible_ipv4 = (last_char >= '0' && last_char <= '9') ||
28 (last_char >= 'a' && last_char <= 'f') ||
29 last_char == 'x';
30 if (!possible_ipv4) {
31 return false;
32 }
33 // From the last character, find the last dot.
34 size_t last_dot = view.rfind('.');
35 if (last_dot != std::string_view::npos) {
36 // We have at least one dot.
37 view = view.substr(last_dot + 1);
38 }
42 if (std::ranges::all_of(view, ada::checkers::is_digit)) {
43 return true;
44 }
45 // It could be hex (0x), but not if there is a single character.
46 if (view.size() == 1) {
47 return false;
48 }
49 // It must start with 0x.
50 if (!view.starts_with("0x")) {
51 return false;
52 }
53 // We must allow "0x".
54 if (view.size() == 2) {
55 return true;
56 }
57 // We have 0x followed by some characters, we need to check that they are
58 // hexadecimals.
59 return std::all_of(view.begin() + 2, view.end(),
60 ada::unicode::is_lowercase_hex);
61}
62
// Lookup table used by path_signature: one flag per byte value.
//   1 -> byte needs percent encoding (<= 0x20, '"', '#', '<', '>', '?', '`',
//        '{', '}' and everything above 0x7e)
//   2 -> backslash (0x5c)
//   4 -> dot (0x2e)
//   8 -> percent sign (0x25)
//   0 -> any other byte
// The table is a constexpr variable, so the lambda below is always evaluated
// at compile time.
static constexpr std::array<uint8_t, 256> path_signature_table =
    []() constexpr {
      std::array<uint8_t, 256> table{};
      for (size_t byte = 0; byte < table.size(); ++byte) {
        uint8_t flag = 0;
        switch (byte) {
          case 0x25:
            flag = 8;
            break;
          case 0x2e:
            flag = 4;
            break;
          case 0x5c:
            flag = 2;
            break;
          case 0x22:
          case 0x23:
          case 0x3c:
          case 0x3e:
          case 0x3f:
          case 0x60:
          case 0x7b:
          case 0x7d:
            flag = 1;
            break;
          default:
            // Control characters and space, plus everything past '~'.
            if (byte <= 0x20 || byte > 0x7e) {
              flag = 1;
            }
            break;
        }
        table[byte] = flag;
      }
      return table;
    }();
84
85ada_really_inline constexpr uint8_t path_signature(
86 std::string_view input) noexcept {
87 // The path percent-encode set is the query percent-encode set and U+003F (?),
88 // U+0060 (`), U+007B ({), and U+007D (}). The query percent-encode set is the
89 // C0 control percent-encode set and U+0020 SPACE, U+0022 ("), U+0023 (#),
90 // U+003C (<), and U+003E (>). The C0 control percent-encode set are the C0
91 // controls and all code points greater than U+007E (~).
92 size_t i = 0;
93 uint8_t accumulator{};
94 for (; i + 7 < input.size(); i += 8) {
95 accumulator |= uint8_t(path_signature_table[uint8_t(input[i])] |
96 path_signature_table[uint8_t(input[i + 1])] |
97 path_signature_table[uint8_t(input[i + 2])] |
98 path_signature_table[uint8_t(input[i + 3])] |
99 path_signature_table[uint8_t(input[i + 4])] |
100 path_signature_table[uint8_t(input[i + 5])] |
101 path_signature_table[uint8_t(input[i + 6])] |
102 path_signature_table[uint8_t(input[i + 7])]);
103 }
104 for (; i < input.size(); i++) {
105 accumulator |= uint8_t(path_signature_table[uint8_t(input[i])]);
106 }
107 return accumulator;
108}
109
110ada_really_inline constexpr bool verify_dns_length(
111 std::string_view input) noexcept {
112 if (input.back() == '.') {
113 if (input.size() > 254) return false;
114 } else if (input.size() > 253)
115 return false;
116
117 size_t start = 0;
118 while (start < input.size()) {
119 auto dot_location = input.find('.', start);
120 // If not found, it's likely the end of the domain
121 if (dot_location == std::string_view::npos) dot_location = input.size();
122
123 auto label_size = dot_location - start;
124 if (label_size > 63 || label_size == 0) return false;
125
126 start = dot_location + 1;
127 }
128
129 return true;
130}
131} // namespace ada::checkers
Definitions for URL specific checkers used within Ada.
Declarations for URL specific checkers used within Ada.
Common definitions for cross-platform compiler support.
#define ada_really_inline
Definition common_defs.h:81
Includes the definitions for validation functions.
static constexpr std::array< uint8_t, 256 > path_signature_table
Definition checkers.cpp:65
constexpr bool is_digit(char x) noexcept
tl::expected< result_type, ada::errors > result
Definitions for unicode operations.