20 return c ==
'\r' || c ==
'\n' || c ==
'\t';
24 return 0x101010101010101ull * v;
27constexpr bool to_lower_ascii(
char* input,
size_t length)
noexcept {
29 uint64_t broadcast_Ap =
broadcast(128 -
'A');
30 uint64_t broadcast_Zp =
broadcast(128 -
'Z' - 1);
31 uint64_t non_ascii = 0;
34 for (; i + 7 < length; i += 8) {
36 memcpy(&word, input + i,
sizeof(word));
37 non_ascii |= (word & broadcast_80);
39 (((word + broadcast_Ap) ^ (word + broadcast_Zp)) & broadcast_80) >> 2;
40 memcpy(input + i, &word,
sizeof(word));
44 memcpy(&word, input + i, length - i);
45 non_ascii |= (word & broadcast_80);
47 (((word + broadcast_Ap) ^ (word + broadcast_Zp)) & broadcast_80) >> 2;
48 memcpy(input + i, &word, length - i);
50 return non_ascii == 0;
54 std::string_view user_input)
noexcept {
56 if (user_input.size() < 16) {
57 return std::any_of(user_input.begin(), user_input.end(),
74 static uint8_t rnt_array[16] = {1, 0, 0, 0, 0, 0, 0, 0,
75 0, 9, 10, 0, 0, 13, 0, 0};
76 const uint8x16_t rnt = vld1q_u8(rnt_array);
78 uint8x16_t running{0};
79 for (; i + 15 < user_input.size(); i += 16) {
80 uint8x16_t word = vld1q_u8((
const uint8_t*)user_input.data() + i);
82 running = vorrq_u8(running, vceqq_u8(vqtbl1q_u8(rnt, word), word));
84 if (i < user_input.size()) {
86 vld1q_u8((
const uint8_t*)user_input.data() + user_input.length() - 16);
87 running = vorrq_u8(running, vceqq_u8(vqtbl1q_u8(rnt, word), word));
89 return vmaxvq_u32(vreinterpretq_u32_u8(running)) != 0;
93 std::string_view user_input)
noexcept {
95 if (user_input.size() < 16) {
96 return std::any_of(user_input.begin(), user_input.end(),
101 const __m128i mask1 = _mm_set1_epi8(
'\r');
102 const __m128i mask2 = _mm_set1_epi8(
'\n');
103 const __m128i mask3 = _mm_set1_epi8(
'\t');
106 for (; i + 15 < user_input.size(); i += 16) {
107 __m128i word = _mm_loadu_si128((
const __m128i*)(user_input.data() + i));
108 running = _mm_or_si128(
109 _mm_or_si128(running, _mm_or_si128(_mm_cmpeq_epi8(word, mask1),
110 _mm_cmpeq_epi8(word, mask2))),
111 _mm_cmpeq_epi8(word, mask3));
113 if (i < user_input.size()) {
114 __m128i word = _mm_loadu_si128(
115 (
const __m128i*)(user_input.data() + user_input.length() - 16));
116 running = _mm_or_si128(
117 _mm_or_si128(running, _mm_or_si128(_mm_cmpeq_epi8(word, mask1),
118 _mm_cmpeq_epi8(word, mask2))),
119 _mm_cmpeq_epi8(word, mask3));
121 return _mm_movemask_epi8(running) != 0;
125 std::string_view user_input)
noexcept {
126 auto has_zero_byte = [](uint64_t v) {
127 return ((v - 0x0101010101010101) & ~(v) & 0x8080808080808080);
134 for (; i + 7 < user_input.size(); i += 8) {
136 memcpy(&word, user_input.data() + i,
sizeof(word));
137 uint64_t xor1 = word ^ mask1;
138 uint64_t xor2 = word ^ mask2;
139 uint64_t xor3 = word ^ mask3;
140 running |= has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor3);
142 if (i < user_input.size()) {
144 memcpy(&word, user_input.data() + i, user_input.size() - i);
145 uint64_t xor1 = word ^ mask1;
146 uint64_t xor2 = word ^ mask2;
147 uint64_t xor3 = word ^ mask3;
148 running |= has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor3);
160 std::array<uint8_t, 256>
result{};
161 for (uint8_t c : {
'\0',
'\x09',
'\x0a',
'\x0d',
' ',
'#',
'/',
':',
'<',
162 '>',
'?',
'@',
'[',
'\\',
']',
'^',
'|'}) {
169 const char c)
noexcept {
175 std::array<uint8_t, 256>
result{};
176 for (uint8_t c : {
'\0',
'\x09',
'\x0a',
'\x0d',
' ',
'#',
'/',
':',
'<',
177 '>',
'?',
'@',
'[',
'\\',
']',
'^',
'|',
'%'}) {
180 for (uint8_t c = 0; c <= 32; c++) {
183 for (
size_t c = 127; c < 255; c++) {
192 const char c)
noexcept {
197 const char* input,
size_t length)
noexcept {
199 uint8_t accumulator{};
200 for (; i + 4 <= length; i += 4) {
206 for (; i < length; i++) {
212constexpr static std::array<uint8_t, 256>
214 std::array<uint8_t, 256>
result{};
215 for (uint8_t c : {
'\0',
'\x09',
'\x0a',
'\x0d',
' ',
'#',
'/',
':',
'<',
216 '>',
'?',
'@',
'[',
'\\',
']',
'^',
'|',
'%'}) {
219 for (uint8_t c =
'A'; c <=
'Z'; c++) {
222 for (uint8_t c = 0; c <= 32; c++) {
225 for (
size_t c = 127; c < 255; c++) {
232contains_forbidden_domain_code_point_or_upper(
const char* input,
233 size_t length)
noexcept {
235 uint8_t accumulator{};
236 for (; i + 4 <= length; i += 4) {
246 for (; i < length; i++) {
255 std::array<bool, 256>
result{};
256 for (
size_t c = 0; c < 256; c++) {
257 result[c] = (c >=
'0' && c <=
'9') || (c >=
'a' && c <=
'z') ||
258 (c >=
'A' && c <=
'Z') || c ==
'+' || c ==
'-' || c ==
'.';
271 return (c >=
'0' && c <=
'9') || (c >=
'A' && c <=
'F') ||
272 (c >=
'a' && c <=
'f');
276 return (
unsigned char)c <=
' ';
280 const char c)
noexcept {
281 return c ==
'\t' || c ==
'\n' || c ==
'\r';
285 "..",
"%2e.",
".%2e",
"%2e%2e"};
288 std::string_view input)
noexcept {
293 uint64_t half_length = uint64_t(input.size()) / 2;
294 if (half_length - 1 > 2) {
299 if ((input[0] !=
'.') && (input[0] !=
'%')) {
303 int hash_value = (input.size() + (
unsigned)(input[0])) & 3;
305 if (target.size() != input.size()) {
310 auto prefix_equal_unsafe = [](std::string_view a, std::string_view b) {
312 memcpy(&A, a.data(),
sizeof(A));
313 memcpy(&B, b.data(),
sizeof(B));
316 if (!prefix_equal_unsafe(input, target)) {
319 for (
size_t i = 2; i < input.size(); i++) {
321 if ((uint8_t((c | 0x20) - 0x61) <= 25 ? (c | 0x20) : c) != target[i]) {
337 std::string_view input)
noexcept {
338 return input ==
"." || input ==
"%2e" || input ==
"%2E";
342 return (c >=
'0' && c <=
'9') || (c >=
'a' && c <=
'f');
346 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 10, 11,
347 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
348 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 11, 12, 13, 14, 15};
349unsigned constexpr convert_hex_to_binary(
const char c)
noexcept {
353std::string percent_decode(
const std::string_view input,
size_t first_percent) {
356 if (first_percent == std::string_view::npos) {
357 return std::string(input);
360 dest.reserve(input.length());
361 dest.append(input.substr(0, first_percent));
362 const char* pointer = input.data() + first_percent;
363 const char* end = input.data() + input.size();
366 while (pointer < end) {
367 const char ch = pointer[0];
368 size_t remaining = end - pointer - 1;
369 if (ch !=
'%' || remaining < 2 ||
371 (!is_ascii_hex_digit(pointer[1]) ||
372 !is_ascii_hex_digit(pointer[2])))) {
376 unsigned a = convert_hex_to_binary(pointer[1]);
377 unsigned b = convert_hex_to_binary(pointer[2]);
378 char c =
static_cast<char>(a * 16 + b);
386std::string percent_encode(
const std::string_view input,
387 const uint8_t character_set[]) {
388 auto pointer = std::ranges::find_if(input, [character_set](
const char c) {
392 if (pointer == input.end()) {
393 return std::string(input);
397 result.reserve(input.length());
399 result.append(input.substr(0, std::distance(input.begin(), pointer)));
401 for (; pointer != input.end(); pointer++) {
412template <
bool append>
413bool percent_encode(
const std::string_view input,
const uint8_t character_set[],
415 ada_log(
"percent_encode ", input,
" to output string while ",
416 append ?
"appending" :
"overwriting");
418 std::find_if(input.begin(), input.end(), [character_set](
const char c) {
419 return character_sets::bit_at(character_set, c);
421 ada_log(
"percent_encode done checking, moved to ",
422 std::distance(input.begin(), pointer));
425 if (pointer == input.end()) {
426 ada_log(
"percent_encode encoding not needed.");
429 if constexpr (!append) {
432 ada_log(
"percent_encode appending ", std::distance(input.begin(), pointer),
434 out.append(input.data(), std::distance(input.begin(), pointer));
435 ada_log(
"percent_encode processing ", std::distance(pointer, input.end()),
437 for (; pointer != input.end(); pointer++) {
447bool to_ascii(std::optional<std::string>& out,
const std::string_view plain,
448 size_t first_percent) {
449 std::string percent_decoded_buffer;
450 std::string_view input = plain;
451 if (first_percent != std::string_view::npos) {
452 percent_decoded_buffer = unicode::percent_decode(plain, first_percent);
453 input = percent_decoded_buffer;
458 idna_ascii.data(), idna_ascii.size())) {
461 out = std::move(idna_ascii);
465std::string percent_encode(
const std::string_view input,
466 const uint8_t character_set[],
size_t index) {
468 out.append(input.data(), index);
469 auto pointer = input.begin() + index;
470 for (; pointer != input.end(); pointer++) {
Includes all definitions for Ada.
Definitions of the character sets used by unicode functions.
Common definitions for cross-platform compiler support.
#define ADA_PUSH_DISABLE_ALL_WARNINGS
#define ADA_POP_DISABLE_WARNINGS
#define ada_really_inline
ada_really_inline constexpr bool bit_at(const uint8_t a[], const uint8_t i)
std::string to_ascii(std::string_view ut8_string)
bool contains_forbidden_domain_code_point(std::string_view ascii_string)
Includes the declarations for unicode operations.
static constexpr std::array< uint8_t, 256 > is_forbidden_domain_code_point_table
static constexpr std::array< uint8_t, 256 > is_forbidden_domain_code_point_table_or_upper
static constexpr char hex_to_binary_table[]
constexpr uint64_t broadcast(uint8_t v) noexcept
constexpr std::string_view table_is_double_dot_path_segment[]
constexpr bool is_tabs_or_newline(char c) noexcept
static constexpr std::array< uint8_t, 256 > is_forbidden_host_code_point_table
static constexpr std::array< bool, 256 > is_alnum_plus_table
tl::expected< result_type, ada::errors > result
Definitions for all unicode specific functions.