21 return c ==
'\r' || c ==
'\n' || c ==
'\t';
25 return 0x101010101010101ull * v;
28constexpr bool to_lower_ascii(
char* input,
size_t length)
noexcept {
30 uint64_t broadcast_Ap =
broadcast(128 -
'A');
31 uint64_t broadcast_Zp =
broadcast(128 -
'Z' - 1);
32 uint64_t non_ascii = 0;
35 for (; i + 7 < length; i += 8) {
37 memcpy(&word, input + i,
sizeof(word));
38 non_ascii |= (word & broadcast_80);
40 (((word + broadcast_Ap) ^ (word + broadcast_Zp)) & broadcast_80) >> 2;
41 memcpy(input + i, &word,
sizeof(word));
45 memcpy(&word, input + i, length - i);
46 non_ascii |= (word & broadcast_80);
48 (((word + broadcast_Ap) ^ (word + broadcast_Zp)) & broadcast_80) >> 2;
49 memcpy(input + i, &word, length - i);
51 return non_ascii == 0;
55 std::string_view user_input)
noexcept {
57 if (user_input.size() < 16) {
58 return std::any_of(user_input.begin(), user_input.end(),
75 static uint8_t rnt_array[16] = {1, 0, 0, 0, 0, 0, 0, 0,
76 0, 9, 10, 0, 0, 13, 0, 0};
77 const uint8x16_t rnt = vld1q_u8(rnt_array);
79 uint8x16_t running{0};
80 for (; i + 15 < user_input.size(); i += 16) {
81 uint8x16_t word = vld1q_u8((
const uint8_t*)user_input.data() + i);
83 running = vorrq_u8(running, vceqq_u8(vqtbl1q_u8(rnt, word), word));
85 if (i < user_input.size()) {
87 vld1q_u8((
const uint8_t*)user_input.data() + user_input.length() - 16);
88 running = vorrq_u8(running, vceqq_u8(vqtbl1q_u8(rnt, word), word));
90 return vmaxvq_u32(vreinterpretq_u32_u8(running)) != 0;
94 std::string_view user_input)
noexcept {
96 if (user_input.size() < 16) {
97 return std::any_of(user_input.begin(), user_input.end(),
102 const __m128i mask1 = _mm_set1_epi8(
'\r');
103 const __m128i mask2 = _mm_set1_epi8(
'\n');
104 const __m128i mask3 = _mm_set1_epi8(
'\t');
107 for (; i + 15 < user_input.size(); i += 16) {
108 __m128i word = _mm_loadu_si128((
const __m128i*)(user_input.data() + i));
109 running = _mm_or_si128(
110 _mm_or_si128(running, _mm_or_si128(_mm_cmpeq_epi8(word, mask1),
111 _mm_cmpeq_epi8(word, mask2))),
112 _mm_cmpeq_epi8(word, mask3));
114 if (i < user_input.size()) {
115 __m128i word = _mm_loadu_si128(
116 (
const __m128i*)(user_input.data() + user_input.length() - 16));
117 running = _mm_or_si128(
118 _mm_or_si128(running, _mm_or_si128(_mm_cmpeq_epi8(word, mask1),
119 _mm_cmpeq_epi8(word, mask2))),
120 _mm_cmpeq_epi8(word, mask3));
122 return _mm_movemask_epi8(running) != 0;
126 std::string_view user_input)
noexcept {
127 auto has_zero_byte = [](uint64_t v) {
128 return ((v - 0x0101010101010101) & ~(v) & 0x8080808080808080);
135 for (; i + 7 < user_input.size(); i += 8) {
137 memcpy(&word, user_input.data() + i,
sizeof(word));
138 uint64_t xor1 = word ^ mask1;
139 uint64_t xor2 = word ^ mask2;
140 uint64_t xor3 = word ^ mask3;
141 running |= has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor3);
143 if (i < user_input.size()) {
145 memcpy(&word, user_input.data() + i, user_input.size() - i);
146 uint64_t xor1 = word ^ mask1;
147 uint64_t xor2 = word ^ mask2;
148 uint64_t xor3 = word ^ mask3;
149 running |= has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor3);
161 std::array<uint8_t, 256>
result{};
162 for (uint8_t c : {
'\0',
'\x09',
'\x0a',
'\x0d',
' ',
'#',
'/',
':',
'<',
163 '>',
'?',
'@',
'[',
'\\',
']',
'^',
'|'}) {
170 const char c)
noexcept {
176 std::array<uint8_t, 256>
result{};
177 for (uint8_t c : {
'\0',
'\x09',
'\x0a',
'\x0d',
' ',
'#',
'/',
':',
'<',
178 '>',
'?',
'@',
'[',
'\\',
']',
'^',
'|',
'%'}) {
181 for (uint8_t c = 0; c <= 32; c++) {
184 for (
size_t c = 127; c < 255; c++) {
193 const char c)
noexcept {
198 const char* input,
size_t length)
noexcept {
200 uint8_t accumulator{};
201 for (; i + 4 <= length; i += 4) {
207 for (; i < length; i++) {
213constexpr static std::array<uint8_t, 256>
215 std::array<uint8_t, 256>
result{};
216 for (uint8_t c : {
'\0',
'\x09',
'\x0a',
'\x0d',
' ',
'#',
'/',
':',
'<',
217 '>',
'?',
'@',
'[',
'\\',
']',
'^',
'|',
'%'}) {
220 for (uint8_t c =
'A'; c <=
'Z'; c++) {
223 for (uint8_t c = 0; c <= 32; c++) {
226 for (
size_t c = 127; c < 255; c++) {
233contains_forbidden_domain_code_point_or_upper(
const char* input,
234 size_t length)
noexcept {
236 uint8_t accumulator{};
237 for (; i + 4 <= length; i += 4) {
247 for (; i < length; i++) {
256 std::array<bool, 256>
result{};
257 for (
size_t c = 0; c < 256; c++) {
258 result[c] = (c >=
'0' && c <=
'9') || (c >=
'a' && c <=
'z') ||
259 (c >=
'A' && c <=
'Z') || c ==
'+' || c ==
'-' || c ==
'.';
272 return (c >=
'0' && c <=
'9') || (c >=
'A' && c <=
'F') ||
273 (c >=
'a' && c <=
'f');
279 return (c >=
'0' && c <=
'9');
288 return (
unsigned char)c <=
' ';
292 const char c)
noexcept {
293 return c ==
'\t' || c ==
'\n' || c ==
'\r';
297 "..",
"%2e.",
".%2e",
"%2e%2e"};
300 std::string_view input)
noexcept {
305 uint64_t half_length = uint64_t(input.size()) / 2;
306 if (half_length - 1 > 2) {
311 if ((input[0] !=
'.') && (input[0] !=
'%')) {
315 int hash_value = (input.size() + (
unsigned)(input[0])) & 3;
317 if (target.size() != input.size()) {
322 auto prefix_equal_unsafe = [](std::string_view a, std::string_view b) {
324 memcpy(&A, a.data(),
sizeof(A));
325 memcpy(&B, b.data(),
sizeof(B));
328 if (!prefix_equal_unsafe(input, target)) {
331 for (
size_t i = 2; i < input.size(); i++) {
333 if ((uint8_t((c | 0x20) - 0x61) <= 25 ? (c | 0x20) : c) != target[i]) {
349 std::string_view input)
noexcept {
350 return input ==
"." || input ==
"%2e" || input ==
"%2E";
354 return (c >=
'0' && c <=
'9') || (c >=
'a' && c <=
'f');
358 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 10, 11,
359 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
360 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 11, 12, 13, 14, 15};
361unsigned constexpr convert_hex_to_binary(
const char c)
noexcept {
365std::string percent_decode(
const std::string_view input,
size_t first_percent) {
368 if (first_percent == std::string_view::npos) {
369 return std::string(input);
372 dest.reserve(input.length());
373 dest.append(input.substr(0, first_percent));
374 const char* pointer = input.data() + first_percent;
375 const char* end = input.data() + input.size();
378 while (pointer < end) {
379 const char ch = pointer[0];
380 size_t remaining = end - pointer - 1;
381 if (ch !=
'%' || remaining < 2 ||
383 (!is_ascii_hex_digit(pointer[1]) ||
384 !is_ascii_hex_digit(pointer[2])))) {
388 unsigned a = convert_hex_to_binary(pointer[1]);
389 unsigned b = convert_hex_to_binary(pointer[2]);
390 char c =
static_cast<char>(a * 16 + b);
398std::string percent_encode(
const std::string_view input,
399 const uint8_t character_set[]) {
400 auto pointer = std::ranges::find_if(input, [character_set](
const char c) {
404 if (pointer == input.end()) {
405 return std::string(input);
409 result.reserve(input.length());
411 result.append(input.substr(0, std::distance(input.begin(), pointer)));
413 for (; pointer != input.end(); pointer++) {
424template <
bool append>
425bool percent_encode(
const std::string_view input,
const uint8_t character_set[],
427 ada_log(
"percent_encode ", input,
" to output string while ",
428 append ?
"appending" :
"overwriting");
430 std::find_if(input.begin(), input.end(), [character_set](
const char c) {
431 return character_sets::bit_at(character_set, c);
433 ada_log(
"percent_encode done checking, moved to ",
434 std::distance(input.begin(), pointer));
437 if (pointer == input.end()) {
438 ada_log(
"percent_encode encoding not needed.");
441 if constexpr (!append) {
444 ada_log(
"percent_encode appending ", std::distance(input.begin(), pointer),
446 out.append(input.data(), std::distance(input.begin(), pointer));
447 ada_log(
"percent_encode processing ", std::distance(pointer, input.end()),
449 for (; pointer != input.end(); pointer++) {
459bool to_ascii(std::optional<std::string>& out,
const std::string_view plain,
460 size_t first_percent) {
461 std::string percent_decoded_buffer;
462 std::string_view input = plain;
463 if (first_percent != std::string_view::npos) {
464 percent_decoded_buffer = unicode::percent_decode(plain, first_percent);
465 input = percent_decoded_buffer;
469 if (idna_ascii.empty() || contains_forbidden_domain_code_point(
470 idna_ascii.data(), idna_ascii.size())) {
473 out = std::move(idna_ascii);
477std::string percent_encode(
const std::string_view input,
478 const uint8_t character_set[],
size_t index) {
480 out.append(input.data(), index);
481 auto pointer = input.begin() + index;
482 for (; pointer != input.end(); pointer++) {
Definitions of the character sets used by unicode functions.
Declaration of the character sets used by unicode functions.
Common definitions for cross-platform compiler support.
#define ADA_PUSH_DISABLE_ALL_WARNINGS
#define ADA_POP_DISABLE_WARNINGS
#define ada_really_inline
ada_really_inline constexpr bool bit_at(const uint8_t a[], const uint8_t i)
std::string to_ascii(std::string_view ut8_string)
Includes the declarations for unicode operations.
static constexpr std::array< uint8_t, 256 > is_forbidden_domain_code_point_table
static constexpr std::array< uint8_t, 256 > is_forbidden_domain_code_point_table_or_upper
static constexpr char hex_to_binary_table[]
constexpr uint64_t broadcast(uint8_t v) noexcept
constexpr std::string_view table_is_double_dot_path_segment[]
constexpr bool is_tabs_or_newline(char c) noexcept
static constexpr std::array< uint8_t, 256 > is_forbidden_host_code_point_table
static constexpr std::array< bool, 256 > is_alnum_plus_table
tl::expected< result_type, ada::errors > result
Definitions for all unicode specific functions.