22#include <riscv_vector.h>
/**
 * Reports whether `c` is one of the three characters tested below:
 * carriage return ('\r'), line feed ('\n') or horizontal tab ('\t').
 *
 * @param c the character to classify.
 * @return true if `c` is '\r', '\n' or '\t'; false otherwise.
 */
constexpr bool is_tabs_or_newline(char c) noexcept {
  return c == '\r' || c == '\n' || c == '\t';
}
/**
 * Replicates the byte `v` into each of the eight bytes of a 64-bit word.
 *
 * Multiplying by 0x0101010101010101 places one copy of `v` in every byte
 * lane; since `v` fits in 8 bits the copies never overlap or carry.
 *
 * @param v the byte value to replicate.
 * @return a 64-bit word whose eight bytes all equal `v`.
 */
constexpr uint64_t broadcast(uint8_t v) noexcept {
  return 0x101010101010101ull * v;
}
37constexpr bool to_lower_ascii(
char* input,
size_t length)
noexcept {
39 uint64_t broadcast_Ap =
broadcast(128 -
'A');
40 uint64_t broadcast_Zp =
broadcast(128 -
'Z' - 1);
41 uint64_t non_ascii = 0;
44 for (; i + 7 < length; i += 8) {
46 memcpy(&word, input + i,
sizeof(word));
47 non_ascii |= (word & broadcast_80);
49 (((word + broadcast_Ap) ^ (word + broadcast_Zp)) & broadcast_80) >> 2;
50 memcpy(input + i, &word,
sizeof(word));
54 memcpy(&word, input + i, length - i);
55 non_ascii |= (word & broadcast_80);
57 (((word + broadcast_Ap) ^ (word + broadcast_Zp)) & broadcast_80) >> 2;
58 memcpy(input + i, &word, length - i);
60 return non_ascii == 0;
64 std::string_view user_input)
noexcept {
66 if (user_input.size() < 16) {
75 _mm_setr_epi8(1, 0, 0, 0, 0, 0, 0, 0, 0, 9, 10, 0, 0, 13, 0, 0);
76 __m128i running = _mm_setzero_si128();
77 for (; i + 15 < user_input.size(); i += 16) {
78 __m128i word = _mm_loadu_si128((
const __m128i*)(user_input.data() + i));
80 __m128i shuffled = _mm_shuffle_epi8(rnt, word);
82 __m128i matches = _mm_cmpeq_epi8(shuffled, word);
83 running = _mm_or_si128(running, matches);
85 if (i < user_input.size()) {
86 __m128i word = _mm_loadu_si128(
87 (
const __m128i*)(user_input.data() + user_input.length() - 16));
88 __m128i shuffled = _mm_shuffle_epi8(rnt, word);
89 __m128i matches = _mm_cmpeq_epi8(shuffled, word);
90 running = _mm_or_si128(running, matches);
92 return _mm_movemask_epi8(running) != 0;
96 std::string_view user_input)
noexcept {
98 if (user_input.size() < 16) {
115 static uint8_t rnt_array[16] = {1, 0, 0, 0, 0, 0, 0, 0,
116 0, 9, 10, 0, 0, 13, 0, 0};
117 const uint8x16_t rnt = vld1q_u8(rnt_array);
119 uint8x16_t running{0};
120 for (; i + 15 < user_input.size(); i += 16) {
121 uint8x16_t word = vld1q_u8((
const uint8_t*)user_input.data() + i);
123 running = vorrq_u8(running, vceqq_u8(vqtbl1q_u8(rnt, word), word));
125 if (i < user_input.size()) {
127 vld1q_u8((
const uint8_t*)user_input.data() + user_input.length() - 16);
128 running = vorrq_u8(running, vceqq_u8(vqtbl1q_u8(rnt, word), word));
130 return vmaxvq_u32(vreinterpretq_u32_u8(running)) != 0;
134 std::string_view user_input)
noexcept {
136 if (user_input.size() < 16) {
141 const __m128i mask1 = _mm_set1_epi8(
'\r');
142 const __m128i mask2 = _mm_set1_epi8(
'\n');
143 const __m128i mask3 = _mm_set1_epi8(
'\t');
146 for (; i + 15 < user_input.size(); i += 16) {
147 __m128i word = _mm_loadu_si128((
const __m128i*)(user_input.data() + i));
148 running = _mm_or_si128(
149 _mm_or_si128(running, _mm_or_si128(_mm_cmpeq_epi8(word, mask1),
150 _mm_cmpeq_epi8(word, mask2))),
151 _mm_cmpeq_epi8(word, mask3));
153 if (i < user_input.size()) {
154 __m128i word = _mm_loadu_si128(
155 (
const __m128i*)(user_input.data() + user_input.length() - 16));
156 running = _mm_or_si128(
157 _mm_or_si128(running, _mm_or_si128(_mm_cmpeq_epi8(word, mask1),
158 _mm_cmpeq_epi8(word, mask2))),
159 _mm_cmpeq_epi8(word, mask3));
161 return _mm_movemask_epi8(running) != 0;
165 std::string_view user_input)
noexcept {
167 if (user_input.size() < 16) {
172 const __m128i mask1 = __lsx_vrepli_b(
'\r');
173 const __m128i mask2 = __lsx_vrepli_b(
'\n');
174 const __m128i mask3 = __lsx_vrepli_b(
'\t');
177 for (; i + 15 < user_input.size(); i += 16) {
178 __m128i word = __lsx_vld((
const __m128i*)(user_input.data() + i), 0);
179 running = __lsx_vor_v(
180 __lsx_vor_v(running, __lsx_vor_v(__lsx_vseq_b(word, mask1),
181 __lsx_vseq_b(word, mask2))),
182 __lsx_vseq_b(word, mask3));
184 if (i < user_input.size()) {
185 __m128i word = __lsx_vld(
186 (
const __m128i*)(user_input.data() + user_input.length() - 16), 0);
187 running = __lsx_vor_v(
188 __lsx_vor_v(running, __lsx_vor_v(__lsx_vseq_b(word, mask1),
189 __lsx_vseq_b(word, mask2))),
190 __lsx_vseq_b(word, mask3));
192 if (__lsx_bz_v(running))
return false;
197 std::string_view user_input)
noexcept {
198 uint8_t* src = (uint8_t*)user_input.data();
199 for (
size_t vl, n = user_input.size(); n > 0; n -= vl, src += vl) {
200 vl = __riscv_vsetvl_e8m1(n);
201 vuint8m1_t v = __riscv_vle8_v_u8m1(src, vl);
202 vbool8_t m1 = __riscv_vmseq(v,
'\r', vl);
203 vbool8_t m2 = __riscv_vmseq(v,
'\n', vl);
204 vbool8_t m3 = __riscv_vmseq(v,
'\t', vl);
205 vbool8_t m = __riscv_vmor(__riscv_vmor(m1, m2, vl), m3, vl);
206 long idx = __riscv_vfirst(m, vl);
207 if (idx >= 0)
return true;
213 std::string_view user_input)
noexcept {
214 auto has_zero_byte = [](uint64_t v) {
215 return ((v - 0x0101010101010101) & ~(v) & 0x8080808080808080);
222 for (; i + 7 < user_input.size(); i += 8) {
224 memcpy(&word, user_input.data() + i,
sizeof(word));
225 uint64_t xor1 = word ^ mask1;
226 uint64_t xor2 = word ^ mask2;
227 uint64_t xor3 = word ^ mask3;
228 running |= has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor3);
230 if (i < user_input.size()) {
232 memcpy(&word, user_input.data() + i, user_input.size() - i);
233 uint64_t xor1 = word ^ mask1;
234 uint64_t xor2 = word ^ mask2;
235 uint64_t xor3 = word ^ mask3;
236 running |= has_zero_byte(xor1) | has_zero_byte(xor2) | has_zero_byte(xor3);
248 std::array<uint8_t, 256>
result{};
249 for (uint8_t c : {
'\0',
'\x09',
'\x0a',
'\x0d',
' ',
'#',
'/',
':',
'<',
250 '>',
'?',
'@',
'[',
'\\',
']',
'^',
'|'}) {
257 const char c)
noexcept {
263 std::array<uint8_t, 256>
result{};
264 for (uint8_t c : {
'\0',
'\x09',
'\x0a',
'\x0d',
' ',
'#',
'/',
':',
'<',
265 '>',
'?',
'@',
'[',
'\\',
']',
'^',
'|',
'%'}) {
268 for (uint8_t c = 0; c <= 32; c++) {
271 for (
size_t c = 127; c < 255; c++) {
280 const char c)
noexcept {
285 const char* input,
size_t length)
noexcept {
287 uint8_t accumulator{};
288 for (; i + 4 <= length; i += 4) {
294 for (; i < length; i++) {
300constexpr static std::array<uint8_t, 256>
302 std::array<uint8_t, 256>
result{};
303 for (uint8_t c : {
'\0',
'\x09',
'\x0a',
'\x0d',
' ',
'#',
'/',
':',
'<',
304 '>',
'?',
'@',
'[',
'\\',
']',
'^',
'|',
'%'}) {
307 for (uint8_t c =
'A'; c <=
'Z'; c++) {
310 for (uint8_t c = 0; c <= 32; c++) {
313 for (
size_t c = 127; c < 255; c++) {
320contains_forbidden_domain_code_point_or_upper(
const char* input,
321 size_t length)
noexcept {
323 uint8_t accumulator{};
324 for (; i + 4 <= length; i += 4) {
334 for (; i < length; i++) {
343 std::array<bool, 256>
result{};
344 for (
size_t c = 0; c < 256; c++) {
345 result[c] = (c >=
'0' && c <=
'9') || (c >=
'a' && c <=
'z') ||
346 (c >=
'A' && c <=
'Z') || c ==
'+' || c ==
'-' || c ==
'.';
359 return (c >=
'0' && c <=
'9') || (c >=
'A' && c <=
'F') ||
360 (c >=
'a' && c <=
'f');
366 return (c >=
'0' && c <=
'9');
375 return (
unsigned char)c <=
' ';
379 const char c)
noexcept {
380 return c ==
'\t' || c ==
'\n' || c ==
'\r';
384 "..",
"%2e.",
".%2e",
"%2e%2e"};
387 std::string_view input)
noexcept {
392 uint64_t half_length = uint64_t(input.size()) / 2;
393 if (half_length - 1 > 2) {
398 if ((input[0] !=
'.') && (input[0] !=
'%')) {
402 int hash_value = (input.size() + (
unsigned)(input[0])) & 3;
404 if (target.size() != input.size()) {
409 auto prefix_equal_unsafe = [](std::string_view a, std::string_view b) {
411 memcpy(&A, a.data(),
sizeof(A));
412 memcpy(&B, b.data(),
sizeof(B));
415 if (!prefix_equal_unsafe(input, target)) {
418 for (
size_t i = 2; i < input.size(); i++) {
420 if ((uint8_t((c | 0x20) - 0x61) <= 25 ? (c | 0x20) : c) != target[i]) {
436 std::string_view input)
noexcept {
437 return input ==
"." || input ==
"%2e" || input ==
"%2E";
441 return (c >=
'0' && c <=
'9') || (c >=
'a' && c <=
'f');
445 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 10, 11,
446 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 11, 12, 13, 14, 15};
448unsigned constexpr convert_hex_to_binary(
const char c)
noexcept {
452std::string percent_decode(
const std::string_view input,
size_t first_percent) {
455 if (first_percent == std::string_view::npos) {
456 return std::string(input);
459 dest.reserve(input.length());
460 dest.append(input.substr(0, first_percent));
461 const char* pointer = input.data() + first_percent;
462 const char* end = input.data() + input.size();
465 while (pointer < end) {
466 const char ch = pointer[0];
467 size_t remaining = end - pointer - 1;
468 if (ch !=
'%' || remaining < 2 ||
470 (!is_ascii_hex_digit(pointer[1]) ||
471 !is_ascii_hex_digit(pointer[2])))) {
475 unsigned a = convert_hex_to_binary(pointer[1]);
476 unsigned b = convert_hex_to_binary(pointer[2]);
477 char c =
static_cast<char>(a * 16 + b);
485std::string percent_encode(
const std::string_view input,
486 const uint8_t character_set[]) {
487 auto pointer = std::ranges::find_if(input, [character_set](
const char c) {
491 if (pointer == input.end()) {
492 return std::string(input);
496 result.reserve(input.length());
498 result.append(input.substr(0, std::distance(input.begin(), pointer)));
500 for (; pointer != input.end(); pointer++) {
511template <
bool append>
512bool percent_encode(
const std::string_view input,
const uint8_t character_set[],
514 ada_log(
"percent_encode ", input,
" to output string while ",
515 append ?
"appending" :
"overwriting");
516 auto pointer = std::ranges::find_if(input, [character_set](
const char c) {
519 ada_log(
"percent_encode done checking, moved to ",
520 std::distance(input.begin(), pointer));
523 if (pointer == input.end()) {
524 ada_log(
"percent_encode encoding not needed.");
527 if constexpr (!append) {
530 ada_log(
"percent_encode appending ", std::distance(input.begin(), pointer),
533 out.append(input.data(), std::distance(input.begin(), pointer));
534 ada_log(
"percent_encode processing ", std::distance(pointer, input.end()),
536 for (; pointer != input.end(); pointer++) {
546bool to_ascii(std::optional<std::string>& out,
const std::string_view plain,
547 size_t first_percent) {
548 std::string percent_decoded_buffer;
549 std::string_view input = plain;
550 if (first_percent != std::string_view::npos) {
551 percent_decoded_buffer = unicode::percent_decode(plain, first_percent);
552 input = percent_decoded_buffer;
556 if (idna_ascii.empty() || contains_forbidden_domain_code_point(
557 idna_ascii.data(), idna_ascii.size())) {
560 out = std::move(idna_ascii);
564std::string percent_encode(
const std::string_view input,
565 const uint8_t character_set[],
size_t index) {
568 out.append(input.data(), index);
569 auto pointer = input.begin() + index;
570 for (; pointer != input.end(); pointer++) {
Definitions of the character sets used by unicode functions.
Declaration of the character sets used by unicode functions.
Cross-platform compiler macros and common definitions.
#define ADA_PUSH_DISABLE_ALL_WARNINGS
#define ADA_POP_DISABLE_WARNINGS
#define ada_really_inline
ada_really_inline constexpr bool bit_at(const uint8_t a[], const uint8_t i)
std::string to_ascii(std::string_view ut8_string)
Includes the declarations for unicode operations.
static constexpr std::array< uint8_t, 256 > is_forbidden_domain_code_point_table
static constexpr std::array< uint8_t, 256 > is_forbidden_domain_code_point_table_or_upper
static constexpr char hex_to_binary_table[]
constexpr uint64_t broadcast(uint8_t v) noexcept
constexpr std::string_view table_is_double_dot_path_segment[]
constexpr bool is_tabs_or_newline(char c) noexcept
static constexpr std::array< uint8_t, 256 > is_forbidden_host_code_point_table
static constexpr std::array< bool, 256 > is_alnum_plus_table
tl::expected< result_type, ada::errors > result
Definitions for all unicode specific functions.