Ada 2.9.0
Fast spec-compliant URL parser
Loading...
Searching...
No Matches
parse.cc
Go to the documentation of this file.
1#include <fuzzer/FuzzedDataProvider.h>
2
3#include <cstdio>
4#include <memory>
5#include <string>
6
7#include "ada.cpp"
8#include "ada.h"
9
/**
 * Checks whether the byte range [buf, buf + len) is well-formed UTF-8.
 *
 * The scan rejects:
 *  - truncated multi-byte sequences (lead byte without enough bytes left),
 *  - continuation positions that do not hold a 0b10xxxxxx byte,
 *  - overlong encodings (a code point stored in more bytes than needed),
 *  - surrogate code points in 0xD800..0xDFFF,
 *  - code points above 0x10FFFF,
 *  - bytes that cannot start a sequence (stray continuation bytes and
 *    lead bytes of the form 0b11111xxx).
 *
 * @param buf pointer to the first byte to validate; it is not dereferenced
 *            when len is 0.
 * @param len number of bytes to validate.
 * @return true if the whole range is valid UTF-8 (an empty range is valid),
 *         false otherwise.
 */
bool is_valid_utf8_string(const char *buf, size_t len) {
  const uint8_t *data = reinterpret_cast<const uint8_t *>(buf);
  uint64_t pos = 0;
  uint32_t code_point = 0;
  while (pos < len) {
    uint64_t next_pos = pos + 16;
    if (next_pos <= len) {
      // Fast path: it is safe to read 16 more bytes, so load them as two
      // 64-bit words (memcpy avoids unaligned/aliasing issues) and skip the
      // whole chunk when no byte has its high bit set, i.e. all are ASCII.
      uint64_t v1;
      std::memcpy(&v1, data + pos, sizeof(uint64_t));
      uint64_t v2;
      std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
      uint64_t v{v1 | v2};
      if ((v & 0x8080808080808080) == 0) {
        pos = next_pos;
        continue;
      }
    }

    // Skip ASCII bytes one at a time until a byte with the high bit set is
    // found or the input is exhausted.
    unsigned char byte = data[pos];
    while (byte < 0b10000000) {
      if (++pos == len) {
        return true;
      }
      byte = data[pos];
    }

    if ((byte & 0b11100000) == 0b11000000) {
      // Two-byte sequence: 110xxxxx 10xxxxxx.
      next_pos = pos + 2;
      if (next_pos > len) {
        return false;
      }
      if ((data[pos + 1] & 0b11000000) != 0b10000000) {
        return false;
      }
      code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111);
      // Values below 0x80 would be an overlong encoding of an ASCII byte.
      if ((code_point < 0x80) || (0x7ff < code_point)) {
        return false;
      }
    } else if ((byte & 0b11110000) == 0b11100000) {
      // Three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx.
      next_pos = pos + 3;
      if (next_pos > len) {
        return false;
      }
      if ((data[pos + 1] & 0b11000000) != 0b10000000) {
        return false;
      }
      if ((data[pos + 2] & 0b11000000) != 0b10000000) {
        return false;
      }
      code_point = (byte & 0b00001111) << 12 |
                   (data[pos + 1] & 0b00111111) << 6 |
                   (data[pos + 2] & 0b00111111);
      // Reject overlong forms (< 0x800) and the surrogate range
      // 0xD800..0xDFFF.
      if ((code_point < 0x800) || (0xffff < code_point) ||
          (0xd7ff < code_point && code_point < 0xe000)) {
        return false;
      }
    } else if ((byte & 0b11111000) == 0b11110000) {
      // Four-byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
      next_pos = pos + 4;
      if (next_pos > len) {
        return false;
      }
      if ((data[pos + 1] & 0b11000000) != 0b10000000) {
        return false;
      }
      if ((data[pos + 2] & 0b11000000) != 0b10000000) {
        return false;
      }
      if ((data[pos + 3] & 0b11000000) != 0b10000000) {
        return false;
      }
      code_point =
          (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
          (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
      // Reject overlong forms (<= 0xFFFF) and values above 0x10FFFF.
      if (code_point <= 0xffff || 0x10ffff < code_point) {
        return false;
      }
    } else {
      // Stray continuation byte (10xxxxxx) or a lead byte of the form
      // 11111xxx, neither of which can start a sequence.
      return false;
    }
    pos = next_pos;
  }
  return true;
}
93
94extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
95 FuzzedDataProvider fdp(data, size);
96 std::string source = fdp.ConsumeRandomLengthString(256);
97
98 // volatile forces the compiler to store the results without undue
99 // optimizations
100 volatile size_t length = 0;
101
102 auto parse_url = ada::parse<ada::url>(source);
103 auto parse_url_aggregator = ada::parse<ada::url_aggregator>(source);
104
105 if (is_valid_utf8_string(source.data(), source.length())) {
106 if (parse_url.has_value() ^ parse_url_aggregator.has_value()) {
107 printf("Source used to parse: %s", source.c_str());
108 abort();
109 }
110 }
111
112 if (parse_url) {
113 length += parse_url->get_href().size();
114 length += parse_url->get_origin().size();
115 }
116
117 if (parse_url_aggregator) {
118 length += parse_url_aggregator->get_href().size();
119 length += parse_url_aggregator->get_origin().size();
120
121 volatile bool is_parse_url_aggregator_output_valid = false;
122 is_parse_url_aggregator_output_valid = parse_url_aggregator->validate();
123
124 assert(parse_url->get_protocol() == parse_url_aggregator->get_protocol());
125 assert(parse_url->get_href() == parse_url_aggregator->get_href());
126
127 parse_url->set_href(source);
128 parse_url_aggregator->set_href(source);
129 assert(parse_url->get_href() == parse_url_aggregator->get_href());
130 }
131
135 auto out_url = ada::parse<ada::url>("https://www.ada-url.com");
136
137 if (out_url) {
138 out_url->set_protocol(source);
139 out_url->set_username(source);
140 out_url->set_password(source);
141 out_url->set_hostname(source);
142 out_url->set_host(source);
143 out_url->set_pathname(source);
144 out_url->set_search(source);
145 out_url->set_hash(source);
146 out_url->set_port(source);
147
148 // getters
149 length += out_url->get_protocol().size();
150 length += out_url->get_username().size();
151 length += out_url->get_password().size();
152 length += out_url->get_hostname().size();
153 length += out_url->get_host().size();
154 length += out_url->get_pathname().size();
155 length += out_url->get_search().size();
156 length += out_url->get_hash().size();
157 length += out_url->get_origin().size();
158 length += out_url->get_port().size();
159
160 length += out_url->to_string().size();
161 }
162
166 auto out_aggregator =
167 ada::parse<ada::url_aggregator>("https://www.ada-url.com");
168
169 if (out_aggregator) {
170 out_aggregator->set_protocol(source);
171 out_aggregator->set_username(source);
172 out_aggregator->set_password(source);
173 out_aggregator->set_hostname(source);
174 out_aggregator->set_host(source);
175 out_aggregator->set_pathname(source);
176 out_aggregator->set_search(source);
177 out_aggregator->set_hash(source);
178 out_aggregator->set_port(source);
179
180 // getters
181 length += out_aggregator->get_protocol().size();
182 length += out_aggregator->get_username().size();
183 length += out_aggregator->get_password().size();
184 length += out_aggregator->get_hostname().size();
185 length += out_aggregator->get_host().size();
186 length += out_aggregator->get_pathname().size();
187 length += out_aggregator->get_search().size();
188 length += out_aggregator->get_hash().size();
189 length += out_aggregator->get_origin().size();
190 length += out_aggregator->get_port().size();
191
192 length += out_aggregator->to_string().size();
193
194 volatile bool is_output_valid = false;
195 is_output_valid = out_aggregator->validate();
196
197 // Printing due to dead-code elimination
198 printf("diagram %s\n", out_aggregator->to_diagram().c_str());
199
200 // clear methods
201 out_aggregator->clear_port();
202 out_aggregator->clear_search();
203 out_aggregator->clear_hash();
204 }
205
209 length += ada::href_from_file(source).size();
210
214 bool is_valid = ada::checkers::verify_dns_length(source);
215
216 // Only used for avoiding dead-code elimination
217 if (is_valid) {
218 printf("dns length is valid\n");
219 }
220
221 // Only used for avoiding dead-code elimination
222 printf("length of url is %d\n", length);
223
224 return 0;
225} // extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
Includes all definitions for Ada.
std::string href_from_file(std::string_view path)
ada_warn_unused ada::result< result_type > parse(std::string_view input, const result_type *base_url=nullptr)
bool is_valid_utf8_string(const char *buf, size_t len)
Definition parse.cc:10
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
Definition parse.cc:94