Ada 3.0.1
Fast spec-compliant URL parser
Loading...
Searching...
No Matches
url_pattern-inl.h
Go to the documentation of this file.
1
5#ifndef ADA_URL_PATTERN_INL_H
6#define ADA_URL_PATTERN_INL_H
7
8#include "ada/common_defs.h"
10#include "ada/url_pattern.h"
11
12#include <string_view>
13
14namespace ada {
15
16inline bool url_pattern_init::operator==(const url_pattern_init& other) const {
17 return protocol == other.protocol && username == other.username &&
18 password == other.password && hostname == other.hostname &&
19 port == other.port && search == other.search && hash == other.hash &&
20 pathname == other.pathname;
21}
22
24 const url_pattern_component_result& other) const {
25 return input == other.input && groups == other.groups;
26}
27
28template <url_pattern_regex::regex_concept regex_provider>
31 std::string_view input,
32 std::vector<std::optional<std::string>>&& exec_result) {
33 // Let result be a new URLPatternComponentResult.
34 // Set result["input"] to input.
35 // Let groups be a record<USVString, (USVString or undefined)>.
36 auto result =
37 url_pattern_component_result{.input = std::string(input), .groups = {}};
38
39 // Optimization: Let's reserve the size.
40 result.groups.reserve(exec_result.size());
41
42 // We explicitly start iterating from 0 even though the spec
43 // says we should start from 1. This case is handled by the
44 // std_regex_provider.
45 for (size_t index = 0; index < exec_result.size(); index++) {
46 result.groups.insert({
47 group_name_list[index],
48 std::move(exec_result[index]),
49 });
50 }
51 return result;
52}
53
54template <url_pattern_regex::regex_concept regex_provider>
57 // Return this's associated URL pattern's protocol component's pattern string.
58 return protocol_component.pattern;
59}
60template <url_pattern_regex::regex_concept regex_provider>
63 // Return this's associated URL pattern's username component's pattern string.
64 return username_component.pattern;
65}
66template <url_pattern_regex::regex_concept regex_provider>
69 // Return this's associated URL pattern's password component's pattern string.
70 return password_component.pattern;
71}
72template <url_pattern_regex::regex_concept regex_provider>
75 // Return this's associated URL pattern's hostname component's pattern string.
76 return hostname_component.pattern;
77}
78template <url_pattern_regex::regex_concept regex_provider>
81 // Return this's associated URL pattern's port component's pattern string.
82 return port_component.pattern;
83}
84template <url_pattern_regex::regex_concept regex_provider>
87 // Return this's associated URL pattern's pathname component's pattern string.
88 return pathname_component.pattern;
89}
90template <url_pattern_regex::regex_concept regex_provider>
93 // Return this's associated URL pattern's search component's pattern string.
94 return search_component.pattern;
95}
96template <url_pattern_regex::regex_concept regex_provider>
99 // Return this's associated URL pattern's hash component's pattern string.
100 return hash_component.pattern;
101}
102template <url_pattern_regex::regex_concept regex_provider>
104 return ignore_case_;
105}
106template <url_pattern_regex::regex_concept regex_provider>
108 // If this's associated URL pattern's has regexp groups, then return true.
109 return protocol_component.has_regexp_groups ||
110 username_component.has_regexp_groups ||
111 password_component.has_regexp_groups ||
112 hostname_component.has_regexp_groups ||
113 port_component.has_regexp_groups ||
114 pathname_component.has_regexp_groups ||
115 search_component.has_regexp_groups || hash_component.has_regexp_groups;
116}
117
118inline bool url_pattern_part::is_regexp() const noexcept {
120}
121
123 const {
124 if (delimiter) {
125 return {&delimiter.value(), 1};
126 }
127 return {};
128}
129
131 const {
132 if (prefix) {
133 return {&prefix.value(), 1};
134 }
135 return {};
136}
137
138template <url_pattern_regex::regex_concept regex_provider>
139template <url_pattern_encoding_callback F>
140tl::expected<url_pattern_component<regex_provider>, errors>
142 std::string_view input, F& encoding_callback,
144 ada_log("url_pattern_component::compile input: ", input);
145 // Let part list be the result of running parse a pattern string given input,
146 // options, and encoding callback.
147 auto part_list = url_pattern_helpers::parse_pattern_string(input, options,
148 encoding_callback);
149
150 if (!part_list) {
151 ada_log("parse_pattern_string failed");
152 return tl::unexpected(part_list.error());
153 }
154
155 // Let (regular expression string, name list) be the result of running
156 // generate a regular expression and name list given part list and options.
157 auto [regular_expression_string, name_list] =
159 options);
160
161 ada_log("regular expression string: ", regular_expression_string);
162
163 // Let pattern string be the result of running generate a pattern
164 // string given part list and options.
165 auto pattern_string =
167
168 // Let regular expression be RegExpCreate(regular expression string,
169 // flags). If this throws an exception, catch it, and throw a
170 // TypeError.
171 std::optional<typename regex_provider::regex_type> regular_expression =
172 regex_provider::create_instance(regular_expression_string,
173 options.ignore_case);
174
175 if (!regular_expression) {
176 return tl::unexpected(errors::type_error);
177 }
178
179 // For each part of part list:
180 // - If part’s type is "regexp", then set has regexp groups to true.
181 const auto has_regexp = [](const auto& part) { return part.is_regexp(); };
182 const bool has_regexp_groups = std::ranges::any_of(*part_list, has_regexp);
183
184 ada_log("has regexp groups: ", has_regexp_groups);
185
186 // Return a new component whose pattern string is pattern string, regular
187 // expression is regular expression, group name list is name list, and has
188 // regexp groups is has regexp groups.
190 std::move(pattern_string), std::move(*regular_expression),
191 std::move(name_list), has_regexp_groups);
192}
193
194template <url_pattern_regex::regex_concept regex_provider>
196 const url_pattern_input& input, const std::string_view* base_url) {
197 // Return the result of match given this's associated URL pattern, input, and
198 // baseURL if given.
199 return match(input, base_url);
200}
201
202template <url_pattern_regex::regex_concept regex_provider>
204 const url_pattern_input& input, const std::string_view* base_url) {
205 // TODO: Optimization opportunity. Rather than returning `url_pattern_result`
206 // Implement a fast path just like `can_parse()` in ada_url.
207 // Let result be the result of match given this's associated URL pattern,
208 // input, and baseURL if given.
209 // If result is null, return false.
210 if (auto result = match(input, base_url); result.has_value()) {
211 return result->has_value();
212 }
213 return tl::unexpected(errors::type_error);
214}
215
216template <url_pattern_regex::regex_concept regex_provider>
218 const url_pattern_input& input, const std::string_view* base_url_string) {
219 std::string protocol{};
220 std::string username{};
221 std::string password{};
222 std::string hostname{};
223 std::string port{};
224 std::string pathname{};
225 std::string search{};
226 std::string hash{};
227
228 // Let inputs be an empty list.
229 // Append input to inputs.
230 std::vector inputs{input};
231
232 // If input is a URLPatternInit then:
233 if (std::holds_alternative<url_pattern_init>(input)) {
234 ada_log(
235 "url_pattern::match called with url_pattern_init and base_url_string=",
236 base_url_string);
237 // If baseURLString was given, throw a TypeError.
238 if (base_url_string) {
239 ada_log("failed to match because base_url_string was given");
240 return tl::unexpected(errors::type_error);
241 }
242
243 // Let applyResult be the result of process a URLPatternInit given input,
244 // "url", protocol, username, password, hostname, port, pathname, search,
245 // and hash.
246 auto apply_result = url_pattern_init::process(
247 std::get<url_pattern_init>(input), "url", protocol, username, password,
248 hostname, port, pathname, search, hash);
249
250 // If this throws an exception, catch it, and return null.
251 if (!apply_result.has_value()) {
252 ada_log("match returned std::nullopt because process threw");
253 return std::nullopt;
254 }
255
256 // Set protocol to applyResult["protocol"].
257 ADA_ASSERT_TRUE(apply_result->protocol.has_value());
258 protocol = apply_result->protocol.value();
259
260 // Set username to applyResult["username"].
261 ADA_ASSERT_TRUE(apply_result->username.has_value());
262 username = apply_result->username.value();
263
264 // Set password to applyResult["password"].
265 ADA_ASSERT_TRUE(apply_result->password.has_value());
266 password = apply_result->password.value();
267
268 // Set hostname to applyResult["hostname"].
269 ADA_ASSERT_TRUE(apply_result->hostname.has_value());
270 hostname = apply_result->hostname.value();
271
272 // Set port to applyResult["port"].
273 ADA_ASSERT_TRUE(apply_result->port.has_value());
274 port = apply_result->port.value();
275
276 // Set pathname to applyResult["pathname"].
277 ADA_ASSERT_TRUE(apply_result->pathname.has_value());
278 pathname = apply_result->pathname.value();
279
280 // Set search to applyResult["search"].
281 ADA_ASSERT_TRUE(apply_result->search.has_value());
282 if (apply_result->search->starts_with("?")) {
283 search = apply_result->search->substr(1);
284 } else {
285 search = apply_result->search.value();
286 }
287
288 // Set hash to applyResult["hash"].
289 ADA_ASSERT_TRUE(apply_result->hash.has_value());
290 ADA_ASSERT_TRUE(!apply_result->hash->starts_with("#"));
291 hash = apply_result->hash.value();
292 } else {
293 ADA_ASSERT_TRUE(std::holds_alternative<std::string_view>(input));
294
295 // Let baseURL be null.
296 result<url_aggregator> base_url;
297
298 // If baseURLString was given, then:
299 if (base_url_string) {
300 // Let baseURL be the result of parsing baseURLString.
301 base_url = ada::parse<url_aggregator>(*base_url_string, nullptr);
302
303 // If baseURL is failure, return null.
304 if (!base_url) {
305 ada_log("match returned std::nullopt because failed to parse base_url=",
306 *base_url_string);
307 return std::nullopt;
308 }
309
310 // Append baseURLString to inputs.
311 inputs.emplace_back(*base_url_string);
312 }
313
314 url_aggregator* base_url_value =
315 base_url.has_value() ? &base_url.value() : nullptr;
316
317 // Set url to the result of parsing input given baseURL.
318 auto url = ada::parse<url_aggregator>(std::get<std::string_view>(input),
319 base_url_value);
320
321 // If url is failure, return null.
322 if (!url) {
323 ada_log("match returned std::nullopt because url failed");
324 return std::nullopt;
325 }
326
327 // Set protocol to url’s scheme.
328 // IMPORTANT: Not documented on the URLPattern spec, but protocol suffix ':'
329 // is removed. Similar work was done on workerd:
330 // https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2038
331 protocol = url->get_protocol().substr(0, url->get_protocol().size() - 1);
332 // Set username to url’s username.
333 username = url->get_username();
334 // Set password to url’s password.
335 password = url->get_password();
336 // Set hostname to url’s host, serialized, or the empty string if the value
337 // is null.
338 hostname = url->get_hostname();
339 // Set port to url’s port, serialized, or the empty string if the value is
340 // null.
341 port = url->get_port();
342 // Set pathname to the result of URL path serializing url.
343 pathname = url->get_pathname();
344 // Set search to url’s query or the empty string if the value is null.
345 // IMPORTANT: Not documented on the URLPattern spec, but search prefix '?'
346 // is removed. Similar work was done on workerd:
347 // https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2232
348 if (url->has_search()) {
349 ADA_ASSERT_TRUE(url->get_search().starts_with("?"));
350 search = url->get_search().substr(1);
351 } else {
352 search = "";
353 }
354 // Set hash to url’s fragment or the empty string if the value is null.
355 // IMPORTANT: Not documented on the URLPattern spec, but hash prefix '#' is
356 // removed. Similar work was done on workerd:
357 // https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2242
358 if (url->has_hash()) {
359 ADA_ASSERT_TRUE(url->get_hash().starts_with("#"));
360 hash = url->get_hash().substr(1);
361 } else {
362 hash = "";
363 }
364 }
365
366 // Let protocolExecResult be RegExpBuiltinExec(urlPattern’s protocol
367 // component's regular expression, protocol).
368 auto protocol_exec_result =
369 regex_provider::regex_search(protocol, protocol_component.regexp);
370
371 // Let usernameExecResult be RegExpBuiltinExec(urlPattern’s username
372 // component's regular expression, username).
373 auto username_exec_result =
374 regex_provider::regex_search(username, username_component.regexp);
375
376 // Let passwordExecResult be RegExpBuiltinExec(urlPattern’s password
377 // component's regular expression, password).
378 auto password_exec_result =
379 regex_provider::regex_search(password, password_component.regexp);
380
381 // Let hostnameExecResult be RegExpBuiltinExec(urlPattern’s hostname
382 // component's regular expression, hostname).
383 auto hostname_exec_result =
384 regex_provider::regex_search(hostname, hostname_component.regexp);
385
386 // Let portExecResult be RegExpBuiltinExec(urlPattern’s port component's
387 // regular expression, port).
388 auto port_exec_result =
389 regex_provider::regex_search(port, port_component.regexp);
390
391 // Let pathnameExecResult be RegExpBuiltinExec(urlPattern’s pathname
392 // component's regular expression, pathname).
393 auto pathname_exec_result =
394 regex_provider::regex_search(pathname, pathname_component.regexp);
395
396 // Let searchExecResult be RegExpBuiltinExec(urlPattern’s search component's
397 // regular expression, search).
398 auto search_exec_result =
399 regex_provider::regex_search(search, search_component.regexp);
400
401 // Let hashExecResult be RegExpBuiltinExec(urlPattern’s hash component's
402 // regular expression, hash).
403 auto hash_exec_result =
404 regex_provider::regex_search(hash, hash_component.regexp);
405
406 // If protocolExecResult, usernameExecResult, passwordExecResult,
407 // hostnameExecResult, portExecResult, pathnameExecResult, searchExecResult,
408 // or hashExecResult are null then return null.
409 if (!protocol_exec_result || !username_exec_result || !password_exec_result ||
410 !hostname_exec_result || !port_exec_result || !pathname_exec_result ||
411 !search_exec_result || !hash_exec_result) {
412 return std::nullopt;
413 }
414
415 // Let result be a new URLPatternResult.
416 auto result = url_pattern_result{};
417 // Set result["inputs"] to inputs.
418 result.inputs = std::move(inputs);
419 // Set result["protocol"] to the result of creating a component match result
420 // given urlPattern’s protocol component, protocol, and protocolExecResult.
421 result.protocol = protocol_component.create_component_match_result(
422 protocol, std::move(*protocol_exec_result));
423
424 // Set result["username"] to the result of creating a component match result
425 // given urlPattern’s username component, username, and usernameExecResult.
426 result.username = username_component.create_component_match_result(
427 username, std::move(*username_exec_result));
428
429 // Set result["password"] to the result of creating a component match result
430 // given urlPattern’s password component, password, and passwordExecResult.
431 result.password = password_component.create_component_match_result(
432 password, std::move(*password_exec_result));
433
434 // Set result["hostname"] to the result of creating a component match result
435 // given urlPattern’s hostname component, hostname, and hostnameExecResult.
436 result.hostname = hostname_component.create_component_match_result(
437 hostname, std::move(*hostname_exec_result));
438
439 // Set result["port"] to the result of creating a component match result given
440 // urlPattern’s port component, port, and portExecResult.
441 result.port = port_component.create_component_match_result(
442 port, std::move(*port_exec_result));
443
444 // Set result["pathname"] to the result of creating a component match result
445 // given urlPattern’s pathname component, pathname, and pathnameExecResult.
446 result.pathname = pathname_component.create_component_match_result(
447 pathname, std::move(*pathname_exec_result));
448
449 // Set result["search"] to the result of creating a component match result
450 // given urlPattern’s search component, search, and searchExecResult.
451 result.search = search_component.create_component_match_result(
452 search, std::move(*search_exec_result));
453
454 // Set result["hash"] to the result of creating a component match result given
455 // urlPattern’s hash component, hash, and hashExecResult.
456 result.hash = hash_component.create_component_match_result(
457 hash, std::move(*hash_exec_result));
458
459 return result;
460}
461
462} // namespace ada
463
464#endif
static tl::expected< url_pattern_component, errors > compile(std::string_view input, F &encoding_callback, url_pattern_compile_component_options &options)
std::vector< std::string > group_name_list
url_pattern_component_result create_component_match_result(std::string_view input, std::vector< std::optional< std::string > > &&exec_result)
bool is_regexp() const noexcept
url_pattern_part_type type
Definition url_pattern.h:68
bool has_regexp_groups() const
std::string_view get_hostname() const ada_lifetime_bound
std::string_view get_port() const ada_lifetime_bound
result< bool > test(const url_pattern_input &input, const std::string_view *base_url=nullptr)
result< std::optional< url_pattern_result > > match(const url_pattern_input &input, const std::string_view *base_url_string=nullptr)
bool ignore_case() const
std::string_view get_password() const ada_lifetime_bound
std::string_view get_protocol() const ada_lifetime_bound
std::string_view get_hash() const ada_lifetime_bound
std::string_view get_username() const ada_lifetime_bound
std::string_view get_pathname() const ada_lifetime_bound
std::string_view get_search() const ada_lifetime_bound
result< std::optional< url_pattern_result > > exec(const url_pattern_input &input, const std::string_view *base_url=nullptr)
Common definitions for cross-platform compiler support.
#define ADA_ASSERT_TRUE(COND)
#define ada_lifetime_bound
tl::expected< std::vector< url_pattern_part >, errors > parse_pattern_string(std::string_view input, url_pattern_compile_component_options &options, F &encoding_callback)
std::string generate_pattern_string(std::vector< url_pattern_part > &part_list, url_pattern_compile_component_options &options)
std::tuple< std::string, std::vector< std::string > > generate_regular_expression_and_name_list(const std::vector< url_pattern_part > &part_list, url_pattern_compile_component_options options)
Definition ada_idna.h:13
errors
Definition errors.h:10
@ type_error
Definition errors.h:10
template ada::result< url_aggregator > parse< url_aggregator >(std::string_view input, const url_aggregator *base_url)
tl::expected< result_type, ada::errors > result
std::variant< std::string_view, url_pattern_init > url_pattern_input
Lightweight URL struct.
std::string_view get_prefix() const ada_warn_unused
std::string_view get_delimiter() const ada_warn_unused
bool operator==(const url_pattern_component_result &) const
std::unordered_map< std::string, std::optional< std::string > > groups
std::optional< std::string > port
std::optional< std::string > protocol
static tl::expected< url_pattern_init, errors > process(url_pattern_init init, std::string_view type, std::optional< std::string_view > protocol=std::nullopt, std::optional< std::string_view > username=std::nullopt, std::optional< std::string_view > password=std::nullopt, std::optional< std::string_view > hostname=std::nullopt, std::optional< std::string_view > port=std::nullopt, std::optional< std::string_view > pathname=std::nullopt, std::optional< std::string_view > search=std::nullopt, std::optional< std::string_view > hash=std::nullopt)
std::optional< std::string > password
std::optional< std::string > hostname
std::optional< std::string > search
bool operator==(const url_pattern_init &) const
std::optional< std::string > username
std::optional< std::string > pathname
std::optional< std::string > hash
Generic URL struct reliant on std::string instantiation.
Definition url.h:44
std::string get_search() const noexcept
Definition url.cpp:641
constexpr std::string_view get_pathname() const noexcept
Definition url-inl.h:46
std::string get_hash() const noexcept
Definition url.cpp:660
std::string get_hostname() const noexcept
Definition url.cpp:637
const std::string & get_password() const noexcept
Definition url.cpp:652
std::string get_port() const noexcept
Definition url.cpp:656
const std::string & get_username() const noexcept
Definition url.cpp:648
constexpr bool has_search() const noexcept override
Definition url-inl.h:163
std::string get_protocol() const noexcept
Definition url.cpp:615
constexpr bool has_hash() const noexcept override
Definition url-inl.h:159
Declaration for the URLPattern implementation.
Declaration for the URLPattern helpers.