Ada 3.2.1
Fast spec-compliant URL parser
Loading...
Searching...
No Matches
url_pattern-inl.h
Go to the documentation of this file.
1
5#ifndef ADA_URL_PATTERN_INL_H
6#define ADA_URL_PATTERN_INL_H
7
8#include "ada/common_defs.h"
10#include "ada/url_pattern.h"
11
12#include <algorithm>
13#include <string_view>
14#include <utility>
15
16namespace ada {
17
18inline bool url_pattern_init::operator==(const url_pattern_init& other) const {
19 return protocol == other.protocol && username == other.username &&
20 password == other.password && hostname == other.hostname &&
21 port == other.port && search == other.search && hash == other.hash &&
22 pathname == other.pathname;
23}
24
26 const url_pattern_component_result& other) const {
27 return input == other.input && groups == other.groups;
28}
29
30template <url_pattern_regex::regex_concept regex_provider>
33 std::string&& input,
34 std::vector<std::optional<std::string>>&& exec_result) {
35 // Let result be a new URLPatternComponentResult.
36 // Set result["input"] to input.
37 // Let groups be a record<USVString, (USVString or undefined)>.
38 auto result =
39 url_pattern_component_result{.input = std::move(input), .groups = {}};
40
41 // Optimization: Let's reserve the size.
42 result.groups.reserve(exec_result.size());
43
44 // We explicitly start iterating from 0 even though the spec
45 // says we should start from 1. This case is handled by the
46 // std_regex_provider.
47 for (size_t index = 0; index < exec_result.size(); index++) {
48 result.groups.insert({
49 group_name_list[index],
50 std::move(exec_result[index]),
51 });
52 }
53 return result;
54}
55
56template <url_pattern_regex::regex_concept regex_provider>
59 // Return this's associated URL pattern's protocol component's pattern string.
60 return protocol_component.pattern;
61}
62template <url_pattern_regex::regex_concept regex_provider>
65 // Return this's associated URL pattern's username component's pattern string.
66 return username_component.pattern;
67}
68template <url_pattern_regex::regex_concept regex_provider>
71 // Return this's associated URL pattern's password component's pattern string.
72 return password_component.pattern;
73}
74template <url_pattern_regex::regex_concept regex_provider>
77 // Return this's associated URL pattern's hostname component's pattern string.
78 return hostname_component.pattern;
79}
80template <url_pattern_regex::regex_concept regex_provider>
83 // Return this's associated URL pattern's port component's pattern string.
84 return port_component.pattern;
85}
86template <url_pattern_regex::regex_concept regex_provider>
89 // Return this's associated URL pattern's pathname component's pattern string.
90 return pathname_component.pattern;
91}
92template <url_pattern_regex::regex_concept regex_provider>
95 // Return this's associated URL pattern's search component's pattern string.
96 return search_component.pattern;
97}
98template <url_pattern_regex::regex_concept regex_provider>
101 // Return this's associated URL pattern's hash component's pattern string.
102 return hash_component.pattern;
103}
104template <url_pattern_regex::regex_concept regex_provider>
106 return ignore_case_;
107}
108template <url_pattern_regex::regex_concept regex_provider>
110 // If this's associated URL pattern's has regexp groups, then return true.
111 return protocol_component.has_regexp_groups ||
112 username_component.has_regexp_groups ||
113 password_component.has_regexp_groups ||
114 hostname_component.has_regexp_groups ||
115 port_component.has_regexp_groups ||
116 pathname_component.has_regexp_groups ||
117 search_component.has_regexp_groups || hash_component.has_regexp_groups;
118}
119
// Predicate on a pattern part; it is used by the component compiler to decide
// whether a component "has regexp groups".
// NOTE(review): the body statement (listing line 121) is absent from this copy
// of the file. It presumably compares the part's `type` member against the
// "regexp" part type — restore it from the upstream header before compiling.
120inline bool url_pattern_part::is_regexp() const noexcept {
122}
123
// Returns a one-character view over the value held by `delimiter`, or an empty
// view when `delimiter` holds no value. The view points at the member's own
// storage, so it is only usable while the owning object is alive.
// NOTE(review): the declarator line (listing line 124) is absent from this
// copy; this is presumably `get_delimiter()` returning std::string_view —
// confirm the owning class against the upstream header.
125 const {
126 if (delimiter) {
127 return {&delimiter.value(), 1};
128 }
129 return {};
130}
131
// Returns a one-character view over the value held by `prefix`, or an empty
// view when `prefix` holds no value. The view points at the member's own
// storage, so it is only usable while the owning object is alive.
// NOTE(review): the declarator line (listing line 132) is absent from this
// copy; this is presumably `get_prefix()` returning std::string_view —
// confirm the owning class against the upstream header.
133 const {
134 if (prefix) {
135 return {&prefix.value(), 1};
136 }
137 return {};
138}
139
140template <url_pattern_regex::regex_concept regex_provider>
141template <url_pattern_encoding_callback F>
142tl::expected<url_pattern_component<regex_provider>, errors>
144 std::string_view input, F& encoding_callback,
146 ada_log("url_pattern_component::compile input: ", input);
147 // Let part list be the result of running parse a pattern string given input,
148 // options, and encoding callback.
149 auto part_list = url_pattern_helpers::parse_pattern_string(input, options,
150 encoding_callback);
151
152 if (!part_list) {
153 ada_log("parse_pattern_string failed");
154 return tl::unexpected(part_list.error());
155 }
156
157 // Let (regular expression string, name list) be the result of running
158 // generate a regular expression and name list given part list and options.
159 auto [regular_expression_string, name_list] =
161 options);
162
163 ada_log("regular expression string: ", regular_expression_string);
164
165 // Let pattern string be the result of running generate a pattern
166 // string given part list and options.
167 auto pattern_string =
169
170 // Let regular expression be RegExpCreate(regular expression string,
171 // flags). If this throws an exception, catch it, and throw a
172 // TypeError.
173 std::optional<typename regex_provider::regex_type> regular_expression =
174 regex_provider::create_instance(regular_expression_string,
175 options.ignore_case);
176
177 if (!regular_expression) {
178 return tl::unexpected(errors::type_error);
179 }
180
181 // For each part of part list:
182 // - If part's type is "regexp", then set has regexp groups to true.
183 const auto has_regexp = [](const auto& part) { return part.is_regexp(); };
184 const bool has_regexp_groups = std::ranges::any_of(*part_list, has_regexp);
185
186 ada_log("has regexp groups: ", has_regexp_groups);
187
188 // Return a new component whose pattern string is pattern string, regular
189 // expression is regular expression, group name list is name list, and has
190 // regexp groups is has regexp groups.
192 std::move(pattern_string), std::move(*regular_expression),
193 std::move(name_list), has_regexp_groups);
194}
195
196template <url_pattern_regex::regex_concept regex_provider>
198 const url_pattern_input& input, const std::string_view* base_url) {
199 // Return the result of match given this's associated URL pattern, input, and
200 // baseURL if given.
201 return match(input, base_url);
202}
203
204template <url_pattern_regex::regex_concept regex_provider>
206 const url_pattern_input& input, const std::string_view* base_url) {
207 // TODO: Optimization opportunity. Rather than returning `url_pattern_result`
208 // Implement a fast path just like `can_parse()` in ada_url.
209 // Let result be the result of match given this's associated URL pattern,
210 // input, and baseURL if given.
211 // If result is null, return false.
212 if (auto result = match(input, base_url); result.has_value()) {
213 return result->has_value();
214 }
215 return tl::unexpected(errors::type_error);
216}
217
218template <url_pattern_regex::regex_concept regex_provider>
220 const url_pattern_input& input, const std::string_view* base_url_string) {
221 std::string protocol{};
222 std::string username{};
223 std::string password{};
224 std::string hostname{};
225 std::string port{};
226 std::string pathname{};
227 std::string search{};
228 std::string hash{};
229
230 // Let inputs be an empty list.
231 // Append input to inputs.
232 std::vector inputs{input};
233
234 // If input is a URLPatternInit then:
235 if (std::holds_alternative<url_pattern_init>(input)) {
236 ada_log(
237 "url_pattern::match called with url_pattern_init and base_url_string=",
238 base_url_string);
239 // If baseURLString was given, throw a TypeError.
240 if (base_url_string) {
241 ada_log("failed to match because base_url_string was given");
242 return tl::unexpected(errors::type_error);
243 }
244
245 // Let applyResult be the result of process a URLPatternInit given input,
246 // "url", protocol, username, password, hostname, port, pathname, search,
247 // and hash.
248 auto apply_result = url_pattern_init::process(
249 std::get<url_pattern_init>(input), url_pattern_init::process_type::url,
250 protocol, username, password, hostname, port, pathname, search, hash);
251
252 // If this throws an exception, catch it, and return null.
253 if (!apply_result.has_value()) {
254 ada_log("match returned std::nullopt because process threw");
255 return std::nullopt;
256 }
257
258 // Set protocol to applyResult["protocol"].
259 ADA_ASSERT_TRUE(apply_result->protocol.has_value());
260 protocol = std::move(apply_result->protocol.value());
261
262 // Set username to applyResult["username"].
263 ADA_ASSERT_TRUE(apply_result->username.has_value());
264 username = std::move(apply_result->username.value());
265
266 // Set password to applyResult["password"].
267 ADA_ASSERT_TRUE(apply_result->password.has_value());
268 password = std::move(apply_result->password.value());
269
270 // Set hostname to applyResult["hostname"].
271 ADA_ASSERT_TRUE(apply_result->hostname.has_value());
272 hostname = std::move(apply_result->hostname.value());
273
274 // Set port to applyResult["port"].
275 ADA_ASSERT_TRUE(apply_result->port.has_value());
276 port = std::move(apply_result->port.value());
277
278 // Set pathname to applyResult["pathname"].
279 ADA_ASSERT_TRUE(apply_result->pathname.has_value());
280 pathname = std::move(apply_result->pathname.value());
281
282 // Set search to applyResult["search"].
283 ADA_ASSERT_TRUE(apply_result->search.has_value());
284 if (apply_result->search->starts_with("?")) {
285 search = apply_result->search->substr(1);
286 } else {
287 search = std::move(apply_result->search.value());
288 }
289
290 // Set hash to applyResult["hash"].
291 ADA_ASSERT_TRUE(apply_result->hash.has_value());
292 ADA_ASSERT_TRUE(!apply_result->hash->starts_with("#"));
293 hash = std::move(apply_result->hash.value());
294 } else {
295 ADA_ASSERT_TRUE(std::holds_alternative<std::string_view>(input));
296
297 // Let baseURL be null.
298 result<url_aggregator> base_url;
299
300 // If baseURLString was given, then:
301 if (base_url_string) {
302 // Let baseURL be the result of parsing baseURLString.
303 base_url = ada::parse<url_aggregator>(*base_url_string, nullptr);
304
305 // If baseURL is failure, return null.
306 if (!base_url) {
307 ada_log("match returned std::nullopt because failed to parse base_url=",
308 *base_url_string);
309 return std::nullopt;
310 }
311
312 // Append baseURLString to inputs.
313 inputs.emplace_back(*base_url_string);
314 }
315
316 url_aggregator* base_url_value =
317 base_url.has_value() ? &*base_url : nullptr;
318
319 // Set url to the result of parsing input given baseURL.
320 auto url = ada::parse<url_aggregator>(std::get<std::string_view>(input),
321 base_url_value);
322
323 // If url is failure, return null.
324 if (!url) {
325 ada_log("match returned std::nullopt because url failed");
326 return std::nullopt;
327 }
328
329 // Set protocol to url's scheme.
330 // IMPORTANT: Not documented on the URLPattern spec, but protocol suffix ':'
331 // is removed. Similar work was done on workerd:
332 // https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2038
333 protocol = url->get_protocol().substr(0, url->get_protocol().size() - 1);
334 // Set username to url's username.
335 username = url->get_username();
336 // Set password to url's password.
337 password = url->get_password();
338 // Set hostname to url's host, serialized, or the empty string if the value
339 // is null.
340 hostname = url->get_hostname();
341 // Set port to url's port, serialized, or the empty string if the value is
342 // null.
343 port = url->get_port();
344 // Set pathname to the result of URL path serializing url.
345 pathname = url->get_pathname();
346 // Set search to url's query or the empty string if the value is null.
347 // IMPORTANT: Not documented on the URLPattern spec, but search prefix '?'
348 // is removed. Similar work was done on workerd:
349 // https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2232
350 if (url->has_search()) {
351 auto view = url->get_search();
352 search = view.starts_with("?") ? url->get_search().substr(1) : view;
353 }
354 // Set hash to url's fragment or the empty string if the value is null.
355 // IMPORTANT: Not documented on the URLPattern spec, but hash prefix '#' is
356 // removed. Similar work was done on workerd:
357 // https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2242
358 if (url->has_hash()) {
359 auto view = url->get_hash();
360 hash = view.starts_with("#") ? url->get_hash().substr(1) : view;
361 }
362 }
363
364 // Let protocolExecResult be RegExpBuiltinExec(urlPattern's protocol
365 // component's regular expression, protocol).
366 auto protocol_exec_result =
367 regex_provider::regex_search(protocol, protocol_component.regexp);
368
369 if (!protocol_exec_result) {
370 return std::nullopt;
371 }
372
373 // Let usernameExecResult be RegExpBuiltinExec(urlPattern's username
374 // component's regular expression, username).
375 auto username_exec_result =
376 regex_provider::regex_search(username, username_component.regexp);
377
378 if (!username_exec_result) {
379 return std::nullopt;
380 }
381
382 // Let passwordExecResult be RegExpBuiltinExec(urlPattern's password
383 // component's regular expression, password).
384 auto password_exec_result =
385 regex_provider::regex_search(password, password_component.regexp);
386
387 if (!password_exec_result) {
388 return std::nullopt;
389 }
390
391 // Let hostnameExecResult be RegExpBuiltinExec(urlPattern's hostname
392 // component's regular expression, hostname).
393 auto hostname_exec_result =
394 regex_provider::regex_search(hostname, hostname_component.regexp);
395
396 if (!hostname_exec_result) {
397 return std::nullopt;
398 }
399
400 // Let portExecResult be RegExpBuiltinExec(urlPattern's port component's
401 // regular expression, port).
402 auto port_exec_result =
403 regex_provider::regex_search(port, port_component.regexp);
404
405 if (!port_exec_result) {
406 return std::nullopt;
407 }
408
409 // Let pathnameExecResult be RegExpBuiltinExec(urlPattern's pathname
410 // component's regular expression, pathname).
411 auto pathname_exec_result =
412 regex_provider::regex_search(pathname, pathname_component.regexp);
413
414 if (!pathname_exec_result) {
415 return std::nullopt;
416 }
417
418 // Let searchExecResult be RegExpBuiltinExec(urlPattern's search component's
419 // regular expression, search).
420 auto search_exec_result =
421 regex_provider::regex_search(search, search_component.regexp);
422
423 if (!search_exec_result) {
424 return std::nullopt;
425 }
426
427 // Let hashExecResult be RegExpBuiltinExec(urlPattern's hash component's
428 // regular expression, hash).
429 auto hash_exec_result =
430 regex_provider::regex_search(hash, hash_component.regexp);
431
432 if (!hash_exec_result) {
433 return std::nullopt;
434 }
435
436 // Let result be a new URLPatternResult.
437 auto result = url_pattern_result{};
438 // Set result["inputs"] to inputs.
439 result.inputs = std::move(inputs);
440 // Set result["protocol"] to the result of creating a component match result
441 // given urlPattern's protocol component, protocol, and protocolExecResult.
442 result.protocol = protocol_component.create_component_match_result(
443 std::move(protocol), std::move(*protocol_exec_result));
444
445 // Set result["username"] to the result of creating a component match result
446 // given urlPattern's username component, username, and usernameExecResult.
447 result.username = username_component.create_component_match_result(
448 std::move(username), std::move(*username_exec_result));
449
450 // Set result["password"] to the result of creating a component match result
451 // given urlPattern's password component, password, and passwordExecResult.
452 result.password = password_component.create_component_match_result(
453 std::move(password), std::move(*password_exec_result));
454
455 // Set result["hostname"] to the result of creating a component match result
456 // given urlPattern's hostname component, hostname, and hostnameExecResult.
457 result.hostname = hostname_component.create_component_match_result(
458 std::move(hostname), std::move(*hostname_exec_result));
459
460 // Set result["port"] to the result of creating a component match result given
461 // urlPattern's port component, port, and portExecResult.
462 result.port = port_component.create_component_match_result(
463 std::move(port), std::move(*port_exec_result));
464
465 // Set result["pathname"] to the result of creating a component match result
466 // given urlPattern's pathname component, pathname, and pathnameExecResult.
467 result.pathname = pathname_component.create_component_match_result(
468 std::move(pathname), std::move(*pathname_exec_result));
469
470 // Set result["search"] to the result of creating a component match result
471 // given urlPattern's search component, search, and searchExecResult.
472 result.search = search_component.create_component_match_result(
473 std::move(search), std::move(*search_exec_result));
474
475 // Set result["hash"] to the result of creating a component match result given
476 // urlPattern's hash component, hash, and hashExecResult.
477 result.hash = hash_component.create_component_match_result(
478 std::move(hash), std::move(*hash_exec_result));
479
480 return result;
481}
482
483} // namespace ada
484
485#endif
url_pattern_component_result create_component_match_result(std::string &&input, std::vector< std::optional< std::string > > &&exec_result)
static tl::expected< url_pattern_component, errors > compile(std::string_view input, F &encoding_callback, url_pattern_compile_component_options &options)
std::vector< std::string > group_name_list
bool is_regexp() const noexcept
url_pattern_part_type type
Definition url_pattern.h:70
bool has_regexp_groups() const
std::string_view get_hostname() const ada_lifetime_bound
std::string_view get_port() const ada_lifetime_bound
result< bool > test(const url_pattern_input &input, const std::string_view *base_url=nullptr)
result< std::optional< url_pattern_result > > match(const url_pattern_input &input, const std::string_view *base_url_string=nullptr)
bool ignore_case() const
std::string_view get_password() const ada_lifetime_bound
std::string_view get_protocol() const ada_lifetime_bound
std::string_view get_hash() const ada_lifetime_bound
std::string_view get_username() const ada_lifetime_bound
std::string_view get_pathname() const ada_lifetime_bound
std::string_view get_search() const ada_lifetime_bound
result< std::optional< url_pattern_result > > exec(const url_pattern_input &input, const std::string_view *base_url=nullptr)
Common definitions for cross-platform compiler support.
#define ADA_ASSERT_TRUE(COND)
#define ada_lifetime_bound
tl::expected< std::vector< url_pattern_part >, errors > parse_pattern_string(std::string_view input, url_pattern_compile_component_options &options, F &encoding_callback)
std::string generate_pattern_string(std::vector< url_pattern_part > &part_list, url_pattern_compile_component_options &options)
std::tuple< std::string, std::vector< std::string > > generate_regular_expression_and_name_list(const std::vector< url_pattern_part > &part_list, url_pattern_compile_component_options options)
Definition ada_idna.h:13
errors
Definition errors.h:10
@ type_error
Definition errors.h:10
template ada::result< url_aggregator > parse< url_aggregator >(std::string_view input, const url_aggregator *base_url)
tl::expected< result_type, ada::errors > result
std::variant< std::string_view, url_pattern_init > url_pattern_input
Lightweight URL struct.
std::string_view get_prefix() const ada_warn_unused
std::string_view get_delimiter() const ada_warn_unused
bool operator==(const url_pattern_component_result &) const
std::unordered_map< std::string, std::optional< std::string > > groups
std::optional< std::string > port
std::optional< std::string > protocol
std::optional< std::string > password
std::optional< std::string > hostname
std::optional< std::string > search
bool operator==(const url_pattern_init &) const
std::optional< std::string > username
static tl::expected< url_pattern_init, errors > process(const url_pattern_init &init, process_type type, std::optional< std::string_view > protocol=std::nullopt, std::optional< std::string_view > username=std::nullopt, std::optional< std::string_view > password=std::nullopt, std::optional< std::string_view > hostname=std::nullopt, std::optional< std::string_view > port=std::nullopt, std::optional< std::string_view > pathname=std::nullopt, std::optional< std::string_view > search=std::nullopt, std::optional< std::string_view > hash=std::nullopt)
std::optional< std::string > pathname
std::optional< std::string > hash
Generic URL struct reliant on std::string instantiation.
Definition url.h:45
std::string get_search() const noexcept
Definition url.cpp:641
constexpr std::string_view get_pathname() const noexcept
Definition url-inl.h:46
std::string get_hash() const noexcept
Definition url.cpp:660
std::string get_hostname() const noexcept
Definition url.cpp:637
const std::string & get_password() const noexcept
Definition url.cpp:652
std::string get_port() const noexcept
Definition url.cpp:656
const std::string & get_username() const noexcept
Definition url.cpp:648
constexpr bool has_search() const noexcept override
Definition url-inl.h:163
std::string get_protocol() const noexcept
Definition url.cpp:615
constexpr bool has_hash() const noexcept override
Definition url-inl.h:159
Declaration for the URLPattern implementation.
Declaration for the URLPattern helpers.