Ada 3.0.1
Fast spec-compliant URL parser
Loading...
Searching...
No Matches
url_pattern.h
Go to the documentation of this file.
1
5#ifndef ADA_URL_PATTERN_H
6#define ADA_URL_PATTERN_H
7
9#include "ada/expected.h"
10#include "ada/parser.h"
12
13#include <string>
14#include <unordered_map>
15#include <variant>
16#include <vector>
17
18#if ADA_TESTING
19#include <iostream>
20#endif // ADA_TESTING
21
22namespace ada {
23
24enum class url_pattern_part_type : uint8_t {
25 // The part represents a simple fixed text string.
27 // The part represents a matching group with a custom regular expression.
29 // The part represents a matching group that matches code points up to the
30 // next separator code point. This is typically used for a named group like
31 // ":foo" that does not have a custom regular expression.
33 // The part represents a matching group that greedily matches all code points.
34 // This is typically used for the "*" wildcard matching group.
36};
37
38enum class url_pattern_part_modifier : uint8_t {
39 // The part does not have a modifier.
41 // The part has an optional modifier indicated by the U+003F (?) code point.
43 // The part has a "zero or more" modifier indicated by the U+002A (*) code
44 // point.
46 // The part has a "one or more" modifier indicated by the U+002B (+) code
47 // point.
49};
50
51// @see https://urlpattern.spec.whatwg.org/#part
53 public:
54 url_pattern_part(url_pattern_part_type _type, std::string&& _value,
56 : type(_type), value(_value), modifier(_modifier) {}
57
58 url_pattern_part(url_pattern_part_type _type, std::string&& _value,
59 url_pattern_part_modifier _modifier, std::string&& _name,
60 std::string&& _prefix, std::string&& _suffix)
61 : type(_type),
62 value(_value),
63 modifier(_modifier),
64 name(_name),
65 prefix(_prefix),
66 suffix(_suffix) {}
67 // A part has an associated type, a string, which must be set upon creation.
69 // A part has an associated value, a string, which must be set upon creation.
70 std::string value;
71 // A part has an associated modifier, a string, which must be set upon
72 // creation.
74 // A part has an associated name, a string, initially the empty string.
75 std::string name{};
76 // A part has an associated prefix, a string, initially the empty string.
77 std::string prefix{};
78 // A part has an associated suffix, a string, initially the empty string.
79 std::string suffix{};
80
81 inline bool is_regexp() const noexcept;
82};
83
84// @see https://urlpattern.spec.whatwg.org/#options-header
88 std::optional<char> new_delimiter = std::nullopt,
89 std::optional<char> new_prefix = std::nullopt)
90 : delimiter(new_delimiter), prefix(new_prefix) {}
91
92 inline std::string_view get_delimiter() const ada_warn_unused;
93 inline std::string_view get_prefix() const ada_warn_unused;
94
95 // @see https://urlpattern.spec.whatwg.org/#options-ignore-case
96 bool ignore_case = false;
97
101
102 private:
103 // @see https://urlpattern.spec.whatwg.org/#options-delimiter-code-point
104 std::optional<char> delimiter{};
105 // @see https://urlpattern.spec.whatwg.org/#options-prefix-code-point
106 std::optional<char> prefix{};
107};
108
109// The default options is an options struct with delimiter code point set to
110// the empty string and prefix code point set to the empty string.
111inline url_pattern_compile_component_options
112 url_pattern_compile_component_options::DEFAULT(std::nullopt, std::nullopt);
113
114// The hostname options is an options struct with delimiter code point set
115// to "." and prefix code point set to the empty string.
116inline url_pattern_compile_component_options
118
119// The pathname options is an options struct with delimiter code point set
120// to "/" and prefix code point set to "/".
121inline url_pattern_compile_component_options
123
124// A struct providing the URLPattern matching results for a single
125// URL component. The URLPatternComponentResult is only ever used
126// as a member attribute of a URLPatternResult struct. The
127// URLPatternComponentResult API is defined as part of the URLPattern
128// specification.
130 std::string input;
131 std::unordered_map<std::string, std::optional<std::string>> groups;
132
133 bool operator==(const url_pattern_component_result&) const;
134
135#if ADA_TESTING
136 friend void PrintTo(const url_pattern_component_result& result,
137 std::ostream* os) {
138 *os << "input: '" << result.input << "', group: ";
139 for (const auto& group : result.groups) {
140 *os << "(" << group.first << ", " << group.second.value_or("undefined")
141 << ") ";
142 }
143 }
144#endif // ADA_TESTING
145};
146
147template <url_pattern_regex::regex_concept regex_provider>
149 public:
151
152 // This function explicitly takes a std::string because it is moved.
153 // To avoid unnecessary copy, move each value while calling the constructor.
154 url_pattern_component(std::string&& new_pattern,
155 typename regex_provider::regex_type&& new_regexp,
156 std::vector<std::string>&& new_group_name_list,
157 bool new_has_regexp_groups)
158 : regexp(std::move(new_regexp)),
159 pattern(std::move(new_pattern)),
160 group_name_list(new_group_name_list),
161 has_regexp_groups(new_has_regexp_groups) {}
162
163 // @see https://urlpattern.spec.whatwg.org/#compile-a-component
164 template <url_pattern_encoding_callback F>
165 static tl::expected<url_pattern_component, errors> compile(
166 std::string_view input, F& encoding_callback,
168
169 // @see https://urlpattern.spec.whatwg.org/#create-a-component-match-result
171 std::string_view input,
172 std::vector<std::optional<std::string>>&& exec_result);
173
174#if ADA_TESTING
175 friend void PrintTo(const url_pattern_component& component,
176 std::ostream* os) {
177 *os << "pattern: '" << component.pattern
178 << "', has_regexp_groups: " << component.has_regexp_groups
179 << "group_name_list: ";
180 for (const auto& name : component.group_name_list) {
181 *os << name << ", ";
182 }
183 }
184#endif // ADA_TESTING
185
186 typename regex_provider::regex_type regexp{};
187 std::string pattern{};
188 std::vector<std::string> group_name_list{};
189 bool has_regexp_groups = false;
190};
191
192using url_pattern_input = std::variant<std::string_view, url_pattern_init>;
193
194// A struct providing the URLPattern matching results for all
195// components of a URL. The URLPatternResult API is defined as
196// part of the URLPattern specification.
208
210 bool ignore_case = false;
211
212#if ADA_TESTING
213 friend void PrintTo(const url_pattern_options& options, std::ostream* os) {
214 *os << "ignore_case: '" << options.ignore_case;
215 }
216#endif // ADA_TESTING
217};
218
219// URLPattern is a Web Platform standard API for matching URLs against a
220// pattern syntax (think of it as a regular expression for URLs). It is
221// defined in https://wicg.github.io/urlpattern.
222// More information about the URL Pattern syntax can be found at
223// https://developer.mozilla.org/en-US/docs/Web/API/URL_Pattern_API
224template <url_pattern_regex::regex_concept regex_provider>
226 public:
227 url_pattern() = default;
228
233 const url_pattern_input& input,
234 const std::string_view* base_url = nullptr);
235
240 const std::string_view* base_url = nullptr);
241
247 const url_pattern_input& input,
248 const std::string_view* base_url_string = nullptr);
249
250 // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-protocol
251 [[nodiscard]] std::string_view get_protocol() const ada_lifetime_bound;
252 // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-username
253 [[nodiscard]] std::string_view get_username() const ada_lifetime_bound;
254 // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-password
255 [[nodiscard]] std::string_view get_password() const ada_lifetime_bound;
256 // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-hostname
257 [[nodiscard]] std::string_view get_hostname() const ada_lifetime_bound;
258 // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-port
259 [[nodiscard]] std::string_view get_port() const ada_lifetime_bound;
260 // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-pathname
261 [[nodiscard]] std::string_view get_pathname() const ada_lifetime_bound;
262 // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-search
263 [[nodiscard]] std::string_view get_search() const ada_lifetime_bound;
264 // @see https://urlpattern.spec.whatwg.org/#dom-urlpattern-hash
265 [[nodiscard]] std::string_view get_hash() const ada_lifetime_bound;
266
267 // If ignoreCase is true, the JavaScript regular expression created for each
268 // pattern must use the `vi` flag. Otherwise, they must use the `v` flag.
269 [[nodiscard]] bool ignore_case() const;
270
271 // @see https://urlpattern.spec.whatwg.org/#url-pattern-has-regexp-groups
272 [[nodiscard]] bool has_regexp_groups() const;
273
274#if ADA_TESTING
275 friend void PrintTo(const url_pattern& c, std::ostream* os) {
276 *os << "protocol_component: '" << c.get_protocol() << ", ";
277 *os << "username_component: '" << c.get_username() << ", ";
278 *os << "password_component: '" << c.get_password() << ", ";
279 *os << "hostname_component: '" << c.get_hostname() << ", ";
280 *os << "port_component: '" << c.get_port() << ", ";
281 *os << "pathname_component: '" << c.get_pathname() << ", ";
282 *os << "search_component: '" << c.get_search() << ", ";
283 *os << "hash_component: '" << c.get_hash();
284 }
285#endif // ADA_TESTING
286
287 template <url_pattern_regex::regex_concept P>
288 friend tl::expected<url_pattern<P>, errors> parser::parse_url_pattern_impl(
289 std::variant<std::string_view, url_pattern_init> input,
290 const std::string_view* base_url, const url_pattern_options* options);
291
297 url_pattern_component<regex_provider> protocol_component{};
303 url_pattern_component<regex_provider> username_component{};
309 url_pattern_component<regex_provider> password_component{};
315 url_pattern_component<regex_provider> hostname_component{};
321 url_pattern_component<regex_provider> port_component{};
327 url_pattern_component<regex_provider> pathname_component{};
333 url_pattern_component<regex_provider> search_component{};
339 url_pattern_component<regex_provider> hash_component{};
345 bool ignore_case_ = false;
346};
347
348} // namespace ada
349
350#endif
url_pattern_component(std::string &&new_pattern, typename regex_provider::regex_type &&new_regexp, std::vector< std::string > &&new_group_name_list, bool new_has_regexp_groups)
static tl::expected< url_pattern_component, errors > compile(std::string_view input, F &encoding_callback, url_pattern_compile_component_options &options)
regex_provider::regex_type regexp
std::vector< std::string > group_name_list
url_pattern_component_result create_component_match_result(std::string_view input, std::vector< std::optional< std::string > > &&exec_result)
url_pattern_part(url_pattern_part_type _type, std::string &&_value, url_pattern_part_modifier _modifier)
Definition url_pattern.h:54
url_pattern_part(url_pattern_part_type _type, std::string &&_value, url_pattern_part_modifier _modifier, std::string &&_name, std::string &&_prefix, std::string &&_suffix)
Definition url_pattern.h:58
url_pattern_part_modifier modifier
Definition url_pattern.h:73
bool is_regexp() const noexcept
url_pattern_part_type type
Definition url_pattern.h:68
bool has_regexp_groups() const
std::string_view get_hostname() const ada_lifetime_bound
std::string_view get_port() const ada_lifetime_bound
result< bool > test(const url_pattern_input &input, const std::string_view *base_url=nullptr)
result< std::optional< url_pattern_result > > match(const url_pattern_input &input, const std::string_view *base_url_string=nullptr)
bool ignore_case() const
std::string_view get_password() const ada_lifetime_bound
std::string_view get_protocol() const ada_lifetime_bound
std::string_view get_hash() const ada_lifetime_bound
url_pattern()=default
std::string_view get_username() const ada_lifetime_bound
std::string_view get_pathname() const ada_lifetime_bound
std::string_view get_search() const ada_lifetime_bound
result< std::optional< url_pattern_result > > exec(const url_pattern_input &input, const std::string_view *base_url=nullptr)
#define ada_lifetime_bound
#define ada_warn_unused
Definition common_defs.h:85
Definitions for user facing functions for parsing URL and its components.
tl::expected< url_pattern< regex_provider >, errors > parse_url_pattern_impl(std::variant< std::string_view, url_pattern_init > input, const std::string_view *base_url, const url_pattern_options *options)
Definition parser-inl.h:17
Definition ada_idna.h:13
url_pattern_part_modifier
Definition url_pattern.h:38
url_pattern_part_type
Definition url_pattern.h:24
errors
Definition errors.h:10
tl::expected< result_type, ada::errors > result
std::variant< std::string_view, url_pattern_init > url_pattern_input
Definitions for the parser.
url_pattern_compile_component_options(std::optional< char > new_delimiter=std::nullopt, std::optional< char > new_prefix=std::nullopt)
Definition url_pattern.h:87
static url_pattern_compile_component_options HOSTNAME
Definition url_pattern.h:99
static url_pattern_compile_component_options PATHNAME
static url_pattern_compile_component_options DEFAULT
Definition url_pattern.h:98
bool operator==(const url_pattern_component_result &) const
std::unordered_map< std::string, std::optional< std::string > > groups
std::vector< url_pattern_input > inputs
url_pattern_component_result hostname
url_pattern_component_result password
url_pattern_component_result hash
url_pattern_component_result port
url_pattern_component_result protocol
url_pattern_component_result pathname
url_pattern_component_result username
url_pattern_component_result search
Declaration for the url_pattern_init implementation.