Ada 2.8.0
Fast spec-compliant URL parser
Loading...
Searching...
No Matches
url-inl.h
Go to the documentation of this file.
1
5#ifndef ADA_URL_INL_H
6#define ADA_URL_INL_H
7
#include "ada/checkers.h"
#include "ada/url.h"
#include "ada/url_components.h"

#include <optional>
#include <string>
#include <utility>
#if ADA_REGULAR_VISUAL_STUDIO
#include <intrin.h>
#endif // ADA_REGULAR_VISUAL_STUDIO
17
18namespace ada {
19[[nodiscard]] ada_really_inline bool url::has_credentials() const noexcept {
20 return !username.empty() || !password.empty();
21}
22[[nodiscard]] ada_really_inline bool url::has_port() const noexcept {
23 return port.has_value();
24}
25[[nodiscard]] inline bool url::cannot_have_credentials_or_port() const {
26 return !host.has_value() || host.value().empty() ||
28}
29[[nodiscard]] inline bool url::has_empty_hostname() const noexcept {
30 if (!host.has_value()) {
31 return false;
32 }
33 return host.value().empty();
34}
35[[nodiscard]] inline bool url::has_hostname() const noexcept {
36 return host.has_value();
37}
38inline std::ostream &operator<<(std::ostream &out, const ada::url &u) {
39 return out << u.to_string();
40}
41
42[[nodiscard]] size_t url::get_pathname_length() const noexcept {
43 return path.size();
44}
45
47 const noexcept {
48 url_components out{};
49
50 // protocol ends with ':'. for example: "https:"
51 out.protocol_end = uint32_t(get_protocol().size());
52
53 // Trailing index is always the next character of the current one.
54 size_t running_index = out.protocol_end;
55
56 if (host.has_value()) {
57 // 2 characters for "//" and 1 character for starting index
58 out.host_start = out.protocol_end + 2;
59
60 if (has_credentials()) {
61 out.username_end = uint32_t(out.host_start + username.size());
62
63 out.host_start += uint32_t(username.size());
64
65 if (!password.empty()) {
66 out.host_start += uint32_t(password.size() + 1);
67 }
68
69 out.host_end = uint32_t(out.host_start + host.value().size());
70 } else {
71 out.username_end = out.host_start;
72
73 // Host does not start with "@" if it does not include credentials.
74 out.host_end = uint32_t(out.host_start + host.value().size()) - 1;
75 }
76
77 running_index = out.host_end + 1;
78 } else {
79 // Update host start and end date to the same index, since it does not
80 // exist.
81 out.host_start = out.protocol_end;
82 out.host_end = out.host_start;
83
84 if (!has_opaque_path && checkers::begins_with(path, "//")) {
85 // If url's host is null, url does not have an opaque path, url's path's
86 // size is greater than 1, and url's path[0] is the empty string, then
87 // append U+002F (/) followed by U+002E (.) to output.
88 running_index = out.protocol_end + 2;
89 } else {
90 running_index = out.protocol_end;
91 }
92 }
93
94 if (port.has_value()) {
95 out.port = *port;
96 running_index += helpers::fast_digit_count(*port) + 1; // Port omits ':'
97 }
98
99 out.pathname_start = uint32_t(running_index);
100
101 running_index += path.size();
102
103 if (query.has_value()) {
104 out.search_start = uint32_t(running_index);
105 running_index += get_search().size();
106 if (get_search().empty()) {
107 running_index++;
108 }
109 }
110
111 if (hash.has_value()) {
112 out.hash_start = uint32_t(running_index);
113 }
114
115 return out;
116}
117
118inline void url::update_base_hostname(std::string_view input) { host = input; }
119
120inline void url::update_unencoded_base_hash(std::string_view input) {
121 // We do the percent encoding
122 hash = unicode::percent_encode(input,
124}
125
126inline void url::update_base_search(std::string_view input,
127 const uint8_t query_percent_encode_set[]) {
128 query = ada::unicode::percent_encode(input, query_percent_encode_set);
129}
130
131inline void url::update_base_search(std::optional<std::string> input) {
132 query = input;
133}
134
135inline void url::update_base_pathname(const std::string_view input) {
136 path = input;
137}
138
139inline void url::update_base_username(const std::string_view input) {
140 username = input;
141}
142
143inline void url::update_base_password(const std::string_view input) {
144 password = input;
145}
146
147inline void url::update_base_port(std::optional<uint16_t> input) {
148 port = input;
149}
150
151inline void url::clear_pathname() { path.clear(); }
152
153inline void url::clear_search() { query = std::nullopt; }
154
155[[nodiscard]] inline bool url::has_hash() const noexcept {
156 return hash.has_value();
157}
158
159[[nodiscard]] inline bool url::has_search() const noexcept {
160 return query.has_value();
161}
162
163inline void url::set_protocol_as_file() { type = ada::scheme::type::FILE; }
164
165inline void url::set_scheme(std::string &&new_scheme) noexcept {
166 type = ada::scheme::get_scheme_type(new_scheme);
167 // We only move the 'scheme' if it is non-special.
168 if (!is_special()) {
169 non_special_scheme = new_scheme;
170 }
171}
172
173inline void url::copy_scheme(ada::url &&u) noexcept {
174 non_special_scheme = u.non_special_scheme;
175 type = u.type;
176}
177
178inline void url::copy_scheme(const ada::url &u) {
179 non_special_scheme = u.non_special_scheme;
180 type = u.type;
181}
182
183[[nodiscard]] ada_really_inline std::string url::get_href() const noexcept {
184 std::string output = get_protocol();
185
186 if (host.has_value()) {
187 output += "//";
188 if (has_credentials()) {
189 output += username;
190 if (!password.empty()) {
191 output += ":" + get_password();
192 }
193 output += "@";
194 }
195 output += host.value();
196 if (port.has_value()) {
197 output += ":" + get_port();
198 }
199 } else if (!has_opaque_path && checkers::begins_with(path, "//")) {
200 // If url's host is null, url does not have an opaque path, url's path's
201 // size is greater than 1, and url's path[0] is the empty string, then
202 // append U+002F (/) followed by U+002E (.) to output.
203 output += "/.";
204 }
205 output += path;
206 if (query.has_value()) {
207 output += "?" + query.value();
208 }
209 if (hash.has_value()) {
210 output += "#" + hash.value();
211 }
212 return output;
213}
214
215ada_really_inline size_t url::parse_port(std::string_view view,
216 bool check_trailing_content) noexcept {
217 ada_log("parse_port('", view, "') ", view.size());
218 uint16_t parsed_port{};
219 auto r = std::from_chars(view.data(), view.data() + view.size(), parsed_port);
220 if (r.ec == std::errc::result_out_of_range) {
221 ada_log("parse_port: std::errc::result_out_of_range");
222 is_valid = false;
223 return 0;
224 }
225 ada_log("parse_port: ", parsed_port);
226 const size_t consumed = size_t(r.ptr - view.data());
227 ada_log("parse_port: consumed ", consumed);
228 if (check_trailing_content) {
229 is_valid &=
230 (consumed == view.size() || view[consumed] == '/' ||
231 view[consumed] == '?' || (is_special() && view[consumed] == '\\'));
232 }
233 ada_log("parse_port: is_valid = ", is_valid);
234 if (is_valid) {
235 // scheme_default_port can return 0, and we should allow 0 as a base port.
236 auto default_port = scheme_default_port();
237 bool is_port_valid = (default_port == 0 && parsed_port == 0) ||
238 (default_port != parsed_port);
239 port = (r.ec == std::errc() && is_port_valid)
240 ? std::optional<uint16_t>(parsed_port)
241 : std::nullopt;
242 }
243 return consumed;
244}
245
246} // namespace ada
247
#endif // ADA_URL_INL_H
Declarations for URL specific checkers used within Ada.
#define ada_really_inline
Definition common_defs.h:84
constexpr uint8_t FRAGMENT_PERCENT_ENCODE[32]
ada_really_inline bool begins_with(std::string_view view, std::string_view prefix)
constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept
Definition scheme-inl.h:72
Definition ada_idna.h:13
std::ostream & operator<<(std::ostream &out, const ada::url &u)
Definition url-inl.h:38
bool has_opaque_path
Definition url_base.h:55
URL Component representations using offsets.
Generic URL struct reliant on std::string instantiation.
Definition url.h:38
std::string get_search() const noexcept
ada_really_inline ada::url_components get_components() const noexcept
Definition url-inl.h:46
bool has_empty_hostname() const noexcept
Definition url-inl.h:29
bool has_port() const noexcept
Definition url-inl.h:22
ada_really_inline bool has_credentials() const noexcept
Definition url-inl.h:19
bool has_hash() const noexcept override
Definition url-inl.h:155
ada_really_inline size_t get_pathname_length() const noexcept
Definition url-inl.h:42
ada_really_inline std::string get_href() const noexcept
Definition url-inl.h:183
bool has_hostname() const noexcept
Definition url-inl.h:35
const std::string & get_password() const noexcept
std::string get_port() const noexcept
std::string to_string() const override
Definition url.cpp:536
std::string get_protocol() const noexcept
bool has_search() const noexcept override
Definition url-inl.h:159
Declaration for the URL.
Declaration for the URL Components.