Ada 2.9.2
Fast spec-compliant URL parser
Loading...
Searching...
No Matches
url-inl.h
Go to the documentation of this file.
1
5#ifndef ADA_URL_INL_H
6#define ADA_URL_INL_H
7
8#include "ada/checkers.h"
9#include "ada/url.h"
10#include "ada/url_components.h"
11
12#include <optional>
13#include <string>
14#if ADA_REGULAR_VISUAL_STUDIO
15#include <intrin.h>
16#endif // ADA_REGULAR_VISUAL_STUDIO
17
18namespace ada {
19[[nodiscard]] ada_really_inline bool url::has_credentials() const noexcept {
20 return !username.empty() || !password.empty();
21}
22[[nodiscard]] ada_really_inline bool url::has_port() const noexcept {
23 return port.has_value();
24}
25[[nodiscard]] inline bool url::cannot_have_credentials_or_port() const {
26 return !host.has_value() || host.value().empty() ||
28}
29[[nodiscard]] inline bool url::has_empty_hostname() const noexcept {
30 if (!host.has_value()) {
31 return false;
32 }
33 return host.value().empty();
34}
35[[nodiscard]] inline bool url::has_hostname() const noexcept {
36 return host.has_value();
37}
38inline std::ostream &operator<<(std::ostream &out, const ada::url &u) {
39 return out << u.to_string();
40}
41
42[[nodiscard]] size_t url::get_pathname_length() const noexcept {
43 return path.size();
44}
45
46[[nodiscard]] constexpr std::string_view url::get_pathname() const noexcept {
47 return path;
48}
49
51 const noexcept {
52 url_components out{};
53
54 // protocol ends with ':'. for example: "https:"
55 out.protocol_end = uint32_t(get_protocol().size());
56
57 // Trailing index is always the next character of the current one.
58 size_t running_index = out.protocol_end;
59
60 if (host.has_value()) {
61 // 2 characters for "//" and 1 character for starting index
62 out.host_start = out.protocol_end + 2;
63
64 if (has_credentials()) {
65 out.username_end = uint32_t(out.host_start + username.size());
66
67 out.host_start += uint32_t(username.size());
68
69 if (!password.empty()) {
70 out.host_start += uint32_t(password.size() + 1);
71 }
72
73 out.host_end = uint32_t(out.host_start + host.value().size());
74 } else {
75 out.username_end = out.host_start;
76
77 // Host does not start with "@" if it does not include credentials.
78 out.host_end = uint32_t(out.host_start + host.value().size()) - 1;
79 }
80
81 running_index = out.host_end + 1;
82 } else {
83 // Update host start and end date to the same index, since it does not
84 // exist.
85 out.host_start = out.protocol_end;
86 out.host_end = out.host_start;
87
88 if (!has_opaque_path && path.starts_with("//")) {
89 // If url's host is null, url does not have an opaque path, url's path's
90 // size is greater than 1, and url's path[0] is the empty string, then
91 // append U+002F (/) followed by U+002E (.) to output.
92 running_index = out.protocol_end + 2;
93 } else {
94 running_index = out.protocol_end;
95 }
96 }
97
98 if (port.has_value()) {
99 out.port = *port;
100 running_index += helpers::fast_digit_count(*port) + 1; // Port omits ':'
101 }
102
103 out.pathname_start = uint32_t(running_index);
104
105 running_index += path.size();
106
107 if (query.has_value()) {
108 out.search_start = uint32_t(running_index);
109 running_index += get_search().size();
110 if (get_search().empty()) {
111 running_index++;
112 }
113 }
114
115 if (hash.has_value()) {
116 out.hash_start = uint32_t(running_index);
117 }
118
119 return out;
120}
121
122inline void url::update_base_hostname(std::string_view input) { host = input; }
123
124inline void url::update_unencoded_base_hash(std::string_view input) {
125 // We do the percent encoding
126 hash = unicode::percent_encode(input,
128}
129
130inline void url::update_base_search(std::string_view input,
131 const uint8_t query_percent_encode_set[]) {
132 query = ada::unicode::percent_encode(input, query_percent_encode_set);
133}
134
135inline void url::update_base_search(std::optional<std::string> &&input) {
136 query = std::move(input);
137}
138
139inline void url::update_base_pathname(const std::string_view input) {
140 path = input;
141}
142
143inline void url::update_base_username(const std::string_view input) {
144 username = input;
145}
146
147inline void url::update_base_password(const std::string_view input) {
148 password = input;
149}
150
151inline void url::update_base_port(std::optional<uint16_t> input) {
152 port = input;
153}
154
155constexpr void url::clear_pathname() { path.clear(); }
156
157constexpr void url::clear_search() { query = std::nullopt; }
158
159[[nodiscard]] constexpr bool url::has_hash() const noexcept {
160 return hash.has_value();
161}
162
163[[nodiscard]] constexpr bool url::has_search() const noexcept {
164 return query.has_value();
165}
166
167constexpr void url::set_protocol_as_file() { type = ada::scheme::type::FILE; }
168
169inline void url::set_scheme(std::string &&new_scheme) noexcept {
170 type = ada::scheme::get_scheme_type(new_scheme);
171 // We only move the 'scheme' if it is non-special.
172 if (!is_special()) {
173 non_special_scheme = std::move(new_scheme);
174 }
175}
176
177constexpr void url::copy_scheme(ada::url &&u) noexcept {
178 non_special_scheme = u.non_special_scheme;
179 type = u.type;
180}
181
182constexpr void url::copy_scheme(const ada::url &u) {
183 non_special_scheme = u.non_special_scheme;
184 type = u.type;
185}
186
187[[nodiscard]] ada_really_inline std::string url::get_href() const noexcept {
188 std::string output = get_protocol();
189
190 if (host.has_value()) {
191 output += "//";
192 if (has_credentials()) {
193 output += username;
194 if (!password.empty()) {
195 output += ":" + get_password();
196 }
197 output += "@";
198 }
199 output += host.value();
200 if (port.has_value()) {
201 output += ":" + get_port();
202 }
203 } else if (!has_opaque_path && path.starts_with("//")) {
204 // If url's host is null, url does not have an opaque path, url's path's
205 // size is greater than 1, and url's path[0] is the empty string, then
206 // append U+002F (/) followed by U+002E (.) to output.
207 output += "/.";
208 }
209 output += path;
210 if (query.has_value()) {
211 output += "?" + query.value();
212 }
213 if (hash.has_value()) {
214 output += "#" + hash.value();
215 }
216 return output;
217}
218
219ada_really_inline size_t url::parse_port(std::string_view view,
220 bool check_trailing_content) noexcept {
221 ada_log("parse_port('", view, "') ", view.size());
222 if (!view.empty() && view[0] == '-') {
223 ada_log("parse_port: view[0] == '0' && view.size() > 1");
224 is_valid = false;
225 return 0;
226 }
227 uint16_t parsed_port{};
228 auto r = std::from_chars(view.data(), view.data() + view.size(), parsed_port);
229 if (r.ec == std::errc::result_out_of_range) {
230 ada_log("parse_port: r.ec == std::errc::result_out_of_range");
231 is_valid = false;
232 return 0;
233 }
234 ada_log("parse_port: ", parsed_port);
235 const auto consumed = size_t(r.ptr - view.data());
236 ada_log("parse_port: consumed ", consumed);
237 if (check_trailing_content) {
238 is_valid &=
239 (consumed == view.size() || view[consumed] == '/' ||
240 view[consumed] == '?' || (is_special() && view[consumed] == '\\'));
241 }
242 ada_log("parse_port: is_valid = ", is_valid);
243 if (is_valid) {
244 // scheme_default_port can return 0, and we should allow 0 as a base port.
245 auto default_port = scheme_default_port();
246 bool is_port_valid = (default_port == 0 && parsed_port == 0) ||
247 (default_port != parsed_port);
248 port = (r.ec == std::errc() && is_port_valid) ? std::optional(parsed_port)
249 : std::nullopt;
250 }
251 return consumed;
252}
253
254} // namespace ada
255
256#endif // ADA_URL_INL_H
Declarations for URL specific checkers used within Ada.
#define ada_really_inline
Definition common_defs.h:84
constexpr uint8_t FRAGMENT_PERCENT_ENCODE[32]
constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept
Definition scheme-inl.h:72
Definition ada_idna.h:13
std::ostream & operator<<(std::ostream &out, const ada::url &u)
Definition url-inl.h:38
bool has_opaque_path
Definition url_base.h:55
URL Component representations using offsets.
Generic URL struct reliant on std::string instantiation.
Definition url.h:38
std::string get_search() const noexcept
Definition url.cpp:641
ada_really_inline ada::url_components get_components() const noexcept
Definition url-inl.h:50
bool has_empty_hostname() const noexcept
Definition url-inl.h:29
bool has_port() const noexcept
Definition url-inl.h:22
ada_really_inline bool has_credentials() const noexcept
Definition url-inl.h:19
ada_really_inline size_t get_pathname_length() const noexcept
Definition url-inl.h:42
ada_really_inline std::string get_href() const noexcept
Definition url-inl.h:187
bool has_hostname() const noexcept
Definition url-inl.h:35
constexpr std::string_view get_pathname() const noexcept
Definition url-inl.h:46
const std::string & get_password() const noexcept
Definition url.cpp:652
std::string get_port() const noexcept
Definition url.cpp:656
constexpr bool has_search() const noexcept override
Definition url-inl.h:163
std::string to_string() const override
Definition url.cpp:533
std::string get_protocol() const noexcept
Definition url.cpp:615
constexpr bool has_hash() const noexcept override
Definition url-inl.h:159
Declaration for the URL.
Declaration for the URL Components.