1#include <uchar.h>
2
3#include "include/pw.h"
4#include "include/pwlib/ctype.h"
5#include "include/pwlib/idna.h"
6#include "include/pwlib/parsers.h"
7#include "include/pwlib/uri.h"
8
9/*
10 * The following schemes may need specific handling
11 *
12 * - data: https://www.rfc-editor.org/rfc/inline-errata/rfc2397.html
13 * The following example is processed incorrectly:
14 * data:text/plain;charset=iso-8859-7,%be%d3%be
15 * because pct decoder knows nothing about charset and tries to convert to UTF-8
16 *
17 * - geo: https://www.rfc-editor.org/rfc/inline-errata/rfc5870.html
18 * Parameters parsing?
19 */
20
21static _PwValue uri_part_keys[] = {
22 PW_STRING("scheme"),
23 PW_STRING("user"),
24 PW_STRING("password"),
25 PW_STRING("host"),
26 PW_STRING("port"),
27 PW_STRING("path"),
28 PW_STRING("query"),
29 PW_STRING("fragment")
30};
31#define URI_SCHEME uri_part_keys[0]
32#define URI_USER uri_part_keys[1]
33#define URI_PASSWORD uri_part_keys[2]
34#define URI_HOST uri_part_keys[3]
35#define URI_PORT uri_part_keys[4]
36#define URI_PATH uri_part_keys[5]
37#define URI_QUERY uri_part_keys[6]
38#define URI_FRAGMENT uri_part_keys[7]
39
40PW_STRUCT(UriParserContext) {
41 PwStringIter uri;
42 bool allow_spaces;
43 unsigned line; // line in this source file where iteration has stopped
44};
45
/*
 * Bit flags stored in the chartype[] table, one bit per character class.
 */
enum {
    CTYPE_URI_WS      = 1,   // characters skipped as whitespace by the iteration helpers
    CTYPE_SUBDELIM    = 2,   // sub-delims
    CTYPE_UNRESERVED  = 4,   // unreserved
    CTYPE_PCHAR       = 8,   // :@
    CTYPE_PCHAR_NC    = 16,  // @
    CTYPE_FRAG_CHAR   = 32,  // :@/?
    CTYPE_SCHEME_CHAR = 64   // characters allowed in a scheme
};
53
/*
 * Character class table for ASCII code points 0..127.
 * Each entry is a combination of the CTYPE_* bit flags; one row covers
 * 16 consecutive code points. The table is read-only, hence const.
 */
static const uint8_t chartype[128] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00,
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x00, 0x02, 0x00, 0x00, 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x42, 0x02, 0x44, 0x44, 0x20,
    /* 0x30 */ 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x28, 0x02, 0x00, 0x02, 0x00, 0x20,
    /* 0x40 */ 0x38, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44,
    /* 0x50 */ 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x00, 0x00, 0x00, 0x00, 0x04,
    /* 0x60 */ 0x00, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44,
    /* 0x70 */ 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x00, 0x00, 0x00, 0x04, 0x00
};
64
/*
 * Character class predicates.
 *
 * chartype_test yields 1 when `c` is ASCII and its chartype[] entry has
 * at least one of the bits in `mask` set, otherwise 0.
 * Note that `c` is evaluated more than once: pass expressions without
 * side effects only.
 */
#define chartype_test(c, mask)  (pw_is_ascii(c) && (chartype[c] & (mask)))

#define is_uri_whitespace(c)    chartype_test(c, CTYPE_URI_WS)
#define is_subdelim(c)          chartype_test(c, CTYPE_SUBDELIM)
#define is_unreserved(c)        chartype_test(c, CTYPE_UNRESERVED)
#define is_ups_char(c, extra)   chartype_test(c, CTYPE_UNRESERVED | CTYPE_SUBDELIM | (extra))
#define is_scheme_char(c)       chartype_test(c, CTYPE_SCHEME_CHAR)
70
71[[nodiscard]] static bool check_end_of_uri(UriParserContext* ctx)
72/*
73 * Check if URI is fully processed
74 */
75{
76 if (ctx->uri.current_ptr >= ctx->uri.end_ptr) {
77 return true;
78 }
79 _PwValue status = PW_STATUS(PweParseError);
80 status.line_number = ctx->line;
81 unsigned char_pos = 1 + (ctx->uri.end_ptr - ctx->uri.current_ptr) / ctx->uri.char_size; // XXX this reports wrong position, need to fix
82 pw_exception(status, "Bad character encountered at %u", char_pos);
83 return false;
84}
85
86static inline bool _end_of(UriParserContext* ctx, unsigned line)
87{
88 if (ctx->uri.current_ptr < ctx->uri.end_ptr) {
89 return false;
90 }
91 ctx->line = line;
92 return true;
93}
94
95#define end_of(ctx) \
96 _end_of(ctx, __LINE__)
97
98[[nodiscard]] static inline bool _get_next_char(UriParserContext* ctx, char32_t* c, unsigned line)
99{
100 while (_pw_string_iter_next(&ctx->uri, c)) {
101 if (!is_uri_whitespace(*c)) {
102 return true;
103 }
104 }
105 ctx->line = line;
106 return false;
107}
108
109#define get_next_char(ctx, c) \
110 _get_next_char((ctx), (c), __LINE__)
111
112[[nodiscard]] static inline char32_t current_char(UriParserContext* ctx)
113{
114 return _pw_string_iter_current(&ctx->uri);
115}
116
117static inline void skip_char(UriParserContext* ctx)
118{
119 char32_t c;
120 while (_pw_string_iter_next(&ctx->uri, &c)) {
121 if (!is_uri_whitespace(c)) {
122 return;
123 }
124 }
125}
126
127static inline void unget_char(UriParserContext* ctx)
128{
129 char32_t c;
130 while (_pw_string_iter_prev(&ctx->uri, &c)) {
131 if (!is_uri_whitespace(c)) {
132 return;
133 }
134 }
135}
136
137[[nodiscard]] static bool parse_scheme(UriParserContext* ctx, PwValuePtr result)
138/*
139 * If URI has no scheme, the result is Null.
140 * Returned scheme is always lower case.
141 *
142 * Upon return the iterator position is next to ":" or untouched.
143 *
144 * scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
145 */
146{
147 pw_destroy(result); // make sure the result is Null
148
149 char32_t c;
150 PwValue scheme = PW_STRING();
151 uint8_t* saved_current = ctx->uri.current_ptr;
152 if (!get_next_char(ctx, &c)) {
153 return true;
154 }
155 if (!pw_is_ascii_alpha(c)) {
156 goto no_scheme;
157 }
158 // look ahead for scheme
159 do {
160 char lwr = pw_ascii_char_lower(c);
161 if (!pw_string_append(&scheme, lwr)) {
162 return false;
163 }
164 if (!get_next_char(ctx, &c)) {
165 goto no_scheme;
166 }
167 } while (is_scheme_char(c));
168
169 if (c != ':') {
170 goto no_scheme;
171 }
172
173 // got scheme
174 pw_move(result, &scheme);
175 return true;
176
177no_scheme:
178 ctx->uri.current_ptr = saved_current;
179 return true;
180}
181
182[[nodiscard]] static bool parse_double_slash(UriParserContext* ctx)
183/*
184 * Lookahead for double slash.
185 * If present, skip and return true.
186 * Otherwise return false;
187 */
188{
189 uint8_t* saved_current = ctx->uri.current_ptr;
190 for (unsigned i = 0; i < 2; i++) {
191 char32_t c;
192 if (!get_next_char(ctx, &c)) {
193 goto none;
194 }
195 if (c != '/') {
196 goto none;
197 }
198 }
199 return true;
200
201none:
202 ctx->uri.current_ptr = saved_current;
203 return false;
204}
205
206[[nodiscard]] static bool parse_ipv6_addr(UriParserContext* ctx, PwValuePtr result)
207/*
208 * On enter, the URI iterator is at position next to the opening "[".
209 * As a part of URI, IPv6 address must end with "]"
210 */
211{
212 char32_t c;
213 while (get_next_char(ctx, &c)) {
214 if (c == ']') {
215 break;
216 }
217 if (!pw_string_append(result, c)) {
218 return false;
219 }
220 }
221 return true;
222}
223
224[[nodiscard]] static bool parse_pct_hex(UriParserContext* ctx, uint8_t* result)
225/*
226 * Parse two consecutive HEX digits.
227 *
228 * Return true if result is valid,
229 * Otherwise return false which means either end of iteration or wrong character encountered.
230 */
231{
232 char32_t hi;
233 if (!get_next_char(ctx, &hi)) {
234 return false;
235 }
236 if (!pw_parse_hexdigit(&hi)) {
237 unget_char(ctx);
238 return false;
239 }
240 char32_t lo;
241 if (!get_next_char(ctx, &lo)) {
242 return false;
243 }
244 if (!pw_parse_hexdigit(&lo)) {
245 unget_char(ctx);
246 return false;
247 }
248 *result = (uint8_t) ((hi << 4) + lo);
249 return true;
250}
251
252[[nodiscard]] static bool parse_pct_utf8(UriParserContext* ctx, char32_t* result)
253/*
254 * Parse a series of pct-encoded characters as one codepoint in UTF-8.
255 *
256 * Return true if result is valid,
257 * Otherwise return false which means either end of iteration or wrong character encountered.
258 */
259{
260 uint8_t c;
261 if (!parse_pct_hex(ctx, &c)) {
262 return false;
263 }
264
265 char32_t codepoint;
266 uint8_t n;
267
268resync:
269 if (c < 128) {
270 *result = c;
271 return true;
272 }
273 if ((c & 0b1110'0000) == 0b1100'0000) {
274 codepoint = c & 0b0011'1111;
275 n = 1;
276 } else if ((c & 0b1111'0000) == 0b1110'0000) {
277 codepoint = c & 0b0001'1111;
278 n = 2;
279 } else if ((c & 0b1111'1000) == 0b1111'0000) {
280 codepoint = c & 0b0000'1111;
281 n = 3;
282 } else {
283 // bad UTF-8, however, return it (might be a Latin-1, whatever)
284 *result = c;
285 return true;
286 }
287 while (n--) {
288 char32_t pct;
289 if (!get_next_char(ctx, &pct)) {
290 return false;
291 }
292 if (pct != '%') {
293 unget_char(ctx);
294 return false;
295 }
296 if (!parse_pct_hex(ctx, &c)) {
297 return false;
298 }
299 if ((c & 0b1100'0000) != 0b1000'0000) {
300 // bad UTF-8, try to resync
301 goto resync;
302 }
303 codepoint <<= 6;
304 codepoint |= c & 0x3F;
305 }
306 *result = codepoint;
307 return true;
308}
309
310[[nodiscard]] static bool parse_ups_char(UriParserContext* ctx, char32_t* result, uint8_t extra_delims)
311/*
312 * unreserved / pct-encoded / sub-delims / extra_delims
313 *
314 * Return true if character is valid,
315 * Otherwise return false which means either end of iteration or wrong character encountered.
316 */
317{
318 char32_t c;
319 if (!get_next_char(ctx, &c)) {
320 return false;
321 }
322 if (c > 127) {
323 // as an extension to the original ABNF, allow non-ASCII characters
324 // so unescaped URIs can be parsed too
325 if (ctx->allow_spaces) {
326 if (pw_isprint(c)) {
327 *result = c;
328 return true;
329 }
330 } else {
331 if (pw_isgraph(c)) {
332 *result = c;
333 return true;
334 }
335 }
336 }
337 if (is_ups_char(c, extra_delims)) {
338 *result = c;
339 return true;
340 }
341 if (c != '%') {
342 unget_char(ctx);
343 return false;
344 }
345 return parse_pct_utf8(ctx, result);
346}
347
348[[nodiscard]] static bool parse_ups_str(UriParserContext* ctx, PwValuePtr result)
349{
350 char32_t c;
351 while (parse_ups_char(ctx, &c, 0)) {
352 if (!pw_string_append(result, c)) {
353 return false;
354 }
355 }
356 return true;
357}
358
359[[nodiscard]] static inline bool parse_pchar(UriParserContext* ctx, char32_t* result)
360/*
361 * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
362 *
363 * Return false if end of iteration.
364 */
365{
366 return parse_ups_char(ctx, result, CTYPE_PCHAR);
367}
368
369[[nodiscard]] static inline bool parse_pchar_nc(UriParserContext* ctx, char32_t* result)
370/*
371 * pchar = unreserved / pct-encoded / sub-delims / "@"
372 *
373 * Return false if end of iteration.
374 */
375{
376 return parse_ups_char(ctx, result, CTYPE_PCHAR_NC);
377}
378
379[[nodiscard]] static inline bool parse_frag_char(UriParserContext* ctx, char32_t* result)
380/*
381 * pchar / "/" / "?"
382 *
383 * Return false if end of iteration.
384 */
385{
386 return parse_ups_char(ctx, result, CTYPE_FRAG_CHAR);
387}
388
389[[nodiscard]] static bool parse_qchar(UriParserContext* ctx, char32_t* result)
390/*
391 * Same as parse_pchar, except "=" and "&", and replace "+" with space
392 *
393 * The code is based on parse_ups_char.
394 *
395 * Return true if character is valid,
396 * Otherwise return false which means either end of iteration or wrong character encountered.
397 */
398{
399 char32_t c;
400 if (!get_next_char(ctx, &c)) {
401 return false;
402 }
403 if (c == '=' || c == '&') {
404 unget_char(ctx);
405 return false;
406 }
407 if (c > 127) {
408 // as an extension to the original ABNF, allow non-ASCII characters
409 // so unescaped URIs can be parsed too
410 if (pw_isgraph(c)) {
411 *result = c;
412 return true;
413 }
414 }
415 if (is_ups_char(c, CTYPE_PCHAR)) {
416 if (c == '+') {
417 c = ' ';
418 }
419 *result = c;
420 return true;
421 }
422 return parse_pct_utf8(ctx, result);
423}
424
425[[nodiscard]] static bool parse_authority(UriParserContext* ctx, PwValuePtr uri_parts)
426/*
427 * authority = [ userinfo "@" ] host [ ":" port ]
428 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
429 * host = IP-literal / IPv4address / reg-name
430 * port = *DIGIT
431 * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
432 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
433 * reg-name = *( unreserved / pct-encoded / sub-delims )
434 *
435 * We split userinfo into username and password.
436 * That's because we have to track ':' delimiter which also separates port from host.
437 */
438{
439 PwValue username = PW_NULL;
440 PwValue password = PW_NULL;
441 PwValue host = PW_NULL;
442 PwValue port = PW_NULL;
443
444 if (end_of(ctx)) {
445 return true;
446 }
447 if (current_char(ctx) == '[') {
448 // IPv6
449 if (!parse_ipv6_addr(ctx, &host)) {
450 return false;
451 }
452 if (end_of(ctx)) {
453 goto done;
454 }
455 if (current_char(ctx) == ':') {
456 goto parse_port;
457 }
458 return true;
459 }
460
461 // expect username
462 username = PwString();
463 if (!parse_ups_str(ctx, &username)) {
464 return false;
465 }
466 if (end_of(ctx)) {
467 // username is actually a host
468 pw_move(&host, &username);
469 goto done;
470 }
471 char32_t c = current_char(ctx);
472 if (c == '@') {
473 // got username, no password
474 skip_char(ctx);
475 goto parse_host;
476 }
477 if (c != ':') {
478 // username is actually a host
479 pw_move(&host, &username);
480 goto done;
481 }
482 // got ':', expect password
483 skip_char(ctx);
484 password = PwString();
485 if (!parse_ups_str(ctx, &password)) {
486 return false;
487 }
488 if (end_of(ctx)) {
489 // username:password is actually host:port
490 pw_move(&host, &username);
491 pw_move(&port, &password);
492 goto done;
493 }
494 if (current_char(ctx) != '@') {
495 // username:password is actually host:port
496 pw_move(&host, &username);
497 pw_move(&port, &password);
498 goto done;
499 }
500
501parse_host:
502 host = PwString();
503 if (!parse_ups_str(ctx, &host)) {
504 return false;
505 }
506 if (end_of(ctx)) {
507 goto done;
508 }
509 if (current_char(ctx) != ':') {
510 goto done;
511 }
512
513parse_port:
514 // current_ptr points to ':'
515 skip_char(ctx);
516 port = PwString();
517 while(get_next_char(ctx, &c)) {
518 if (!pw_is_ascii_digit(c)) {
519 unget_char(ctx);
520 break;
521 }
522 if (!pw_string_append(&port, c)) {
523 return false;
524 }
525 }
526
527done:
528 if (!pw_map_update(uri_parts, &URI_USER, &username)) {
529 return false;
530 }
531 if (!pw_map_update(uri_parts, &URI_PASSWORD, &password)) {
532 return false;
533 }
534 if (!pw_is_null(&host)) {
535 if(!pw_string_lower(&host)) {
536 return false;
537 }
538 PwValue decoded = PW_NULL;
539 if (!pw_idna_decode(&host, &decoded)) {
540 return false;
541 }
542 if (!pw_map_update(uri_parts, &URI_HOST, &decoded)) {
543 return false;
544 }
545 }
546 if (!pw_is_null(&port)) {
547 PwValue n = PW_NULL;
548 if (!pw_parse_number(&port, &n)) {
549 return false;
550 }
551 if (!pw_map_update(uri_parts, &URI_PORT, &n)) {
552 return false;
553 }
554 }
555 return true;
556}
557
558[[nodiscard]] static bool parse_path_abempty(UriParserContext* ctx, PwValuePtr uri_parts)
559/*
560 * path-abempty = *( "/" segment )
561 * segment = *pchar
562 */
563{
564 PwValue path = PW_STRING();
565 char32_t c;
566 while (get_next_char(ctx, &c)) {
567 if (c != '/') {
568 unget_char(ctx);
569 break;
570 }
571 if (!pw_string_append(&path, c)) {
572 return false;
573 }
574 while (!end_of(ctx)) {
575 if (current_char(ctx) == '/') {
576 break;
577 }
578 if (!parse_pchar(ctx, &c)) {
579 break;
580 }
581 if (!pw_string_append(&path, c)) {
582 return false;
583 }
584 }
585 }
586 if (pw_strlen(&path)) {
587 if (!pw_map_update(uri_parts, &URI_PATH, &path)) {
588 return false;
589 }
590 }
591 return true;
592}
593
594[[nodiscard]] static bool parse_path_absolute(UriParserContext* ctx, PwValuePtr result)
595/*
596 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
597 * segment = *pchar
598 * segment-nz = 1*pchar
599 */
600{
601 for (;;) {
602 skip_char(ctx); // always "/" here
603 if (!pw_string_append(result, '/')) {
604 return false;
605 }
606 for (;;) {
607 char32_t c;
608 if (!parse_pchar(ctx, &c)) {
609 // can't guarantee this segment is exactly segment-nz
610 // use path normalization after parsing
611 if (end_of(ctx)) {
612 return true;
613 }
614 if (current_char(ctx) == '/') {
615 break;
616 }
617 }
618 if (!pw_string_append(result, c)) {
619 return false;
620 }
621 }
622 }
623}
624
625[[nodiscard]] static bool parse_path_rootless(UriParserContext* ctx, PwValuePtr result)
626/*
627 * path-rootless = segment-nz *( "/" segment )
628 * segment = *pchar
629 * segment-nz = 1*pchar
630 */
631{
632 for (;;) {
633 char32_t c;
634 if (!parse_pchar(ctx, &c)) {
635 if (end_of(ctx)) {
636 return true;
637 }
638 if (current_char(ctx) == '/') {
639 if (!get_next_char(ctx, &c)) {
640 return true;
641 }
642 // fall through and add slash to the result
643 } else {
644 // unknown character, bail out
645 return true;
646 }
647 }
648 if (!pw_string_append(result, c)) {
649 return false;
650 }
651 }
652}
653
654[[nodiscard]] static bool parse_path_noscheme(UriParserContext* ctx, PwValuePtr result)
655/*
656 * path-noscheme = segment-nz-nc *( "/" segment )
657 * segment = *pchar
658 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
659 * ; non-zero-length segment without any colon ":"
660 */
661{
662 char32_t c;
663 for (;;) {
664 if (!parse_pchar_nc(ctx, &c)) {
665 break;
666 }
667 if (!pw_string_append(result, c)) {
668 return false;
669 }
670 }
671 while (!end_of(ctx)) {
672 if (current_char(ctx) != '/') {
673 break;
674 }
675 skip_char(ctx);
676 if (!pw_string_append(result, '/')) {
677 return false;
678 }
679 while (parse_pchar(ctx, &c)) {
680 if (!pw_string_append(result, '/')) {
681 return false;
682 }
683 }
684 }
685 return true;
686}
687
688[[nodiscard]] static bool parse_path(UriParserContext* ctx, PwValuePtr uri_parts, bool have_scheme)
689/*
690 * If `have_scheme`: path-absolute / path-rootless / path-empty
691 * otherwise: path-absolute / path-noscheme / path-empty
692 *
693 * path-empty = 0<pchar>
694 */
695{
696 if (end_of(ctx)) {
697 // path-empty
698 return true;
699 }
700 PwValue path = PW_STRING();
701 if (current_char(ctx) == '/') {
702 if (!parse_path_absolute(ctx, &path)) {
703 return false;
704 }
705 } else if (have_scheme) {
706 if (!parse_path_rootless(ctx, &path)) {
707 return false;
708 }
709 } else {
710 if (!parse_path_noscheme(ctx, &path)) {
711 return false;
712 }
713 }
714 if (pw_strlen(&path)) {
715 if (!pw_map_update(uri_parts, &URI_PATH, &path)) {
716 return false;
717 }
718 }
719 return true;
720}
721
722[[nodiscard]] static bool parse_key_value(UriParserContext* ctx, PwValuePtr key, PwValuePtr value)
723{
724 char32_t c;
725 while (parse_qchar(ctx, &c)) {
726 if (!pw_string_append(key, c)) {
727 return false;
728 }
729 }
730 if (end_of(ctx)) {
731 return true;
732 }
733 if (current_char(ctx) != '=') {
734 return true;
735 }
736 // got "=", consume it and parse value
737 skip_char(ctx);
738
739 while (parse_qchar(ctx, &c)) {
740 if (!pw_string_append(value, c)) {
741 return false;
742 }
743 }
744 return true;
745}
746
747[[nodiscard]] static bool _pw_parse_uri_query_ctx(UriParserContext* ctx, PwValuePtr result)
748{
749 if (!pw_create(PwTypeId_BasicMap, result)) {
750 return false;
751 }
752 while (!end_of(ctx)) {
753 PwValue key = PW_STRING();
754 PwValue value = PW_STRING();
755 if (!parse_key_value(ctx, &key, &value)) {
756 return false;
757 }
758 if (!pw_map_update(result, &key, &value)) {
759 return false;
760 }
761 if (!end_of(ctx)) {
762 if (current_char(ctx) != '&') {
763 break;
764 }
765 skip_char(ctx); // consume "&"
766 }
767 }
768 return true;
769}
770
771[[nodiscard]] bool _pw_parse_uri_query_iter(PwStringIter* uri, PwValuePtr result)
772{
773 UriParserContext ctx;
774 ctx.uri = *uri;
775 ctx.allow_spaces = false;
776 bool ret = _pw_parse_uri_query_ctx(&ctx, result);
777 if (ret) {
778 ret = check_end_of_uri(&ctx);
779 }
780 *uri = ctx.uri;
781 return ret;
782}
783
784[[nodiscard]] bool _pw_parse_uri_query_pw(PwValuePtr query, PwValuePtr result)
785{
786 PwStringIter iter;
787 _pw_string_iter(query, &iter);
788 return _pw_parse_uri_query_iter(&iter, result);
789}
790
791[[nodiscard]] static bool parse_fragment(UriParserContext* ctx, PwValuePtr uri_parts)
792/*
793 * fragment = *( pchar / "/" / "?" )
794 */
795{
796 PwValue fragment = PW_STRING();
797 char32_t c;
798 while (parse_frag_char(ctx, &c)) {
799 if (!pw_string_append(&fragment, c)) {
800 return false;
801 }
802 }
803 if (pw_strlen(&fragment)) {
804 if (!pw_map_update(uri_parts, &URI_FRAGMENT, &fragment)) {
805 return false;
806 }
807 }
808 return true;
809}
810
811[[nodiscard]] static bool _pw_parse_uri_ctx(UriParserContext* ctx, PwValuePtr uri_parts)
812{
813 // create uri_parts and initialize its members
814
815 if (!pw_create(PwTypeId_BasicMap, uri_parts)) {
816 return false;
817 }
818 for (unsigned i = 0; i < PW_LENGTH(uri_part_keys); i++) {
819 PwValue value = PW_NULL;
820 if (!pw_map_update(uri_parts, &uri_part_keys[i], &value)) {
821 return false;
822 }
823 }
824
825 char32_t c;
826
827 // strip leading C0 control and space characters
828 while (get_next_char(ctx, &c)) {
829 if (c > ' ') {
830 unget_char(ctx);
831 break;
832 }
833 }
834
835 // URI-reference = URI / relative-ref
836 // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
837 // relative-ref = relative-part [ "?" query ] [ "#" fragment ]
838 PwValue scheme = PW_NULL;
839 if (!parse_scheme(ctx, &scheme)) {
840 return false;
841 }
842 if (!pw_map_update(uri_parts, &URI_SCHEME, &scheme)) {
843 return false;
844 }
845
846 // hier-part = "//" authority path-abempty
847 // / path-absolute
848 // / path-rootless
849 // / path-empty
850
851 // relative-part = "//" authority path-abempty
852 // / path-absolute
853 // / path-noscheme
854 // / path-empty
855
856 if (parse_double_slash(ctx)) {
857 if (!parse_authority(ctx, uri_parts)) {
858 return false;
859 }
860 if (!parse_path_abempty(ctx, uri_parts)) {
861 return false;
862 }
863 } else {
864 // parse path depending on presence of schema:
865 if (!parse_path(ctx, uri_parts, pw_is_string(&scheme))) {
866 return false;
867 }
868 }
869 if (!get_next_char(ctx, &c)) {
870 return true;
871 }
872 if (c == '?') {
873 PwValue query = PW_NULL;
874 if (!_pw_parse_uri_query_ctx(ctx, &query)) {
875 return false;
876 }
877 if (!pw_map_update(uri_parts, &URI_QUERY, &query)) {
878 return false;
879 }
880 c = current_char(ctx);
881 }
882 if (c == '#') {
883 if (!parse_fragment(ctx, uri_parts)) {
884 return false;
885 }
886 }
887 return true;
888}
889
890[[nodiscard]] bool _pw_parse_uri_iter(PwStringIter* uri, PwValuePtr result, bool allow_spaces)
891{
892 UriParserContext ctx;
893 ctx.uri = *uri;
894 ctx.allow_spaces = allow_spaces;
895 bool ret = _pw_parse_uri_ctx(&ctx, result);
896 if (ret) {
897 ret = check_end_of_uri(&ctx);
898 }
899 *uri = ctx.uri;
900 return ret;
901}
902
903[[nodiscard]] bool _pw_parse_uri_pw(PwValuePtr uri, PwValuePtr result, bool allow_spaces)
904{
905 PwStringIter iter;
906 _pw_string_iter(uri, &iter);
907 return _pw_parse_uri_iter(&iter, result, allow_spaces);
908}