1#include <uchar.h>
  2
  3#include "include/pw.h"
  4#include "include/pwlib/ctype.h"
  5#include "include/pwlib/idna.h"
  6#include "include/pwlib/parsers.h"
  7#include "include/pwlib/uri.h"
  8
  9/*
 10 * The following schemes may need specific handling
 11 *
 12 * - data: https://www.rfc-editor.org/rfc/inline-errata/rfc2397.html
 *   The following example is processed incorrectly:
 *       data:text/plain;charset=iso-8859-7,%be%d3%be
 *   because the pct decoder knows nothing about the declared charset
 *   and always tries to decode the escaped bytes as UTF-8
 16 *
 17 * - geo: https://www.rfc-editor.org/rfc/inline-errata/rfc5870.html
 18 *   Parameters parsing?
 19 */
 20
 21static _PwValue uri_part_keys[] = {
 22    PW_STRING("scheme"),
 23    PW_STRING("user"),
 24    PW_STRING("password"),
 25    PW_STRING("host"),
 26    PW_STRING("port"),
 27    PW_STRING("path"),
 28    PW_STRING("query"),
 29    PW_STRING("fragment")
 30};
 31#define URI_SCHEME   uri_part_keys[0]
 32#define URI_USER     uri_part_keys[1]
 33#define URI_PASSWORD uri_part_keys[2]
 34#define URI_HOST     uri_part_keys[3]
 35#define URI_PORT     uri_part_keys[4]
 36#define URI_PATH     uri_part_keys[5]
 37#define URI_QUERY    uri_part_keys[6]
 38#define URI_FRAGMENT uri_part_keys[7]
 39
// Parser state passed to every parsing helper in this file.
PW_STRUCT(UriParserContext) {
    PwStringIter uri;  // iterator over the text being parsed; helpers read and move its current_ptr
    bool allow_spaces;  // when set, parse_ups_char accepts non-ASCII characters that pass pw_isprint instead of pw_isgraph
    unsigned line;  // line in this source file where iteration has stopped
                    // (written by end_of/get_next_char, copied into the error status by check_end_of_uri)
};
 45
 46#define CTYPE_URI_WS        1
 47#define CTYPE_SUBDELIM      2
 48#define CTYPE_UNRESERVED    4
 49#define CTYPE_PCHAR         8  // :@
 50#define CTYPE_PCHAR_NC     16  // @
 51#define CTYPE_FRAG_CHAR    32  // :@/?
 52#define CTYPE_SCHEME_CHAR  64
 53
 54static uint8_t chartype[128] = {
 55    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00,
 56    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 57    0x00, 0x02, 0x00, 0x00, 0x02, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x42, 0x02, 0x44, 0x44, 0x20,
 58    0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x28, 0x02, 0x00, 0x02, 0x00, 0x20,
 59    0x38, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44,
 60    0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x00, 0x00, 0x00, 0x00, 0x04,
 61    0x00, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44,
 62    0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x44, 0x00, 0x00, 0x00, 0x04, 0x00
 63};
 64
 65#define is_uri_whitespace(c)  (pw_is_ascii(c) && (chartype[c] & CTYPE_URI_WS))
 66#define is_subdelim(c)        (pw_is_ascii(c) && (chartype[c] & CTYPE_SUBDELIM))
 67#define is_unreserved(c)      (pw_is_ascii(c) && (chartype[c] & CTYPE_UNRESERVED))
 68#define is_ups_char(c, extra) (pw_is_ascii(c) && (chartype[c] & (CTYPE_UNRESERVED | CTYPE_SUBDELIM | (extra))))
 69#define is_scheme_char(c)     (pw_is_ascii(c) && (chartype[c] & CTYPE_SCHEME_CHAR))
 70
 71[[nodiscard]] static bool check_end_of_uri(UriParserContext* ctx)
 72/*
 73 * Check if URI is fully processed
 74 */
 75{
 76    if (ctx->uri.current_ptr >= ctx->uri.end_ptr) {
 77        return true;
 78    }
 79    _PwValue status = PW_STATUS(PweParseError);
 80    status.line_number = ctx->line;
 81    unsigned char_pos = 1 + (ctx->uri.end_ptr - ctx->uri.current_ptr) / ctx->uri.char_size;  // XXX this reports wrong position, need to fix
 82    pw_exception(status, "Bad character encountered at %u", char_pos);
 83    return false;
 84}
 85
 86static inline bool _end_of(UriParserContext* ctx, unsigned line)
 87{
 88    if (ctx->uri.current_ptr < ctx->uri.end_ptr) {
 89        return false;
 90    }
 91    ctx->line = line;
 92    return true;
 93}
 94
 95#define end_of(ctx)  \
 96    _end_of(ctx, __LINE__)
 97
 98[[nodiscard]] static inline bool _get_next_char(UriParserContext* ctx, char32_t* c, unsigned line)
 99{
100    while (_pw_string_iter_next(&ctx->uri, c)) {
101        if (!is_uri_whitespace(*c)) {
102            return true;
103        }
104    }
105    ctx->line = line;
106    return false;
107}
108
109#define get_next_char(ctx, c)  \
110    _get_next_char((ctx), (c), __LINE__)
111
112[[nodiscard]] static inline char32_t current_char(UriParserContext* ctx)
113{
114    return _pw_string_iter_current(&ctx->uri);
115}
116
117static inline void skip_char(UriParserContext* ctx)
118{
119    char32_t c;
120    while (_pw_string_iter_next(&ctx->uri, &c)) {
121        if (!is_uri_whitespace(c)) {
122            return;
123        }
124    }
125}
126
127static inline void unget_char(UriParserContext* ctx)
128{
129    char32_t c;
130    while (_pw_string_iter_prev(&ctx->uri, &c)) {
131        if (!is_uri_whitespace(c)) {
132            return;
133        }
134    }
135}
136
137[[nodiscard]] static bool parse_scheme(UriParserContext* ctx, PwValuePtr result)
138/*
139 * If URI has no scheme, the result is Null.
140 * Returned scheme is always lower case.
141 *
142 * Upon return the iterator position is next to ":" or untouched.
143 *
144 * scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
145 */
146{
147    pw_destroy(result);  // make sure the result is Null
148
149    char32_t c;
150    PwValue scheme = PW_STRING();
151    uint8_t* saved_current = ctx->uri.current_ptr;
152    if (!get_next_char(ctx, &c)) {
153        return true;
154    }
155    if (!pw_is_ascii_alpha(c)) {
156        goto no_scheme;
157    }
158    // look ahead for scheme
159    do {
160        char lwr = pw_ascii_char_lower(c);
161        if (!pw_string_append(&scheme, lwr)) {
162            return false;
163        }
164        if (!get_next_char(ctx, &c)) {
165            goto no_scheme;
166        }
167    } while (is_scheme_char(c));
168
169    if (c != ':') {
170        goto no_scheme;
171    }
172
173    // got scheme
174    pw_move(result, &scheme);
175    return true;
176
177no_scheme:
178    ctx->uri.current_ptr = saved_current;
179    return true;
180}
181
182[[nodiscard]] static bool parse_double_slash(UriParserContext* ctx)
183/*
184 * Lookahead for double slash.
185 * If present, skip and return true.
186 * Otherwise return false;
187 */
188{
189    uint8_t* saved_current = ctx->uri.current_ptr;
190    for (unsigned i = 0; i < 2; i++) {
191        char32_t c;
192        if (!get_next_char(ctx, &c)) {
193            goto none;
194        }
195        if (c != '/') {
196            goto none;
197        }
198    }
199    return true;
200
201none:
202    ctx->uri.current_ptr = saved_current;
203    return false;
204}
205
206[[nodiscard]] static bool parse_ipv6_addr(UriParserContext* ctx, PwValuePtr result)
207/*
208 * On enter, the URI iterator is at position next to the opening "[".
209 * As a part of URI, IPv6 address must end with "]"
210 */
211{
212    char32_t c;
213    while (get_next_char(ctx, &c)) {
214        if (c == ']') {
215            break;
216        }
217        if (!pw_string_append(result, c)) {
218            return false;
219        }
220    }
221    return true;
222}
223
224[[nodiscard]] static bool parse_pct_hex(UriParserContext* ctx, uint8_t* result)
225/*
226 * Parse two consecutive HEX digits.
227 *
228 * Return true if result is valid,
229 * Otherwise return false which means either end of iteration or wrong character encountered.
230 */
231{
232    char32_t hi;
233    if (!get_next_char(ctx, &hi)) {
234        return false;
235    }
236    if (!pw_parse_hexdigit(&hi)) {
237        unget_char(ctx);
238        return false;
239    }
240    char32_t lo;
241    if (!get_next_char(ctx, &lo)) {
242        return false;
243    }
244    if (!pw_parse_hexdigit(&lo)) {
245        unget_char(ctx);
246        return false;
247    }
248    *result = (uint8_t) ((hi << 4) + lo);
249    return true;
250}
251
252[[nodiscard]] static bool parse_pct_utf8(UriParserContext* ctx, char32_t* result)
253/*
254 * Parse a series of pct-encoded characters as one codepoint in UTF-8.
255 *
256 * Return true if result is valid,
257 * Otherwise return false which means either end of iteration or wrong character encountered.
258 */
259{
260    uint8_t c;
261    if (!parse_pct_hex(ctx, &c)) {
262        return false;
263    }
264
265    char32_t codepoint;
266    uint8_t n;
267
268resync:
269    if (c < 128) {
270        *result = c;
271        return true;
272    }
273    if ((c & 0b1110'0000) == 0b1100'0000) {
274        codepoint = c & 0b0011'1111;
275        n = 1;
276    } else if ((c & 0b1111'0000) == 0b1110'0000) {
277        codepoint = c & 0b0001'1111;
278        n = 2;
279    } else if ((c & 0b1111'1000) == 0b1111'0000) {
280        codepoint = c & 0b0000'1111;
281        n = 3;
282    } else {
283        // bad UTF-8, however, return it (might be a Latin-1, whatever)
284        *result = c;
285        return true;
286    }
287    while (n--) {
288        char32_t pct;
289        if (!get_next_char(ctx, &pct)) {
290            return false;
291        }
292        if (pct != '%') {
293            unget_char(ctx);
294            return false;
295        }
296        if (!parse_pct_hex(ctx, &c)) {
297            return false;
298        }
299        if ((c & 0b1100'0000) != 0b1000'0000) {
300            // bad UTF-8, try to resync
301            goto resync;
302        }
303        codepoint <<= 6;
304        codepoint |= c & 0x3F;
305    }
306    *result = codepoint;
307    return true;
308}
309
310[[nodiscard]] static bool parse_ups_char(UriParserContext* ctx, char32_t* result, uint8_t extra_delims)
311/*
312 * unreserved / pct-encoded / sub-delims / extra_delims
313 *
314 * Return true if character is valid,
315 * Otherwise return false which means either end of iteration or wrong character encountered.
316 */
317{
318    char32_t c;
319    if (!get_next_char(ctx, &c)) {
320        return false;
321    }
322    if (c > 127) {
323        // as an extension to the original ABNF, allow non-ASCII characters
324        // so unescaped URIs can be parsed too
325        if (ctx->allow_spaces) {
326            if (pw_isprint(c)) {
327                *result = c;
328                return true;
329            }
330        } else {
331            if (pw_isgraph(c)) {
332                *result = c;
333                return true;
334            }
335        }
336    }
337    if (is_ups_char(c, extra_delims)) {
338        *result = c;
339        return true;
340    }
341    if (c != '%') {
342        unget_char(ctx);
343        return false;
344    }
345    return parse_pct_utf8(ctx, result);
346}
347
348[[nodiscard]] static bool parse_ups_str(UriParserContext* ctx, PwValuePtr result)
349{
350    char32_t c;
351    while (parse_ups_char(ctx, &c, 0)) {
352        if (!pw_string_append(result, c)) {
353            return false;
354        }
355    }
356    return true;
357}
358
359[[nodiscard]] static inline bool parse_pchar(UriParserContext* ctx, char32_t* result)
360/*
361 * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
362 *
363 * Return false if end of iteration.
364 */
365{
366    return parse_ups_char(ctx, result, CTYPE_PCHAR);
367}
368
369[[nodiscard]] static inline bool parse_pchar_nc(UriParserContext* ctx, char32_t* result)
370/*
371 * pchar = unreserved / pct-encoded / sub-delims / "@"
372 *
373 * Return false if end of iteration.
374 */
375{
376    return parse_ups_char(ctx, result, CTYPE_PCHAR_NC);
377}
378
379[[nodiscard]] static inline bool parse_frag_char(UriParserContext* ctx, char32_t* result)
380/*
381 * pchar / "/" / "?"
382 *
383 * Return false if end of iteration.
384 */
385{
386    return parse_ups_char(ctx, result, CTYPE_FRAG_CHAR);
387}
388
389[[nodiscard]] static bool parse_qchar(UriParserContext* ctx, char32_t* result)
390/*
391 * Same as parse_pchar, except "=" and "&", and replace "+" with space
392 *
393 * The code is based on parse_ups_char.
394 *
395 * Return true if character is valid,
396 * Otherwise return false which means either end of iteration or wrong character encountered.
397 */
398{
399    char32_t c;
400    if (!get_next_char(ctx, &c)) {
401        return false;
402    }
403    if (c == '=' || c == '&') {
404        unget_char(ctx);
405        return false;
406    }
407    if (c > 127) {
408        // as an extension to the original ABNF, allow non-ASCII characters
409        // so unescaped URIs can be parsed too
410        if (pw_isgraph(c)) {
411            *result = c;
412            return true;
413        }
414    }
415    if (is_ups_char(c, CTYPE_PCHAR)) {
416        if (c == '+') {
417            c = ' ';
418        }
419        *result = c;
420        return true;
421    }
422    return parse_pct_utf8(ctx, result);
423}
424
425[[nodiscard]] static bool parse_authority(UriParserContext* ctx, PwValuePtr uri_parts)
426/*
427 * authority   = [ userinfo "@" ] host [ ":" port ]
428 * userinfo    = *( unreserved / pct-encoded / sub-delims / ":" )
429 * host        = IP-literal / IPv4address / reg-name
430 * port        = *DIGIT
431 * IP-literal  = "[" ( IPv6address / IPvFuture  ) "]"
432 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
433 * reg-name    = *( unreserved / pct-encoded / sub-delims )
434 *
435 * We split userinfo into username and password.
436 * That's because we have to track ':' delimiter which also separates port from host.
437 */
438{
439    PwValue username = PW_NULL;
440    PwValue password = PW_NULL;
441    PwValue host = PW_NULL;
442    PwValue port = PW_NULL;
443
444    if (end_of(ctx)) {
445        return true;
446    }
447    if (current_char(ctx) == '[') {
448        // IPv6
449        if (!parse_ipv6_addr(ctx, &host)) {
450            return false;
451        }
452        if (end_of(ctx)) {
453            goto done;
454        }
455        if (current_char(ctx) == ':') {
456            goto parse_port;
457        }
458        return true;
459    }
460
461    // expect username
462    username = PwString();
463    if (!parse_ups_str(ctx, &username)) {
464        return false;
465    }
466    if (end_of(ctx)) {
467        // username is actually a host
468        pw_move(&host, &username);
469        goto done;
470    }
471    char32_t c = current_char(ctx);
472    if (c == '@') {
473        // got username, no password
474        skip_char(ctx);
475        goto parse_host;
476    }
477    if (c != ':') {
478        // username is actually a host
479        pw_move(&host, &username);
480        goto done;
481    }
482    // got ':', expect password
483    skip_char(ctx);
484    password = PwString();
485    if (!parse_ups_str(ctx, &password)) {
486        return false;
487    }
488    if (end_of(ctx)) {
489        // username:password is actually host:port
490        pw_move(&host, &username);
491        pw_move(&port, &password);
492        goto done;
493    }
494    if (current_char(ctx) != '@') {
495        // username:password is actually host:port
496        pw_move(&host, &username);
497        pw_move(&port, &password);
498        goto done;
499    }
500
501parse_host:
502    host = PwString();
503    if (!parse_ups_str(ctx, &host)) {
504        return false;
505    }
506    if (end_of(ctx)) {
507        goto done;
508    }
509    if (current_char(ctx) != ':') {
510        goto done;
511    }
512
513parse_port:
514    // current_ptr points to ':'
515    skip_char(ctx);
516    port = PwString();
517    while(get_next_char(ctx, &c)) {
518        if (!pw_is_ascii_digit(c)) {
519            unget_char(ctx);
520            break;
521        }
522        if (!pw_string_append(&port, c)) {
523            return false;
524        }
525    }
526
527done:
528    if (!pw_map_update(uri_parts, &URI_USER, &username)) {
529        return false;
530    }
531    if (!pw_map_update(uri_parts, &URI_PASSWORD, &password)) {
532        return false;
533    }
534    if (!pw_is_null(&host)) {
535        if(!pw_string_lower(&host)) {
536            return false;
537        }
538        PwValue decoded = PW_NULL;
539        if (!pw_idna_decode(&host, &decoded)) {
540            return false;
541        }
542        if (!pw_map_update(uri_parts, &URI_HOST, &decoded)) {
543            return false;
544        }
545    }
546    if (!pw_is_null(&port)) {
547        PwValue n = PW_NULL;
548        if (!pw_parse_number(&port, &n)) {
549            return false;
550        }
551        if (!pw_map_update(uri_parts, &URI_PORT, &n)) {
552            return false;
553        }
554    }
555    return true;
556}
557
558[[nodiscard]] static bool parse_path_abempty(UriParserContext* ctx, PwValuePtr uri_parts)
559/*
560 * path-abempty = *( "/" segment )
561 * segment      = *pchar
562 */
563{
564    PwValue path = PW_STRING();
565    char32_t c;
566    while (get_next_char(ctx, &c)) {
567        if (c != '/') {
568            unget_char(ctx);
569            break;
570        }
571        if (!pw_string_append(&path, c)) {
572            return false;
573        }
574        while (!end_of(ctx)) {
575            if (current_char(ctx) == '/') {
576                break;
577            }
578            if (!parse_pchar(ctx, &c)) {
579                break;
580            }
581            if (!pw_string_append(&path, c)) {
582                return false;
583            }
584        }
585    }
586    if (pw_strlen(&path)) {
587        if (!pw_map_update(uri_parts, &URI_PATH, &path)) {
588            return false;
589        }
590    }
591    return true;
592}
593
594[[nodiscard]] static bool parse_path_absolute(UriParserContext* ctx, PwValuePtr result)
595/*
596 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
597 * segment       = *pchar
598 * segment-nz    = 1*pchar
599 */
600{
601    for (;;) {
602        skip_char(ctx);  // always "/" here
603        if (!pw_string_append(result, '/')) {
604            return false;
605        }
606        for (;;) {
607            char32_t c;
608            if (!parse_pchar(ctx, &c)) {
609                // can't guarantee this segment is exactly segment-nz
610                // use path normalization after parsing
611                if (end_of(ctx)) {
612                    return true;
613                }
614                if (current_char(ctx) == '/') {
615                    break;
616                }
617            }
618            if (!pw_string_append(result, c)) {
619                return false;
620            }
621        }
622    }
623}
624
625[[nodiscard]] static bool parse_path_rootless(UriParserContext* ctx, PwValuePtr result)
626/*
627 * path-rootless = segment-nz *( "/" segment )
628 * segment       = *pchar
629 * segment-nz    = 1*pchar
630 */
631{
632    for (;;) {
633        char32_t c;
634        if (!parse_pchar(ctx, &c)) {
635            if (end_of(ctx)) {
636                return true;
637            }
638            if (current_char(ctx) == '/') {
639                if (!get_next_char(ctx, &c)) {
640                    return true;
641                }
642                // fall through and add slash to the result
643            } else {
644                // unknown character, bail out
645                return true;
646            }
647        }
648        if (!pw_string_append(result, c)) {
649            return false;
650        }
651    }
652}
653
654[[nodiscard]] static bool parse_path_noscheme(UriParserContext* ctx, PwValuePtr result)
655/*
656 * path-noscheme = segment-nz-nc *( "/" segment )
657 * segment       = *pchar
658 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
659 *               ; non-zero-length segment without any colon ":"
660 */
661{
662    char32_t c;
663    for (;;) {
664        if (!parse_pchar_nc(ctx, &c)) {
665            break;
666        }
667        if (!pw_string_append(result, c)) {
668            return false;
669        }
670    }
671    while (!end_of(ctx)) {
672        if (current_char(ctx) != '/') {
673            break;
674        }
675        skip_char(ctx);
676        if (!pw_string_append(result, '/')) {
677            return false;
678        }
679        while (parse_pchar(ctx, &c)) {
680            if (!pw_string_append(result, '/')) {
681                return false;
682            }
683        }
684    }
685    return true;
686}
687
688[[nodiscard]] static bool parse_path(UriParserContext* ctx, PwValuePtr uri_parts, bool have_scheme)
689/*
690 * If `have_scheme`: path-absolute / path-rootless / path-empty
691 * otherwise:        path-absolute / path-noscheme / path-empty
692 *
693 * path-empty = 0<pchar>
694 */
695{
696    if (end_of(ctx)) {
697        // path-empty
698        return true;
699    }
700    PwValue path = PW_STRING();
701    if (current_char(ctx) == '/') {
702        if (!parse_path_absolute(ctx, &path)) {
703            return false;
704        }
705    } else if (have_scheme) {
706        if (!parse_path_rootless(ctx, &path)) {
707            return false;
708        }
709    } else {
710        if (!parse_path_noscheme(ctx, &path)) {
711            return false;
712        }
713    }
714    if (pw_strlen(&path)) {
715        if (!pw_map_update(uri_parts, &URI_PATH, &path)) {
716            return false;
717        }
718    }
719    return true;
720}
721
722[[nodiscard]] static bool parse_key_value(UriParserContext* ctx, PwValuePtr key, PwValuePtr value)
723{
724    char32_t c;
725    while (parse_qchar(ctx, &c)) {
726        if (!pw_string_append(key, c)) {
727            return false;
728        }
729    }
730    if (end_of(ctx)) {
731        return true;
732    }
733    if (current_char(ctx) != '=') {
734        return true;
735    }
736    // got "=", consume it and parse value
737    skip_char(ctx);
738
739    while (parse_qchar(ctx, &c)) {
740        if (!pw_string_append(value, c)) {
741            return false;
742        }
743    }
744    return true;
745}
746
747[[nodiscard]] static bool _pw_parse_uri_query_ctx(UriParserContext* ctx, PwValuePtr result)
748{
749    if (!pw_create(PwTypeId_BasicMap, result)) {
750        return false;
751    }
752    while (!end_of(ctx)) {
753        PwValue key = PW_STRING();
754        PwValue value = PW_STRING();
755        if (!parse_key_value(ctx, &key, &value)) {
756            return false;
757        }
758        if (!pw_map_update(result, &key, &value)) {
759            return false;
760        }
761        if (!end_of(ctx)) {
762            if (current_char(ctx) != '&') {
763                break;
764            }
765            skip_char(ctx);  // consume "&"
766        }
767    }
768    return true;
769}
770
771[[nodiscard]] bool _pw_parse_uri_query_iter(PwStringIter* uri, PwValuePtr result)
772{
773    UriParserContext ctx;
774    ctx.uri = *uri;
775    ctx.allow_spaces = false;
776    bool ret = _pw_parse_uri_query_ctx(&ctx, result);
777    if (ret) {
778        ret = check_end_of_uri(&ctx);
779    }
780    *uri = ctx.uri;
781    return ret;
782}
783
784[[nodiscard]] bool _pw_parse_uri_query_pw(PwValuePtr query, PwValuePtr result)
785{
786    PwStringIter iter;
787    _pw_string_iter(query, &iter);
788    return _pw_parse_uri_query_iter(&iter, result);
789}
790
791[[nodiscard]] static bool parse_fragment(UriParserContext* ctx, PwValuePtr uri_parts)
792/*
793 * fragment = *( pchar / "/" / "?" )
794 */
795{
796    PwValue fragment = PW_STRING();
797    char32_t c;
798    while (parse_frag_char(ctx, &c)) {
799        if (!pw_string_append(&fragment, c)) {
800            return false;
801        }
802    }
803    if (pw_strlen(&fragment)) {
804        if (!pw_map_update(uri_parts, &URI_FRAGMENT, &fragment)) {
805            return false;
806        }
807    }
808    return true;
809}
810
811[[nodiscard]] static bool _pw_parse_uri_ctx(UriParserContext* ctx, PwValuePtr uri_parts)
812{
813    // create uri_parts and initialize its members
814
815    if (!pw_create(PwTypeId_BasicMap, uri_parts)) {
816        return false;
817    }
818    for (unsigned i = 0; i < PW_LENGTH(uri_part_keys); i++) {
819        PwValue value = PW_NULL;
820        if (!pw_map_update(uri_parts, &uri_part_keys[i], &value)) {
821            return false;
822        }
823    }
824
825    char32_t c;
826
827    // strip leading C0 control and space characters
828    while (get_next_char(ctx, &c)) {
829        if (c > ' ') {
830            unget_char(ctx);
831            break;
832        }
833    }
834
835    // URI-reference = URI / relative-ref
836    // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
837    // relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
838    PwValue scheme = PW_NULL;
839    if (!parse_scheme(ctx, &scheme)) {
840        return false;
841    }
842    if (!pw_map_update(uri_parts, &URI_SCHEME, &scheme)) {
843        return false;
844    }
845
846    // hier-part = "//" authority path-abempty
847    //           / path-absolute
848    //           / path-rootless
849    //           / path-empty
850
851    // relative-part = "//" authority path-abempty
852    //               / path-absolute
853    //               / path-noscheme
854    //               / path-empty
855
856    if (parse_double_slash(ctx)) {
857        if (!parse_authority(ctx, uri_parts)) {
858            return false;
859        }
860        if (!parse_path_abempty(ctx, uri_parts)) {
861            return false;
862        }
863    } else {
864        // parse path depending on presence of schema:
865        if (!parse_path(ctx, uri_parts, pw_is_string(&scheme))) {
866            return false;
867        }
868    }
869    if (!get_next_char(ctx, &c)) {
870        return true;
871    }
872    if (c == '?') {
873        PwValue query = PW_NULL;
874        if (!_pw_parse_uri_query_ctx(ctx, &query)) {
875            return false;
876        }
877        if (!pw_map_update(uri_parts, &URI_QUERY, &query)) {
878            return false;
879        }
880        c = current_char(ctx);
881    }
882    if (c == '#') {
883        if (!parse_fragment(ctx, uri_parts)) {
884            return false;
885        }
886    }
887    return true;
888}
889
890[[nodiscard]] bool _pw_parse_uri_iter(PwStringIter* uri, PwValuePtr result, bool allow_spaces)
891{
892    UriParserContext ctx;
893    ctx.uri = *uri;
894    ctx.allow_spaces = allow_spaces;
895    bool ret = _pw_parse_uri_ctx(&ctx, result);
896    if (ret) {
897        ret = check_end_of_uri(&ctx);
898    }
899    *uri = ctx.uri;
900    return ret;
901}
902
903[[nodiscard]] bool _pw_parse_uri_pw(PwValuePtr uri, PwValuePtr result, bool allow_spaces)
904{
905    PwStringIter iter;
906    _pw_string_iter(uri, &iter);
907    return _pw_parse_uri_iter(&iter, result, allow_spaces);
908}