1#include "include/pw.h"
  2#include "include/pwlib/idna.h"
  3
  4#include "src/lib/idna/idna_internal.h"
  5
  6/*
 * Caveat: this is a very basic implementation.
 *
 * The following normative documents should be reviewed and applied where necessary:
 10 *
 11 * https://www.rfc-editor.org/rfc/inline-errata/rfc5890.html Internationalized Domain Names for Applications (IDNA): Definitions and Document Framework
 12 * https://www.rfc-editor.org/rfc/rfc5891.html Internationalized Domain Names in Applications (IDNA): Protocol
 13 * https://www.rfc-editor.org/rfc/inline-errata/rfc5892.html The Unicode Code Points and Internationalized Domain Names for Applications (IDNA)
 14 * https://www.rfc-editor.org/rfc/rfc5893.html Right-to-Left Scripts for Internationalized Domain Names for Applications (IDNA)
 15 * https://www.rfc-editor.org/rfc/rfc5894.html Internationalized Domain Names for Applications (IDNA): Background, Explanation, and Rationale
 16 * https://www.rfc-editor.org/rfc/rfc5895.html Mapping Characters for Internationalized Domain Names in Applications (IDNA) 2008
 17 * https://www.rfc-editor.org/rfc/rfc6452.html The Unicode Code Points and Internationalized Domain Names for Applications (IDNA) - Unicode 6.0
 18 * https://www.rfc-editor.org/rfc/rfc8753.html Internationalized Domain Names for Applications (IDNA) Review for New Unicode Versions
 19 * https://www.rfc-editor.org/rfc/rfc9233.html Internationalized Domain Names for Applications 2008 (IDNA2008) and Unicode 12.0.0
 20 */
 21
 22/*
 23This code is based on:
 24
 25punycode.c from RFC 3492
 26http://www.nicemice.net/idn/
 27Adam M. Costello
 28http://www.nicemice.net/amc/
 29
 30This is ANSI C code (C89) implementing Punycode (RFC 3492).
 31*/
 32
 33static unsigned decode_digit(unsigned cp)
 34{
 35    return  cp - 48 < 10 ? cp - 22 :  cp - 65 < 26 ? cp - 65 :
 36            cp - 97 < 26 ? cp - 97 :  base;
 37}
 38
 39[[nodiscard]] static bool decode(PwValuePtr input, PwValuePtr output)
 40{
 41    if (!pw_startswith(input, "xn--")) {
 42        pw_clone2(output, input);
 43        return true;
 44    }
 45    PwStringIter iter;
 46    _pw_string_iter(input, &iter);
 47    iter.current_ptr += 4 * iter.char_size;  // skip prefix
 48
 49    /* Handle the basic code points:  Let b be the number of input code */
 50    /* points before the last delimiter, or 0 if there is none, then    */
 51    /* copy the first b code points to the output.                      */
 52
 53    unsigned b;
 54    if (pw_strrchr(input, delimiter, UINT_MAX, &b)) {
 55        if (b > 4) {
 56            if (!pw_string_append_substring(output, input, 4, b)) {
 57                return false;
 58            }
 59            // check if code points are really basic
 60            PwStringIter iter_out;
 61            _pw_string_iter(output, &iter_out);
 62            char32_t c;
 63            while (_pw_string_iter_next(&iter_out, &c)) {
 64                if (!basic(c)) {
 65                    pw_set_status(PwStatus(PweBadInput, "Bad punycode"));
 66                    return false;
 67                }
 68            }
 69            // skip copied codepoints
 70            iter.current_ptr = iter.start_ptr + (b + 1) * iter.char_size;
 71        }
 72    }
 73
 74    /* Main decoding loop */
 75
 76    unsigned n, out, i, bias, oldi, w, k, digit, t;
 77
 78    /* Initialize the state: */
 79
 80    n = initial_n;
 81    out = i = 0;           // out is the number of code points in the output array
 82    bias = initial_bias;
 83
 84    while (iter.current_ptr < iter.end_ptr) {
 85
 86        /* Decode a generalized variable-length integer into delta,  */
 87        /* which gets added to i.  The overflow checking is easier   */
 88        /* if we increase i as we go, then subtract off its starting */
 89        /* value at the end to obtain delta.                         */
 90
 91        for (oldi = i, w = 1, k = base;  ;  k += base) {
 92            char32_t c;
 93            if (!_pw_string_iter_next(&iter, &c)) {
 94                break;
 95            }
 96            digit = decode_digit(c);
 97            if (digit >= base) {
 98                pw_set_status(PwStatus(PweBadInput, "Bad punycode"));
 99                return false;
100            }
101            if (digit > (maxint - i) / w) {
102                pw_set_status(PwStatus(PweBadInput, "Punycode overflow"));
103                return false;
104            }
105            i += digit * w;
106            t = k <= bias /* + tmin */ ? tmin :     /* +tmin not needed */
107                k >= bias + tmax ? tmax : k - bias;
108            if (digit < t) {
109                break;
110            }
111            if (w > maxint / (base - t)) {
112                pw_set_status(PwStatus(PweBadInput, "Punycode overflow"));
113                return false;
114            }
115            w *= (base - t);
116        }
117
118        bias = adapt(i - oldi, out + 1, oldi == 0);
119
120        /* i was supposed to wrap around from out+1 to 0,   */
121        /* incrementing n each time, so we'll fix that now: */
122
123        if (i / (out + 1) > maxint - n) {
124            pw_set_status(PwStatus(PweBadInput, "Punycode overflow"));
125            return false;
126        }
127        n += i / (out + 1);
128        i %= (out + 1);
129
130        /* Insert n at position i of the output: */
131        if (!pw_string_insert(output, i, n)) {
132            return false;
133        }
134        i++;
135        out++;
136    }
137    return true;
138}
139
140[[nodiscard]] bool pw_idna_decode(PwValuePtr str, PwValuePtr result)
141{
142    PwValue str_parts = PW_NULL;
143    if (!pw_string_split_chr(str, '.', 0, &str_parts)) {
144        return false;
145    }
146    PwValue result_parts = PW_NULL;
147    if (!pw_create(PwTypeId_BasicArray, &result_parts)) {
148        return false;
149    }
150    unsigned n = pw_array_length(&str_parts);
151    for (unsigned i = 0; i < n; i++) {
152        PwValue s_part = PW_NULL;
153        if (!pw_array_item(&str_parts, i, &s_part)) {
154            return false;
155        }
156        PwValue r_part = PW_STRING();
157        if (!decode(&s_part, &r_part)) {
158            return false;
159        }
160        if (!pw_array_append(&result_parts, &r_part)) {
161            return false;
162        }
163    }
164    return pw_array_join(&result_parts, '.', result);
165}