1#include "include/pw.h"
2#include "include/pwlib/idna.h"
3
4#include "src/lib/idna/idna_internal.h"
5
6/*
 * Caveat: this is a very basic implementation.
 *
 * The following normative documents should be reviewed and applied where necessary:
10 *
11 * https://www.rfc-editor.org/rfc/inline-errata/rfc5890.html Internationalized Domain Names for Applications (IDNA): Definitions and Document Framework
12 * https://www.rfc-editor.org/rfc/rfc5891.html Internationalized Domain Names in Applications (IDNA): Protocol
13 * https://www.rfc-editor.org/rfc/inline-errata/rfc5892.html The Unicode Code Points and Internationalized Domain Names for Applications (IDNA)
14 * https://www.rfc-editor.org/rfc/rfc5893.html Right-to-Left Scripts for Internationalized Domain Names for Applications (IDNA)
15 * https://www.rfc-editor.org/rfc/rfc5894.html Internationalized Domain Names for Applications (IDNA): Background, Explanation, and Rationale
16 * https://www.rfc-editor.org/rfc/rfc5895.html Mapping Characters for Internationalized Domain Names in Applications (IDNA) 2008
17 * https://www.rfc-editor.org/rfc/rfc6452.html The Unicode Code Points and Internationalized Domain Names for Applications (IDNA) - Unicode 6.0
18 * https://www.rfc-editor.org/rfc/rfc8753.html Internationalized Domain Names for Applications (IDNA) Review for New Unicode Versions
19 * https://www.rfc-editor.org/rfc/rfc9233.html Internationalized Domain Names for Applications 2008 (IDNA2008) and Unicode 12.0.0
20 */
21
22/*
23This code is based on:
24
25punycode.c from RFC 3492
26http://www.nicemice.net/idn/
27Adam M. Costello
28http://www.nicemice.net/amc/
29
30This is ANSI C code (C89) implementing Punycode (RFC 3492).
31*/
32
33static unsigned decode_digit(unsigned cp)
34{
35 return cp - 48 < 10 ? cp - 22 : cp - 65 < 26 ? cp - 65 :
36 cp - 97 < 26 ? cp - 97 : base;
37}
38
39[[nodiscard]] static bool decode(PwValuePtr input, PwValuePtr output)
40{
41 if (!pw_startswith(input, "xn--")) {
42 pw_clone2(output, input);
43 return true;
44 }
45 PwStringIter iter;
46 _pw_string_iter(input, &iter);
47 iter.current_ptr += 4 * iter.char_size; // skip prefix
48
49 /* Handle the basic code points: Let b be the number of input code */
50 /* points before the last delimiter, or 0 if there is none, then */
51 /* copy the first b code points to the output. */
52
53 unsigned b;
54 if (pw_strrchr(input, delimiter, UINT_MAX, &b)) {
55 if (b > 4) {
56 if (!pw_string_append_substring(output, input, 4, b)) {
57 return false;
58 }
59 // check if code points are really basic
60 PwStringIter iter_out;
61 _pw_string_iter(output, &iter_out);
62 char32_t c;
63 while (_pw_string_iter_next(&iter_out, &c)) {
64 if (!basic(c)) {
65 pw_set_status(PwStatus(PweBadInput, "Bad punycode"));
66 return false;
67 }
68 }
69 // skip copied codepoints
70 iter.current_ptr = iter.start_ptr + (b + 1) * iter.char_size;
71 }
72 }
73
74 /* Main decoding loop */
75
76 unsigned n, out, i, bias, oldi, w, k, digit, t;
77
78 /* Initialize the state: */
79
80 n = initial_n;
81 out = i = 0; // out is the number of code points in the output array
82 bias = initial_bias;
83
84 while (iter.current_ptr < iter.end_ptr) {
85
86 /* Decode a generalized variable-length integer into delta, */
87 /* which gets added to i. The overflow checking is easier */
88 /* if we increase i as we go, then subtract off its starting */
89 /* value at the end to obtain delta. */
90
91 for (oldi = i, w = 1, k = base; ; k += base) {
92 char32_t c;
93 if (!_pw_string_iter_next(&iter, &c)) {
94 break;
95 }
96 digit = decode_digit(c);
97 if (digit >= base) {
98 pw_set_status(PwStatus(PweBadInput, "Bad punycode"));
99 return false;
100 }
101 if (digit > (maxint - i) / w) {
102 pw_set_status(PwStatus(PweBadInput, "Punycode overflow"));
103 return false;
104 }
105 i += digit * w;
106 t = k <= bias /* + tmin */ ? tmin : /* +tmin not needed */
107 k >= bias + tmax ? tmax : k - bias;
108 if (digit < t) {
109 break;
110 }
111 if (w > maxint / (base - t)) {
112 pw_set_status(PwStatus(PweBadInput, "Punycode overflow"));
113 return false;
114 }
115 w *= (base - t);
116 }
117
118 bias = adapt(i - oldi, out + 1, oldi == 0);
119
120 /* i was supposed to wrap around from out+1 to 0, */
121 /* incrementing n each time, so we'll fix that now: */
122
123 if (i / (out + 1) > maxint - n) {
124 pw_set_status(PwStatus(PweBadInput, "Punycode overflow"));
125 return false;
126 }
127 n += i / (out + 1);
128 i %= (out + 1);
129
130 /* Insert n at position i of the output: */
131 if (!pw_string_insert(output, i, n)) {
132 return false;
133 }
134 i++;
135 out++;
136 }
137 return true;
138}
139
140[[nodiscard]] bool pw_idna_decode(PwValuePtr str, PwValuePtr result)
141{
142 PwValue str_parts = PW_NULL;
143 if (!pw_string_split_chr(str, '.', 0, &str_parts)) {
144 return false;
145 }
146 PwValue result_parts = PW_NULL;
147 if (!pw_create(PwTypeId_BasicArray, &result_parts)) {
148 return false;
149 }
150 unsigned n = pw_array_length(&str_parts);
151 for (unsigned i = 0; i < n; i++) {
152 PwValue s_part = PW_NULL;
153 if (!pw_array_item(&str_parts, i, &s_part)) {
154 return false;
155 }
156 PwValue r_part = PW_STRING();
157 if (!decode(&s_part, &r_part)) {
158 return false;
159 }
160 if (!pw_array_append(&result_parts, &r_part)) {
161 return false;
162 }
163 }
164 return pw_array_join(&result_parts, '.', result);
165}