1#include "include/pwlib/ctype.h"
2
/*
 * Character classification table for character codes 0..127.
 *
 * Entry i holds the bitwise OR of the PW_CTYPE_* flags recorded for
 * character code i. The rows follow the layout printed by the generator
 * at the bottom of this file (16 entries per row, lowercase hex).
 *
 * The array is deliberately not const: when PW_MAKE_CTYPE is defined,
 * the generator recomputes every entry in place before printing it.
 */
uint8_t _pw_chartype[128] = {
    /* 0x00 - 0x0f */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00,
    /* 0x10 - 0x1f */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01,
    /* 0x20 - 0x2f */
    0x01, 0x30, 0x30, 0x30, 0x20, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x20, 0x30, 0x30, 0x30, 0x30,
    /* 0x30 - 0x3f */
    0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x30, 0x30, 0x20, 0x20, 0x20, 0x30,
    /* 0x40 - 0x4f */
    0x30, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xa2, 0xa2, 0xa2, 0xa2, 0xa2, 0xa2, 0xa2, 0xa2, 0xa2,
    /* 0x50 - 0x5f */
    0xa2, 0xa2, 0xa2, 0xa2, 0xa2, 0xa2, 0xa2, 0xa2, 0xa2, 0xa2, 0xa2, 0x30, 0x30, 0x30, 0x20, 0x30,
    /* 0x60 - 0x6f */
    0x20, 0x6a, 0x6a, 0x6a, 0x6a, 0x6a, 0x6a, 0x62, 0x62, 0x62, 0x62, 0x62, 0x62, 0x62, 0x62, 0x62,
    /* 0x70 - 0x7f */
    0x62, 0x62, 0x62, 0x62, 0x62, 0x62, 0x62, 0x62, 0x62, 0x62, 0x62, 0x30, 0x20, 0x30, 0x20, 0x00
};
13
14#ifdef PW_MAKE_CTYPE
15
16#include <ctype.h>
17#include <stdio.h>
18#include <unicode/uchar.h>
19
static bool rfc_is_alpha(unsigned char c)
/*
 * ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
 *
 * Returns true when c is a letter in the range A-Z or a-z,
 * and false for every other octet.
 */
{
    const bool in_upper_range = c >= 'A' && c <= 'Z';
    const bool in_lower_range = c >= 'a' && c <= 'z';

    return in_upper_range || in_lower_range;
}
27
static bool rfc_is_ctl(unsigned char c)
/*
 * https://datatracker.ietf.org/doc/html/rfc5234#appendix-B.1
 *
 * CTL = %x00-1F / %x7F
 *
 * i.e. any US-ASCII control character (octets 0 - 31) and DEL (127)
 *
 * Returns true when c is one of those octets. No lower-bound test is
 * needed: c is unsigned, so it can never be below %x00.
 */
{
    return c <= 0x1F || c == 0x7F;
}
39
static bool rfc_is_vchar(unsigned char c)
/*
 * https://datatracker.ietf.org/doc/html/rfc5234#appendix-B.1
 *
 * VCHAR = %x21-7E ; visible (printing) characters
 *
 * Returns true when c lies inside that closed range.
 */
{
    const unsigned char first_visible = 0x21;
    const unsigned char last_visible = 0x7E;

    return c >= first_visible && c <= last_visible;
}
44
45int main(int argc, char* argv[])
46{
47 for (unsigned char c = 0; c < 128; c++) {
48 uint8_t bits = 0;
49 {
50 bool u = u_isspace(c);
51 bool s = isspace(c);
52 if (s || u) {
53 /*
54 here's what we've got:
55
56 1C: isspace=0 u_isspace=1
57 1D: isspace=0 u_isspace=1
58 1E: isspace=0 u_isspace=1
59 1F: isspace=0 u_isspace=1
60
61 if (s && u) {
62 bits |= PW_CTYPE_SPACE;
63 } else {
64 fprintf(stderr, "%02X: isspace=%d u_isspace=%d\n", c, s, u);
65 }
66
67 As long as these characters are actually control characters, use isspace variant.
68
69 On second thought, isspace also includes certical tab and form feed
70 that aren't allowed in Core Rules of https://datatracker.ietf.org/doc/html/rfc5234
71 So, let it be ICU version.
72 */
73 if (u) {
74 bits |= PW_CTYPE_SPACE;
75 }
76 }
77 }
78 {
79 bool u = u_isblank(c);
80 bool s = isblank(c);
81 if (s || u) {
82 if (s && u) {
83// bits |= PW_CTYPE_BLANK;
84 } else {
85 fprintf(stderr, "%02X: isblank=%d u_isblank=%d\n", c, s, u);
86 }
87 }
88 }
89 {
90 bool u = u_isalpha(c);
91 bool s = isalpha(c);
92 bool r = rfc_is_alpha(c);
93 if (s || u|| r) {
94 if (s && u && r) {
95 bits |= PW_CTYPE_ALPHA;
96 } else {
97 fprintf(stderr, "%02X: isalpha=%d u_isalpha=%d rfc_is_alpha=%d\n", c, s, u, r);
98 }
99 }
100 }
101 {
102 bool u = u_isdigit(c);
103 bool s = isdigit(c);
104 if (s || u) {
105 if (s && u) {
106 bits |= PW_CTYPE_DIGIT;
107 } else {
108 fprintf(stderr, "%02X: isdigit=%d u_isdigit=%d\n", c, s, u);
109 }
110 }
111 }
112 {
113 bool u = u_isxdigit(c);
114 bool s = isxdigit(c);
115 if (s || u) {
116 if (s && u) {
117 bits |= PW_CTYPE_XDIGIT;
118 } else {
119 fprintf(stderr, "%02X: isxdigit=%d u_isxdigit=%d\n", c, s, u);
120 }
121 }
122 }
123 {
124 bool u = u_ispunct(c);
125 bool s = ispunct(c);
126 if (s || u) {
127 /*
128 here's what we've got:
129
130 24: ispunct=1 u_ispunct=0
131 2B: ispunct=1 u_ispunct=0
132 3C: ispunct=1 u_ispunct=0
133 3D: ispunct=1 u_ispunct=0
134 3E: ispunct=1 u_ispunct=0
135 5E: ispunct=1 u_ispunct=0
136 60: ispunct=1 u_ispunct=0
137 7C: ispunct=1 u_ispunct=0
138 7E: ispunct=1 u_ispunct=0
139
140 if (s && u) {
141 bits |= PW_CTYPE_PUNCT;
142 } else {
143 fprintf(stderr, "%02X: ispunct=%d u_ispunct=%d\n", c, s, u);
144 }
145 */
146 // pet is unsure, let it be ICU version:
147 if (u) {
148 bits |= PW_CTYPE_PUNCT;
149 }
150 }
151 }
152 {
153 bool u = u_iscntrl(c);
154 bool s = iscntrl(c);
155 bool r = rfc_is_ctl(c);
156 if (s || u || r) {
157 if (s && u && r) {
158// bits |= PW_CTYPE_CTL;
159 } else {
160 fprintf(stderr, "%02X: iscntrl=%d u_iscntrl=%d rfc_is_ctl=%d\n", c, s, u, r);
161 }
162 }
163 }
164 {
165 bool u = u_isgraph(c);
166 bool s = isgraph(c);
167 bool r = rfc_is_vchar(c);
168 if (s || u || r) {
169 if (s && u && r) {
170 bits |= PW_CTYPE_GRAPH;
171 } else {
172 fprintf(stderr, "%02X: isgraph=%d u_isgraph=%d rfc_is_vchar=%d\n", c, s, u, r);
173 }
174 }
175 }
176 {
177 bool u = u_isprint(c);
178 bool s = isprint(c);
179 if (s || u) {
180 if (s && u) {
181// bits |= PW_CTYPE_PRINT;
182 } else {
183 fprintf(stderr, "%02X: isprint=%d u_isprint=%d\n", c, s, u);
184 }
185 }
186 }
187 {
188 bool u = u_islower(c);
189 bool s = islower(c);
190 if (s || u) {
191 if (s && u) {
192 bits |= PW_CTYPE_LOWER;
193 } else {
194 fprintf(stderr, "%02X: islower=%d u_islower=%d\n", c, s, u);
195 }
196 }
197 }
198 {
199 bool u = u_isupper(c);
200 bool s = isupper(c);
201 if (s || u) {
202 if (s && u) {
203 bits |= PW_CTYPE_UPPER;
204 } else {
205 fprintf(stderr, "%02X: isupper=%d u_isupper=%d\n", c, s, u);
206 }
207 }
208 }
209
210 _pw_chartype[c] = bits;
211 }
212
213 puts(" 000000000011111111112222222222333");
214 printf(" ");
215 for (unsigned char c = 0; c < 128; c++) {
216 if (c <= 32 || c == 127) {
217 printf("%d", c % 10);
218 } else {
219 putchar(c);
220 }
221 }
222 putchar('\n');
223
224#define BAR(mask) \
225 printf("%6s ", #mask); \
226 for (unsigned char c = 0; c < 128; c++) { \
227 if (_pw_chartype[c] & PW_CTYPE_##mask) { \
228 putchar('1'); \
229 } else { \
230 putchar(' '); \
231 } \
232 } \
233 putchar('\n');
234
235 BAR(SPACE);
236 BAR(ALPHA);
237 BAR(DIGIT);
238 BAR(XDIGIT);
239 BAR(PUNCT);
240 BAR(GRAPH);
241 BAR(LOWER);
242 BAR(UPPER);
243
244 // print array initializer
245 for (unsigned char c = 0, column = 0; c < 128; c++) {
246 printf("0x%02x", _pw_chartype[c]);
247 if (c < 127) {
248 putchar(',');
249 }
250 if (++column % 16) {
251 putchar(' ');
252 } else {
253 putchar('\n');
254 }
255 }
256
257 return 0;
258}
259
260#endif