1#include "include/pwlib/ctype.h"
  2
  3uint8_t _pw_chartype[128] = {
  4    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00,
  5    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01,
  6    0x01, 0x30, 0x30, 0x30, 0x20, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x20, 0x30, 0x30, 0x30, 0x30,
  7    0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x2c, 0x30, 0x30, 0x20, 0x20, 0x20, 0x30,
  8    0x30, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xa2, 0xa2, 0xa2, 0xa2, 0xa2, 0xa2, 0xa2, 0xa2, 0xa2,
  9    0xa2, 0xa2, 0xa2, 0xa2, 0xa2, 0xa2, 0xa2, 0xa2, 0xa2, 0xa2, 0xa2, 0x30, 0x30, 0x30, 0x20, 0x30,
 10    0x20, 0x6a, 0x6a, 0x6a, 0x6a, 0x6a, 0x6a, 0x62, 0x62, 0x62, 0x62, 0x62, 0x62, 0x62, 0x62, 0x62,
 11    0x62, 0x62, 0x62, 0x62, 0x62, 0x62, 0x62, 0x62, 0x62, 0x62, 0x62, 0x30, 0x20, 0x30, 0x20, 0x00
 12};
 13
 14#ifdef PW_MAKE_CTYPE
 15
 16#include <ctype.h>
 17#include <stdio.h>
 18#include <unicode/uchar.h>
 19
 20static bool rfc_is_alpha(unsigned char c)
 21/*
 22 * ALPHA =  %x41-5A / %x61-7A  ; A-Z / a-z
 23 */
 24{
 25    return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z');
 26}
 27
 28static bool rfc_is_ctl(unsigned char c)
 29/*
 30 * https://datatracker.ietf.org/doc/html/rfc5234#appendix-B.1
 31 *
 32 * CTL = %x00-1F / %x7F
 33 *
 34 * i.e. any US-ASCII control character (octets 0 - 31) and DEL (127)
 35 */
 36{
 37    return (0 <= c && c <= 31) || c == 127;
 38}
 39
 40static bool rfc_is_vchar(unsigned char c)
 41{
 42    return 0x21 <= c && c <= 0x7E;
 43}
 44
 45int main(int argc, char* argv[])
 46{
 47    for (unsigned char c = 0; c < 128; c++) {
 48        uint8_t bits = 0;
 49        {
 50            bool u = u_isspace(c);
 51            bool s = isspace(c);
 52            if (s || u) {
 53                /*
 54                here's what we've got:
 55
 56                1C: isspace=0 u_isspace=1
 57                1D: isspace=0 u_isspace=1
 58                1E: isspace=0 u_isspace=1
 59                1F: isspace=0 u_isspace=1
 60
 61                if (s && u) {
 62                    bits |= PW_CTYPE_SPACE;
 63                } else {
 64                    fprintf(stderr, "%02X: isspace=%d u_isspace=%d\n", c, s, u);
 65                }
 66
 67                As long as these characters are actually control characters, use isspace variant.
 68
 69                On second thought, isspace also includes certical tab and form feed
 70                that aren't allowed in Core Rules of https://datatracker.ietf.org/doc/html/rfc5234
 71                So, let it be ICU version.
 72                */
 73                if (u) {
 74                    bits |= PW_CTYPE_SPACE;
 75                }
 76            }
 77        }
 78        {
 79            bool u = u_isblank(c);
 80            bool s = isblank(c);
 81            if (s || u) {
 82                if (s && u) {
 83//                    bits |= PW_CTYPE_BLANK;
 84                } else {
 85                    fprintf(stderr, "%02X: isblank=%d u_isblank=%d\n", c, s, u);
 86                }
 87            }
 88        }
 89        {
 90            bool u = u_isalpha(c);
 91            bool s = isalpha(c);
 92            bool r = rfc_is_alpha(c);
 93            if (s || u|| r) {
 94                if (s && u && r) {
 95                    bits |= PW_CTYPE_ALPHA;
 96                } else {
 97                    fprintf(stderr, "%02X: isalpha=%d u_isalpha=%d rfc_is_alpha=%d\n", c, s, u, r);
 98                }
 99            }
100        }
101        {
102            bool u = u_isdigit(c);
103            bool s = isdigit(c);
104            if (s || u) {
105                if (s && u) {
106                    bits |= PW_CTYPE_DIGIT;
107                } else {
108                    fprintf(stderr, "%02X: isdigit=%d u_isdigit=%d\n", c, s, u);
109                }
110            }
111        }
112        {
113            bool u = u_isxdigit(c);
114            bool s = isxdigit(c);
115            if (s || u) {
116                if (s && u) {
117                    bits |= PW_CTYPE_XDIGIT;
118                } else {
119                    fprintf(stderr, "%02X: isxdigit=%d u_isxdigit=%d\n", c, s, u);
120                }
121            }
122        }
123        {
124            bool u = u_ispunct(c);
125            bool s = ispunct(c);
126            if (s || u) {
127                /*
128                here's what we've got:
129
130                24: ispunct=1 u_ispunct=0
131                2B: ispunct=1 u_ispunct=0
132                3C: ispunct=1 u_ispunct=0
133                3D: ispunct=1 u_ispunct=0
134                3E: ispunct=1 u_ispunct=0
135                5E: ispunct=1 u_ispunct=0
136                60: ispunct=1 u_ispunct=0
137                7C: ispunct=1 u_ispunct=0
138                7E: ispunct=1 u_ispunct=0
139
140                if (s && u) {
141                    bits |= PW_CTYPE_PUNCT;
142                } else {
143                    fprintf(stderr, "%02X: ispunct=%d u_ispunct=%d\n", c, s, u);
144                }
145                */
146                // pet is unsure, let it be ICU version:
147                if (u) {
148                    bits |= PW_CTYPE_PUNCT;
149                }
150            }
151        }
152        {
153            bool u = u_iscntrl(c);
154            bool s = iscntrl(c);
155            bool r = rfc_is_ctl(c);
156            if (s || u || r) {
157                if (s && u && r) {
158//                    bits |= PW_CTYPE_CTL;
159                } else {
160                    fprintf(stderr, "%02X: iscntrl=%d u_iscntrl=%d rfc_is_ctl=%d\n", c, s, u, r);
161                }
162            }
163        }
164        {
165            bool u = u_isgraph(c);
166            bool s = isgraph(c);
167            bool r = rfc_is_vchar(c);
168            if (s || u || r) {
169                if (s && u && r) {
170                    bits |= PW_CTYPE_GRAPH;
171                } else {
172                    fprintf(stderr, "%02X: isgraph=%d u_isgraph=%d rfc_is_vchar=%d\n", c, s, u, r);
173                }
174            }
175        }
176        {
177            bool u = u_isprint(c);
178            bool s = isprint(c);
179            if (s || u) {
180                if (s && u) {
181//                    bits |= PW_CTYPE_PRINT;
182                } else {
183                    fprintf(stderr, "%02X: isprint=%d u_isprint=%d\n", c, s, u);
184                }
185            }
186        }
187        {
188            bool u = u_islower(c);
189            bool s = islower(c);
190            if (s || u) {
191                if (s && u) {
192                    bits |= PW_CTYPE_LOWER;
193                } else {
194                    fprintf(stderr, "%02X: islower=%d u_islower=%d\n", c, s, u);
195                }
196            }
197        }
198        {
199            bool u = u_isupper(c);
200            bool s = isupper(c);
201            if (s || u) {
202                if (s && u) {
203                    bits |= PW_CTYPE_UPPER;
204                } else {
205                    fprintf(stderr, "%02X: isupper=%d u_isupper=%d\n", c, s, u);
206                }
207            }
208        }
209
210        _pw_chartype[c] = bits;
211    }
212
213      puts("        000000000011111111112222222222333");
214    printf("        ");
215    for (unsigned char c = 0; c < 128; c++) {
216        if (c <= 32 || c == 127) {
217            printf("%d", c % 10);
218        } else {
219            putchar(c);
220        }
221    }
222    putchar('\n');
223
224#define BAR(mask)  \
225    printf("%6s  ", #mask);  \
226    for (unsigned char c = 0; c < 128; c++) {  \
227        if (_pw_chartype[c] & PW_CTYPE_##mask) {  \
228            putchar('1');  \
229        } else {  \
230            putchar(' ');  \
231        }  \
232    }  \
233    putchar('\n');
234
235    BAR(SPACE);
236    BAR(ALPHA);
237    BAR(DIGIT);
238    BAR(XDIGIT);
239    BAR(PUNCT);
240    BAR(GRAPH);
241    BAR(LOWER);
242    BAR(UPPER);
243
244    // print array initializer
245    for (unsigned char c = 0, column = 0; c < 128; c++) {
246        printf("0x%02x", _pw_chartype[c]);
247        if (c < 127) {
248            putchar(',');
249        }
250        if (++column % 16) {
251            putchar(' ');
252        } else {
253            putchar('\n');
254        }
255    }
256
257    return 0;
258}
259
260#endif