1#pragma once
2
3#ifdef PW_WITH_ICU
4 // ICU library for character classification:
5# include <unicode/uchar.h>
6#endif
7
8#include <pw_types.h>
9
10/*
11 * Character classification
12 *
 * The ctype functions from the standard C library show basically the same
 * performance, which means they probably use the same approach.
 * This implementation might look unnecessary; however, having access to the bit flags
 * makes it possible to optimize more complex tests where it counts.
17 *
 * As for the tolower/toupper replacements, they do not require libstdc++ or libm,
 * and show slightly better performance on x86-64.
20 */
21
22#ifdef __cplusplus
23extern "C" {
24#endif
25
/*
 * Character type table: one byte of character type bits per 7-bit code.
 * It has exactly 128 entries, so it must only be indexed with values
 * that pass pw_is_ascii().
 */
extern uint8_t _pw_chartype[128];

/*
 * True when no bit above the low seven is set in `c`.
 *
 * The mask 127 is cast to the type of `c` before it is inverted, so the test
 * works for any integer type of `c`, including types wider than int, and
 * rejects negative values of signed types.
 *
 * `__typeof__` is used rather than plain `typeof`: the latter is a keyword
 * only in C23 and in GNU dialects, while `__typeof__` is accepted by GCC and
 * Clang in every language mode, including strict ISO C and C++.
 *
 * NOTE(review): _pw_likely is not defined in this file; presumably it is a
 * branch prediction hint that comes from pw_types.h -- confirm.
 */
#define pw_is_ascii(c)  _pw_likely(((c) & ~(__typeof__(c)) 127) == 0)

/*
 * Type bits of `c` taken straight from _pw_chartype, without any range check.
 * The cast to unsigned is there to get rid of compiler warnings.
 */
#define _pw_chartype_bits(c)  _pw_chartype[(unsigned)(c)]
31
/*
 * Bit masks for the most complicated and/or most frequently used tests;
 * these are the tests that go through the _pw_chartype table.
 * Each class owns exactly one bit, so masks can be OR-ed together.
 */
#define PW_CTYPE_SPACE   0x01
#define PW_CTYPE_ALPHA   0x02
#define PW_CTYPE_DIGIT   0x04
#define PW_CTYPE_XDIGIT  0x08
#define PW_CTYPE_PUNCT   0x10
#define PW_CTYPE_GRAPH   0x20  // also VCHAR in terms of https://datatracker.ietf.org/doc/html/rfc5234
#define PW_CTYPE_LOWER   0x40
#define PW_CTYPE_UPPER   0x80
41
// Shared shape of the table-driven ASCII tests: `c` must be a 7-bit value and
// its _pw_chartype entry must have at least one of the `flags` bits set.
// The range check comes first, so the table is never indexed by a value that
// failed pw_is_ascii(). The result is 1 or 0.
#define _pw_ascii_has(c, flags)  (pw_is_ascii(c) && (_pw_chartype_bits(c) & (flags)))

#define pw_is_ascii_space(c)   _pw_ascii_has(c, PW_CTYPE_SPACE)
#define pw_is_ascii_alpha(c)   _pw_ascii_has(c, PW_CTYPE_ALPHA)
#define pw_is_ascii_digit(c)   _pw_ascii_has(c, PW_CTYPE_DIGIT)
#define pw_is_ascii_xdigit(c)  _pw_ascii_has(c, PW_CTYPE_XDIGIT)
#define pw_is_ascii_alnum(c)   _pw_ascii_has(c, PW_CTYPE_ALPHA | PW_CTYPE_DIGIT)
#define pw_is_ascii_punct(c)   _pw_ascii_has(c, PW_CTYPE_PUNCT)
#define pw_is_ascii_graph(c)   _pw_ascii_has(c, PW_CTYPE_GRAPH)
#define pw_is_ascii_lower(c)   _pw_ascii_has(c, PW_CTYPE_LOWER)
#define pw_is_ascii_upper(c)   _pw_ascii_has(c, PW_CTYPE_UPPER)
51
// Simple tests: plain comparisons against ASCII codes; only pw_is_ascii_print
// consults the _pw_chartype table (for the GRAPH bit).
//
// Each test is guarded by pw_is_ascii(), so a value outside 0..127 (including
// any negative value) yields 0 and never reaches the table lookup.
// The argument is expanded more than once: avoid arguments with side effects.
#define pw_is_ascii_cntrl(c)  (pw_is_ascii(c) && ((c) < 32 || (c) == 127))   // codes 0..31 and DEL
#define pw_is_ascii_blank(c)  (pw_is_ascii(c) && ((c) == 9 || (c) == 32))    // horizontal tab and space
#define pw_is_ascii_print(c)  (pw_is_ascii(c) && ((c) == 32 || (_pw_chartype_bits(c) & PW_CTYPE_GRAPH)))  // space or GRAPH
56
57
/*
 * Case conversion tables, 128 entries each, indexed by a 7-bit code.
 * NOTE(review): the tables are defined elsewhere; presumably entry `i` holds
 * the lower/upper case counterpart of code `i` -- confirm against the definition.
 */
extern char _pw_charlower[128];
extern char _pw_charupper[128];

/*
 * ASCII-only case conversion: a 7-bit `c` is replaced by its table entry,
 * any other value is returned unchanged.
 *
 * The fall-through operand is parenthesized so that any expression,
 * including an assignment, can be passed as the argument.
 * The argument is expanded more than once: avoid arguments with side effects
 * that must happen exactly once.
 */
#define pw_ascii_char_lower(c)  (pw_is_ascii(c) ? _pw_charlower[(unsigned)(c)] : (c))
#define pw_ascii_char_upper(c)  (pw_is_ascii(c) ? _pw_charupper[(unsigned)(c)] : (c))
63
64
/*
 * Generic classification and case conversion macros.
 *
 * With PW_WITH_ICU defined, 7-bit values take the fast path (table lookup or
 * plain comparison) and everything else is handed over to the ICU functions
 * from <unicode/uchar.h>. Without ICU the generic names are plain aliases of
 * the ASCII-only macros, so non-ASCII values never match and are never converted.
 *
 * On the fast path the classification result is normalized to 0 or 1, which is
 * what the ASCII-only macros of the non-ICU build yield; the raw flag mask is
 * not leaked to the caller.
 * The argument is expanded more than once: avoid arguments with side effects.
 */
#ifdef PW_WITH_ICU

#   define pw_isspace(c)   (pw_is_ascii(c) ? ((_pw_chartype_bits(c) & PW_CTYPE_SPACE)  != 0) : u_isspace(c))
#   define pw_isalpha(c)   (pw_is_ascii(c) ? ((_pw_chartype_bits(c) & PW_CTYPE_ALPHA)  != 0) : u_isalpha(c))
#   define pw_isdigit(c)   (pw_is_ascii(c) ? ((_pw_chartype_bits(c) & PW_CTYPE_DIGIT)  != 0) : u_isdigit(c))
#   define pw_isxdigit(c)  (pw_is_ascii(c) ? ((_pw_chartype_bits(c) & PW_CTYPE_XDIGIT) != 0) : u_isxdigit(c))
#   define pw_isalnum(c)   (pw_is_ascii(c) ? ((_pw_chartype_bits(c) & (PW_CTYPE_ALPHA | PW_CTYPE_DIGIT)) != 0) : u_isalnum(c))
#   define pw_ispunct(c)   (pw_is_ascii(c) ? ((_pw_chartype_bits(c) & PW_CTYPE_PUNCT)  != 0) : u_ispunct(c))
#   define pw_isgraph(c)   (pw_is_ascii(c) ? ((_pw_chartype_bits(c) & PW_CTYPE_GRAPH)  != 0) : u_isgraph(c))
#   define pw_islower(c)   (pw_is_ascii(c) ? ((_pw_chartype_bits(c) & PW_CTYPE_LOWER)  != 0) : u_islower(c))
#   define pw_isupper(c)   (pw_is_ascii(c) ? ((_pw_chartype_bits(c) & PW_CTYPE_UPPER)  != 0) : u_isupper(c))

    // simple tests, no table lookup needed on the fast path (except for the GRAPH bit):
#   define pw_iscntrl(c)   (pw_is_ascii(c) ? ((c) < 32 || (c) == 127) : u_iscntrl(c))
#   define pw_isblank(c)   (pw_is_ascii(c) ? ((c) == 9 || (c) == 32) : u_isblank(c))
#   define pw_isprint(c)   (pw_is_ascii(c) ? ((c) == 32 || (_pw_chartype_bits(c) & PW_CTYPE_GRAPH) != 0) : u_isprint(c))

    // XXX: instead of u_tolower/u_toupper, pet should use variants that accept a specific locale
    //      (are there any in ICU?); the locale should probably be stored in the current task
#   define pw_char_lower(c)  (pw_is_ascii(c) ? _pw_charlower[(unsigned)(c)] : u_tolower(c))
#   define pw_char_upper(c)  (pw_is_ascii(c) ? _pw_charupper[(unsigned)(c)] : u_toupper(c))

#else

#   define pw_isspace(c)   pw_is_ascii_space(c)
#   define pw_isalpha(c)   pw_is_ascii_alpha(c)
#   define pw_isdigit(c)   pw_is_ascii_digit(c)
#   define pw_isxdigit(c)  pw_is_ascii_xdigit(c)
#   define pw_isalnum(c)   pw_is_ascii_alnum(c)
#   define pw_ispunct(c)   pw_is_ascii_punct(c)
#   define pw_isgraph(c)   pw_is_ascii_graph(c)
#   define pw_islower(c)   pw_is_ascii_lower(c)
#   define pw_isupper(c)   pw_is_ascii_upper(c)
#   define pw_iscntrl(c)   pw_is_ascii_cntrl(c)
#   define pw_isblank(c)   pw_is_ascii_blank(c)
#   define pw_isprint(c)   pw_is_ascii_print(c)

#   define pw_char_lower(c)  pw_ascii_char_lower(c)
#   define pw_char_upper(c)  pw_ascii_char_upper(c)

#endif
105
106#ifdef __cplusplus
107}
108#endif