1#pragma once
  2
  3#ifdef PW_WITH_ICU
  4    // ICU library for character classification:
  5#   include <unicode/uchar.h>
  6#endif
  7
  8#include <pw_types.h>
  9
 10/*
 11 * Character classification
 12 *
 * Basically, the ctype functions from the standard C library show the same performance,
 * which means they probably use the same approach.
 * This implementation might look unnecessary; however, having access to the bit flags
 * makes it possible to optimize more complex tests where it counts.
 *
 * As for the tolower/toupper replacements, they do not require libstdc++ and libm,
 * and show slightly better performance on x86-64.
 20 */
 21
 22#ifdef __cplusplus
 23extern "C" {
 24#endif
 25
 26extern uint8_t _pw_chartype[128];  // character type bits
 27
 28#define pw_is_ascii(c)  _pw_likely(((c) & ~(typeof(c)) 127) == 0)  // this works for any type of `c`
 29
 30#define _pw_chartype_bits(c)  _pw_chartype[(unsigned)(c)]  // typecast to unsigned to get rid of compiler warnings
 31
 32// most complicated and/or frequently used tests, they use _pw_chartype table
 33#define PW_CTYPE_SPACE     1
 34#define PW_CTYPE_ALPHA     2
 35#define PW_CTYPE_DIGIT     4
 36#define PW_CTYPE_XDIGIT    8
 37#define PW_CTYPE_PUNCT    16
 38#define PW_CTYPE_GRAPH    32  // also VCHAR in terms of https://datatracker.ietf.org/doc/html/rfc5234
 39#define PW_CTYPE_LOWER    64
 40#define PW_CTYPE_UPPER   128
 41
 42#define pw_is_ascii_space(c)  (pw_is_ascii(c) && (_pw_chartype_bits(c) & PW_CTYPE_SPACE))
 43#define pw_is_ascii_alpha(c)  (pw_is_ascii(c) && (_pw_chartype_bits(c) & PW_CTYPE_ALPHA))
 44#define pw_is_ascii_digit(c)  (pw_is_ascii(c) && (_pw_chartype_bits(c) & PW_CTYPE_DIGIT))
 45#define pw_is_ascii_xdigit(c) (pw_is_ascii(c) && (_pw_chartype_bits(c) & PW_CTYPE_XDIGIT))
 46#define pw_is_ascii_alnum(c)  (pw_is_ascii(c) && (_pw_chartype_bits(c) & (PW_CTYPE_ALPHA | PW_CTYPE_DIGIT)))
 47#define pw_is_ascii_punct(c)  (pw_is_ascii(c) && (_pw_chartype_bits(c) & PW_CTYPE_PUNCT))
 48#define pw_is_ascii_graph(c)  (pw_is_ascii(c) && (_pw_chartype_bits(c) & PW_CTYPE_GRAPH))
 49#define pw_is_ascii_lower(c)  (pw_is_ascii(c) && (_pw_chartype_bits(c) & PW_CTYPE_LOWER))
 50#define pw_is_ascii_upper(c)  (pw_is_ascii(c) && (_pw_chartype_bits(c) & PW_CTYPE_UPPER))
 51
 52// simple tests:
 53#define pw_is_ascii_cntrl(c)  (pw_is_ascii(c)? (c < 32 || c == 127))
 54#define pw_is_ascii_blank(c)  (pw_is_ascii(c)? (c == 9 || c == 32))
 55#define pw_is_ascii_print(c)  (pw_is_ascii(c)? (c == 32 || (_pw_chartype_bits(c) & PW_CTYPE_GRAPH)))
 56
 57
 58extern char _pw_charlower[128];
 59extern char _pw_charupper[128];
 60
 61#define pw_ascii_char_lower(c)  (pw_is_ascii(c)? (_pw_charlower[(unsigned)(c)]) : c)
 62#define pw_ascii_char_upper(c)  (pw_is_ascii(c)? (_pw_charupper[(unsigned)(c)]) : c)
 63
 64
 65#ifdef PW_WITH_ICU
 66
 67#   define pw_isspace(c)  (pw_is_ascii(c)? (_pw_chartype_bits(c) & PW_CTYPE_SPACE) : u_isspace(c))
 68#   define pw_isalpha(c)  (pw_is_ascii(c)? (_pw_chartype_bits(c) & PW_CTYPE_ALPHA) : u_isalpha(c))
 69#   define pw_isdigit(c)  (pw_is_ascii(c)? (_pw_chartype_bits(c) & PW_CTYPE_DIGIT) : u_isdigit(c))
 70#   define pw_isxdigit(c) (pw_is_ascii(c)? (_pw_chartype_bits(c) & PW_CTYPE_XDIGIT) : u_isxdigit(c))
 71#   define pw_isalnum(c)  (pw_is_ascii(c)? (_pw_chartype_bits(c) & (PW_CTYPE_ALPHA | PW_CTYPE_DIGIT)) : u_isalnum(c))
 72#   define pw_ispunct(c)  (pw_is_ascii(c)? (_pw_chartype_bits(c) & PW_CTYPE_PUNCT) : u_ispunct(c))
 73#   define pw_isgraph(c)  (pw_is_ascii(c)? (_pw_chartype_bits(c) & PW_CTYPE_GRAPH) : u_isgraph(c))
 74#   define pw_islower(c)  (pw_is_ascii(c)? (_pw_chartype_bits(c) & PW_CTYPE_LOWER) : u_islower(c))
 75#   define pw_isupper(c)  (pw_is_ascii(c)? (_pw_chartype_bits(c) & PW_CTYPE_UPPER) : u_isupper(c))
 76
 77#   define pw_iscntrl(c)  (pw_is_ascii(c)? (c < 32 || c == 127) : u_iscntrl(c))
 78#   define pw_isblank(c)  (pw_is_ascii(c)? (c == 9 || c == 32)  : u_isblank(c))
 79#   define pw_isprint(c)  (pw_is_ascii(c)? (c == 32 || (_pw_chartype_bits(c) & PW_CTYPE_GRAPH)) : u_isprint(c))
 80
 81//  XXX: instead u_tolower/u_toupper pet should use variants that accept specific locale (are there any in ICU?)
 82//  the locale should be stored in current task, probably
 83#   define pw_char_lower(c)  (pw_is_ascii(c)? (_pw_charlower[(unsigned)(c)]) : u_tolower(c))
 84#   define pw_char_upper(c)  (pw_is_ascii(c)? (_pw_charupper[(unsigned)(c)]) : u_toupper(c))
 85
 86#else
 87
 88#   define pw_isspace(c)  pw_is_ascii_space(c)
 89#   define pw_isalpha(c)  pw_is_ascii_alpha(c)
 90#   define pw_isdigit(c)  pw_is_ascii_digit(c)
 91#   define pw_isxdigit(c) pw_is_ascii_xdigit(c)
 92#   define pw_isalnum(c)  pw_is_ascii_alnum(c)
 93#   define pw_ispunct(c)  pw_is_ascii_punct(c)
 94#   define pw_isgraph(c)  pw_is_ascii_graph(c)
 95#   define pw_islower(c)  pw_is_ascii_lower(c)
 96#   define pw_isupper(c)  pw_is_ascii_upper(c)
 97#   define pw_iscntrl(c)  pw_is_ascii_cntrl(c)
 98#   define pw_isblank(c)  pw_is_ascii_blank(c)
 99#   define pw_isprint(c)  pw_is_ascii_print(c)
100
101#   define pw_char_lower(c)  pw_ascii_char_lower(c)
102#   define pw_char_upper(c)  pw_ascii_char_upper(c)
103
104#endif
105
106#ifdef __cplusplus
107}
108#endif