1#include "include/pw.h"
  2#include "include/pwlib/file.h"
  3#include "src/types/string/string_internal.h"
  4
  5[[nodiscard]] bool pw_load_text_file(PwValuePtr filename, PwValuePtr content)
  6{
  7    pw_destroy(content);
  8    *content = PwString();
  9    uint8_t* dest_ptr = _pw_string_start(content);
 10
 11    PwValue f = PW_NULL;
 12    if (!pw_file_open_unbuffered(filename, O_RDONLY, 0, &f)) {
 13        return false;
 14    }
 15    bool ret = false;
 16    uint8_t  partial_utf8[4];   // UTF-8 sequence may span adjacent reads
 17    unsigned partial_utf8_len = 0;
 18    for (;;) {
 19        uint8_t buffer[4096];
 20        unsigned bytes_remaining;
 21        if (!pw_read(&f, buffer, sizeof(buffer), &bytes_remaining)) {
 22            break;
 23        }
 24        if (bytes_remaining == 0) {
 25            // end of file
 26            ret = true;
 27            break;
 28        }
 29        uint8_t* src_ptr = buffer;
 30        if (partial_utf8_len) {
 31            // process partial UTF-8 sequence
 32            while (partial_utf8_len < sizeof(partial_utf8)) {
 33                if (bytes_remaining == 0) {
 34                    // premature end of file
 35                    // XXX warn?
 36                    break;
 37                }
 38                uint8_t c = *src_ptr;
 39                // expect continuation bytes only
 40                if ((c & 0xC0) != 0x80) {
 41                    // malformed UTF-8 sequence
 42                    partial_utf8_len = 0;
 43                    break;
 44                }
 45                src_ptr++;
 46                bytes_remaining--;
 47                partial_utf8[partial_utf8_len++] = c;
 48
 49                uint8_t* ptr = partial_utf8;
 50                unsigned rem = partial_utf8_len;
 51                char32_t chr;
 52                if (_pw_decode_utf8_buffer(&ptr, &rem, &chr)) {
 53                    if (chr != 0 && chr != 0xFFFFFFFF) {
 54                        if (!pw_string_append(content, chr)) {
 55                            return false;
 56                        }
 57                        // update dest_ptr because pw_string_append may reallocate
 58                        _pw_string_start_end(content, &dest_ptr);
 59                    }
 60                    pw_assert(rem == 0);
 61                    break;
 62                }
 63            }
 64            partial_utf8_len = 0;
 65        }
 66        uint8_t dest_char_size = content->str_params.char_size;
 67        unsigned dest_avail = _pw_string_avail(content);
 68        unsigned num_appended = 0;
 69        while (bytes_remaining) {
 70            char32_t chr;
 71            if (!_pw_decode_utf8_buffer(&src_ptr, &bytes_remaining, &chr)) {
 72                partial_utf8_len = bytes_remaining;
 73                pw_assert(partial_utf8_len <= sizeof(partial_utf8));
 74                for (unsigned i = 0; i < partial_utf8_len; i++) {
 75                    partial_utf8[i] = *src_ptr++;
 76                }
 77                break;
 78            }
 79            if (chr == 0 || chr == 0xFFFFFFFF) {
 80                continue;
 81            }
 82            /*
 83            if (!pw_string_append(content, chr)) {
 84                return false;
 85            }
 86
 87            Optimized code below is twice faster than pw_string_append:
 88            */
 89
 90            uint8_t src_char_size = calc_char_size(chr);
 91            if (_pw_unlikely(src_char_size > dest_char_size || dest_avail == 0)) {
 92                // need to expand string
 93                _pw_string_inc_length(content, num_appended);
 94                num_appended = 0;
 95                unsigned n = (dest_avail == 0)? 512 : 0;
 96                if (!_pw_expand_string(content, n, src_char_size)) {
 97                    return false;
 98                }
 99                dest_avail = _pw_string_avail(content);
100                _pw_string_start_end(content, &dest_ptr);
101                dest_char_size = content->str_params.char_size;
102            }
103            dest_ptr += _pw_put_char(dest_ptr, chr, dest_char_size);
104            num_appended++;
105            dest_avail--;
106        }
107        _pw_string_inc_length(content, num_appended);
108    }
109    pw_close(&f);
110    return ret;
111}