1#include <errno.h>
2#include <signal.h>
3#include <setjmp.h>
4#include <stdarg.h>
5#include <stdio.h>
6#include <stdlib.h>
7#include <string.h>
8#include <threads.h>
9#include <sys/mman.h>
10
11#include "allocator.h"
12#include "allocator_bitmap.h"
13
14#ifdef DEBUG
15# include "dump.h"
16#endif
17
18
19// to avoid confusion with UNIT_*
20#define UNSIGNED_WIDTH UINT_WIDTH
21#define UNSIGNED_MAX UINT_MAX
22
23unsigned bitmap_page_units;
24unsigned bitmap_page_units_pwr2;
25unsigned bitmap_page_units_bitmask;
26unsigned bitmap_page_bytes;
27unsigned bitmap_page_bytes_pwr2;
28
29// superblock
30Chunk chunks[MAX_CHUNKS] = {};
31
32// Range of virtual address space reserved for num_bits_set arrays.
33uint16_t* num_bits_set_start;
34uint16_t* num_bits_set_end;
35
36// multithreading serialization
37static mtx_t lock;
38
39// signal handling
40static struct sigaction segv_default_action;
41static thread_local sig_atomic_t segv_handler_enabled = false;
42static thread_local sigjmp_buf jmptarget;
43
44#ifdef DEBUG
45 static volatile unsigned nbs_pages_committed = 0;
46 static volatile unsigned bitmap_pages_committed = 0;
47 static volatile unsigned data_pages_committed = 0;
48#endif
49
50
51static void enter_allocator()
52{
53 mtx_lock(&lock);
54 segv_handler_enabled = true;
55}
56
57static void exit_allocator()
58{
59 segv_handler_enabled = false;
60 mtx_unlock(&lock);
61}
62
63static void print_msg(const char* func_name, char* fmt, ...)
64{
65 fprintf(stderr, "Bitmap allocator %s: ", func_name);
66 va_list ap;
67 va_start(ap);
68 vfprintf(stderr, fmt, ap);
69 va_end(ap);
70}
71
72static unsigned count_page_leading_zeros(void* bitmap_page)
73/*
74 * Count zero bits on the page starting from the begining.
75 */
76{
77 // XXX use optimal data type and mind endianess
78 unsigned num_bits = 0;
79 unsigned* ptr = (unsigned*) bitmap_page;
80 for (unsigned i = 0, n = sys_page_size / sizeof(unsigned); i < n; i++, ptr++) {
81 unsigned w = *ptr;
82 if (w) {
83 num_bits += count_trailing_zeros(w);
84 break;
85 } else {
86 num_bits += UNSIGNED_WIDTH;
87 }
88 }
89 return num_bits;
90}
91
92static unsigned count_page_trailing_zeros(void* bitmap_page)
93/*
94 * Count zero bits on the page backward.
95 */
96{
97 // XXX use optimal data type and mind endianess
98 unsigned num_bits = 0;
99 unsigned n = sys_page_size / sizeof(unsigned);
100 unsigned* ptr = ((unsigned*) bitmap_page) + n - 1;
101 for (; n >= 0; n--, ptr--) {
102 unsigned w = *ptr;
103 if (w) {
104 num_bits += count_leading_zeros(w);
105 break;
106 } else {
107 num_bits += UNSIGNED_WIDTH;
108 }
109 }
110 return num_bits;
111}
112
113static bool find_zero_bits(void* bitmap_page, unsigned length, unsigned* bit_index)
114/*
115 * Find sequence of zero bits within bitmap_page.
116 */
117{
118 // XXX use optimal data type and mind endianess
119 TRACE("bitmap_page=%p length=%u\n", bitmap_page, length);
120 unsigned start = 0;
121 unsigned prev_zeros = 0;
122 unsigned* ptr = (unsigned*) bitmap_page;
123 if (length <= UNSIGNED_WIDTH) {
124
125 // search short sequence
126 for (unsigned i = 0, n = sys_page_size / sizeof(unsigned); i < n; i++, ptr++) {
127 unsigned w = *ptr;
128 if (w == UNSIGNED_MAX) {
129 start += UNSIGNED_WIDTH;
130 prev_zeros = 0;
131 continue;
132 }
133 if (w == 0) {
134 *bit_index = start;
135 return true;
136 }
137 unsigned leading_zeros = count_leading_zeros(w);
138 while (w) {
139 unsigned trailing_zeros = count_trailing_zeros(w);
140 if (trailing_zeros) {
141 if (length <= prev_zeros + trailing_zeros) {
142 *bit_index = start;
143 return true;
144 }
145 w >>= trailing_zeros;
146 start += trailing_zeros;
147 }
148 prev_zeros = 0;
149 unsigned trailing_ones = count_trailing_ones(w);
150 if (trailing_ones) {
151 w >>= trailing_ones;
152 start += trailing_ones;
153 }
154 }
155 start = i * UNSIGNED_WIDTH;
156 prev_zeros = leading_zeros;
157 if (leading_zeros) {
158 start += UNSIGNED_WIDTH - leading_zeros;
159 }
160 if (length <= prev_zeros) {
161 *bit_index = start;
162 return true;
163 }
164 }
165 } else {
166 // search long sequence
167
168 for (unsigned i = 0, n = sys_page_size / sizeof(unsigned); i < n; i++, ptr++) {
169 unsigned w = *ptr;
170 if (w == 0) {
171 prev_zeros += UNSIGNED_WIDTH;
172 } else {
173 unsigned trailing_zeros = count_trailing_zeros(w);
174 if (length <= prev_zeros + trailing_zeros) {
175 *bit_index = start;
176 return true;
177 }
178 unsigned leading_zeros = count_leading_zeros(w);
179
180 start = i * UNSIGNED_WIDTH;
181 prev_zeros = leading_zeros;
182 if (leading_zeros) {
183 start += UNSIGNED_WIDTH - leading_zeros;
184 }
185 }
186 }
187 }
188 if (length <= prev_zeros) {
189 *bit_index = start;
190 return true;
191 }
192 return false;
193}
194
195static bool find_empty_bitmap_page(Chunk* chunk, unsigned* bitmap_page_index)
196/*
197 * Find zero value in chunk->num_bits_set array.
198 * If found, wWrite its position to bitmap_page_index and return true.
199 */
200{
201 uint16_t* ptr = chunk->num_bits_set;
202 for (unsigned i = 0, n = chunk->num_bitmap_pages; i < n; i++, ptr++) {
203 if (*ptr == 0) {
204 *bitmap_page_index = i;
205 return true;
206 }
207 }
208 return false;
209}
210
211static inline unsigned count_empty_bitmap_pages(uint16_t* num_bits_set_ptr, unsigned max_pages)
212{
213 unsigned count = 0;
214 while (max_pages--) {
215 if (*num_bits_set_ptr++) {
216 break;
217 }
218 count++;
219 }
220 return count;
221}
222
223static bool find_consecutive_empty_bitmap_pages(Chunk* chunk, unsigned num_pages, unsigned* bitmap_page_index)
224/*
225 * Find sequence of zero values in chunk->num_bits_set array.
226 * If found, write its position to bitmap_page_index and return true.
227 */
228{
229 if (chunk->num_bitmap_pages < num_pages) {
230 ERR("num_pages %u is bigger than chunk->num_bitmap_pages %u\n", num_pages, chunk->num_bitmap_pages);
231 abort();
232 }
233 uint16_t* ptr = chunk->num_bits_set;
234 for (unsigned i = 0, n = chunk->num_bitmap_pages - num_pages; n != 0; i++, n--) {
235 if (*ptr++ == 0) {
236 unsigned limit = num_pages - 1; // checked the first one already, that's why -1
237 unsigned count = count_empty_bitmap_pages(ptr, limit);
238 if (count == limit) {
239 *bitmap_page_index = i;
240 return true;
241 }
242 count++; // skip checked non-empty page
243 ptr += count;
244 i += count;
245 n -= count;
246 }
247 }
248 return false;
249}
250
251static unsigned count_consecutive_zero_bits(Chunk* chunk, unsigned offset, unsigned limit)
252/*
253 * Count consecutive zero bits in the bitmap starting from `offset` bit up to `limit`.
254 * Return the number of bits counted.
255 */
256{
257 // XXX use optimal data type and mind endianess
258 TRACE("chunk=%zd offset=%u limit=%u\n", chunk - chunks, offset, limit);
259 unsigned count = 0;
260 uint8_t* addr = unit_index_to_address(chunk, offset);
261
262 // count leading bits up to the the next byte boundary
263 unsigned bit_index = offset & 7;
264 if (bit_index) {
265 /*
266 * Prepare leading_bitmask for checking first bits.
267 * Suppose offset is 19, thus, bits start from byte index 2 and bit index 3.
268 * The bitmask will be 0b11111000 (mind MSB first in this notation)
269 * However, if limit is less than (8 - bit index), the bitmask should be limited as well.
270 * I.e. if limit is 3, the bitmask will be 0b00111000
271 */
272 unsigned num_leading_bits = 8 - bit_index;
273 unsigned leading_bitmask = ~((1 << bit_index) - 1);
274 if (limit < num_leading_bits) {
275 leading_bitmask &= (1 << (limit + bit_index)) - 1;
276 }
277 TRACE("num_leading_bits=%u leading_bitmask=%x\n", num_leading_bits, leading_bitmask);
278 // check leading bits
279 uint8_t n = *addr++ & leading_bitmask;
280 if (n) {
281 // have bits set, return the number of bits starting from bit_index
282 return count_trailing_zeros(n) - bit_index;
283 }
284 if (limit <= num_leading_bits) {
285 return limit;
286 }
287 limit -= num_leading_bits;
288 offset += num_leading_bits;
289 count = num_leading_bits;
290 }
291
292 // now, check bytes up to the next unsigned int boundary
293# ifdef DEBUG
294 {
295 if (offset & 7) {
296 ERR("offset is not properly aligned %u\n", offset);
297 abort();
298 }
299 }
300# endif
301 unsigned byte_offset = offset >> 3;
302 unsigned leading_bytes = sizeof(unsigned) - (byte_offset & (sizeof(unsigned) - 1));
303 TRACE("byte_offset=%u leading_bytes=%u\n", byte_offset, leading_bytes);
304 while (leading_bytes) {
305 uint8_t n = *addr++;
306 if (limit < 8) {
307 if (n) {
308 unsigned z = count_trailing_zeros(n);
309 if (z < limit) {
310 limit = z;
311 }
312 }
313 return count + limit;
314 }
315 if (n) {
316 return count + count_trailing_zeros(n);
317 }
318 leading_bytes--;
319 byte_offset++;
320 limit -= 8;
321 offset += 8;
322 count += 8;
323 }
324
325 // now count up to the next page boundary
326# ifdef DEBUG
327 {
328 if (byte_offset & (sizeof(unsigned) - 1)) {
329 ERR("byte_offset is not properly aligned: %u\n", byte_offset);
330 abort();
331 }
332 }
333# endif
334 byte_offset &= ~(sizeof(unsigned) - 1);
335 unsigned* uint_addr = (unsigned*) addr;
336 unsigned num_leading = (byte_offset & sys_page_bitmask) / sizeof(unsigned);
337 while (num_leading) {
338 unsigned n = *uint_addr++;
339 if (limit < UNSIGNED_WIDTH) {
340 if (n) {
341 unsigned z = count_trailing_zeros(n);
342 if (z < limit) {
343 limit = z;
344 }
345 }
346 return count + limit;
347 }
348 if (n) {
349 return count + count_trailing_zeros(n);
350 }
351 num_leading--;
352 limit -= UNSIGNED_WIDTH;
353 offset += UNSIGNED_WIDTH;
354 count += UNSIGNED_WIDTH;
355 }
356
357 // now count empty pages that follow
358 unsigned bitmap_page_index = unit_index_to_bitmap_page_index(offset);
359# ifdef DEBUG
360 {
361 uint8_t* bitmap_page_addr = bitmap_page_by_index(chunk, bitmap_page_index);
362 if (uint_addr != (unsigned*) bitmap_page_addr) {
363 ERR("bitmap page address mismatch: %p != %p\n", uint_addr, bitmap_page_addr);
364 abort();
365 }
366 }
367# endif
368 uint16_t* nbs_ptr = chunk->num_bits_set;
369 for (; bitmap_page_index < chunk->num_bitmap_pages; bitmap_page_index++) {
370 unsigned n = *nbs_ptr++;
371 if (limit < bitmap_page_units) {
372 if (n) {
373 unsigned z = count_page_leading_zeros(
374 bitmap_page_by_index(chunk, bitmap_page_index)
375 );
376 if (z < limit) {
377 limit = z;
378 }
379 }
380 return count + limit;
381 }
382 if (n) {
383 return count + count_page_leading_zeros(
384 bitmap_page_by_index(chunk, bitmap_page_index)
385 );
386 }
387 limit -= bitmap_page_units;
388 offset += bitmap_page_units;
389 count += bitmap_page_units;
390 }
391 return count;
392}
393
394static bool find_free_space(Chunk* chunk, unsigned num_units, unsigned* unit_index)
395/*
396 * Find consecutive zero bits (i.e. num_units) in the bitmap.
397 * num_units is less than or equal to page size.
398 */
399{
400 TRACE("chunk=%zd num_units=%u\n", chunk - chunks, num_units);
401 uint16_t* nbs_ptr = chunk->num_bits_set;
402 for (unsigned i = 0, n = chunk->num_bitmap_pages; i < n; i++, nbs_ptr++) {
403 unsigned num_bits_set = *nbs_ptr;
404 if (num_bits_set == 0) {
405 // found empty page
406 *unit_index = bitmap_page_index_to_unit_index(i);
407 return true;
408 }
409 if (num_units > sys_page_size - num_bits_set) {
410 // no free space on the page, skip it
411 continue;
412 }
413 // potentially free space, if not fragmented
414 void* bitmap_page = bitmap_page_by_index(chunk, i);
415 if (find_zero_bits(bitmap_page, num_units, unit_index)) {
416 // found!
417 // unit index is already set by find_zero_bits, make it absolute
418 *unit_index += bitmap_page_index_to_unit_index(i);
419 return true;
420 }
421 // check if the page has free space at the end
422 unsigned tail_units_avail = count_page_trailing_zeros(bitmap_page);
423 if (tail_units_avail && (i < n - 1)) {
424 // check head units on the next page
425 unsigned next_units_avail = count_page_leading_zeros(bitmap_page_by_index(chunk, i + 1));
426 if (num_units <= tail_units_avail + next_units_avail) {
427 // found!
428 *unit_index = bitmap_page_index_to_unit_index(i + 1) - tail_units_avail;
429 return true;
430 }
431 }
432 }
433 return false;
434}
435
436static void sigsegv_handler(int sig, siginfo_t* info, void* ucontext)
437/*
438 * Commit pages on demand.
439 * Only bitmap and num_bits_set can be committed on demand,
440 * data pages are committed explicitly when block is allocated.
441 */
442{
443 void* fault_addr = info->si_addr;
444 void* page_addr = (void*) (((ptrdiff_t) fault_addr) & ~(ptrdiff_t) sys_page_bitmask);
445
446 TRACE("%p\n", fault_addr);
447
448 /* In debug mode dump functions may access entire bitmap,
449 * so allow unconditional on-demand allocation.
450 */
451# ifdef DEBUG
452 unsigned bitmap_page = 0;
453 unsigned nbs_page = 0;
454# else
455 if (segv_handler_enabled) {
456# endif
457 Chunk* chunk = chunks;
458 for (unsigned i = 0; i < MAX_CHUNKS; i++, chunk++) {
459 if (chunk->reserved_start == nullptr) {
460 continue;
461 }
462 void* data_start = chunk_data_start(chunk);
463 if (chunk->reserved_start <= fault_addr && fault_addr < data_start) {
464 SAY("committing bitmap page %p for chunk %u (%p - %p)\n",
465 page_addr, i, chunk->reserved_start, chunk->reserved_end);
466# ifdef DEBUG
467 bitmap_page = 1;
468# endif
469 goto commit;
470 }
471 }
472 if (((void*) num_bits_set_start) <= fault_addr && fault_addr < (void*) num_bits_set_end) {
473 SAY("committing page %p for num_bits_set (%p - %p)\n",
474 page_addr, num_bits_set_start, num_bits_set_end);
475# ifdef DEBUG
476 nbs_page = 1;
477# endif
478 goto commit;
479 }
480# ifndef DEBUG
481 }
482# endif
483
484 // singal is not related to any reserved region, involve default action
485 sigaction(SIGSEGV, &segv_default_action, ucontext);
486 return;
487
488commit:
489 if (mprotect(page_addr, sys_page_size, PROT_READ | PROT_WRITE) == -1) {
490 ERR("mprotect: %s\n", strerror(errno));
491 // jump to the branch that returns error in failed function
492 TRACE("longjmp\n");
493 siglongjmp(jmptarget, 1);
494 }
495//# ifdef DEBUG
496 else {
497 // munmap/mmap/mprotect may return dirty page, see https://mastodon.social/@thoughtful_pet/113940384388077319
498 explicit_bzero(page_addr, sys_page_size);
499/*
500# ifdef DEBUG
501 // make sure the block is clean
502 uint8_t* ptr = page_addr;
503 for (unsigned i = 0; i < sys_page_size; i++) {
504 if (*ptr++) {
505 ERR("mprotect block is not clean\n");
506 dump_hex(stderr, page_addr, sys_page_size);
507 abort();
508 }
509 }
510*/
511# ifdef DEBUG
512 bitmap_pages_committed += bitmap_page;
513 nbs_pages_committed += nbs_page;
514# endif
515 }
516//# endif
517}
518
519static void _init()
520{
521 // init signal handler to commit memory on demand
522 struct sigaction act;
523 explicit_bzero(&act, sizeof(act));
524 sigemptyset(&act.sa_mask);
525 act.sa_sigaction = sigsegv_handler;
526 act.sa_flags = SA_RESTART | SA_SIGINFO;
527 int err = sigaction(SIGSEGV, &act, &segv_default_action);
528 if (err) {
529 ERR("sigaction(SIGSEGV): %s\n", strerror(errno));
530 abort();
531 }
532
533 // init global variables
534 bitmap_page_units = sys_page_size * 8;
535 bitmap_page_units_pwr2 = sys_page_pwr2 + 3;
536 bitmap_page_units_bitmask = bitmap_page_units - 1;
537 bitmap_page_bytes = bitmap_page_units << UNIT_PWR2;
538 bitmap_page_bytes_pwr2 = bitmap_page_units_pwr2 + UNIT_PWR2;
539
540 SAY("page size %u; power of two %u; bitmask %x\n",
541 sys_page_size, sys_page_pwr2, sys_page_bitmask);
542
543 // init chunks
544 Chunk* chunk = chunks;
545 unsigned num_bitmap_pages = 1;
546 unsigned nbs_offset = 0;
547 for (unsigned i = 0; i < MAX_CHUNKS; i++, chunk++) {
548
549 size_t bitmap_size = ((size_t) num_bitmap_pages) << sys_page_pwr2; // in bytes
550 size_t chunk_data_size = bitmap_size * 8 * UNIT_SIZE;
551
552 chunk->num_bitmap_pages = num_bitmap_pages;
553 chunk->reserved_start = nullptr;
554 chunk->reserved_end = (void*) (chunk_data_size + bitmap_size);
555 chunk->num_bits_set = (uint16_t*) (ptrdiff_t) nbs_offset; // will add num_bits_set_start later
556
557 nbs_offset += num_bitmap_pages;
558
559 if (chunk_data_size < max_chunk_data_size) {
560 num_bitmap_pages <<= 1;
561 }
562 }
563 // reserve area for num_bits_set
564 unsigned nbs_size = align_unsigned_to_page(nbs_offset * sizeof(uint16_t));
565
566 num_bits_set_start = mmap(NULL, nbs_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
567 if (num_bits_set_start == MAP_FAILED) {
568 ERR("mmap: %s\n", strerror(errno));
569 abort();
570 }
571 num_bits_set_end = (uint16_t*) (
572 ((uint8_t*) num_bits_set_start) + nbs_size
573 );
574 SAY("reserved %p - %p (%u bytes) for %u cumulative entries of num_bits_set arrays\n",
575 num_bits_set_start, num_bits_set_end, nbs_size, nbs_offset);
576 SAY("memory limit for allocation: %zuK\n",
577 (((size_t) nbs_offset) * UNIT_SIZE * bitmap_page_units >> 10));
578
579 // update num_bits_set
580 chunk = chunks;
581 for (unsigned i = 0; i < MAX_CHUNKS; i++, chunk++) {
582 chunk->num_bits_set = num_bits_set_start + (ptrdiff_t) chunk->num_bits_set;
583 }
584
585 // init mutex
586 if (mtx_init(&lock, mtx_plain | mtx_recursive) != thrd_success) {
587 ERR("cannot init mutex\n");
588 }
589}
590
591static bool allocate_page_units(Chunk* chunk, unsigned unit_index, unsigned num_units)
592/*
593 * unit_index is the index within chunk.
594 * num_units is not greater than bitmap page can hold
595 *
596 * Increment num_bits_set.
597 * Commit page if was not committed yet.
598 * Clean data area for already committed pages.
599 */
600{
601# ifdef DEBUG
602 if (num_units >= bitmap_page_units) {
603 ERR("num_units %u is bigger than %u limit\n", num_units, bitmap_page_units);
604 abort();
605 }
606 if ((unit_index & bitmap_page_units_bitmask) + num_units >= bitmap_page_units) {
607 ERR("unit_index %u is too big for page: %u + %u is bigger than %u limit\n",
608 unit_index, unit_index & bitmap_page_units_bitmask, num_units, bitmap_page_units);
609 abort();
610 }
611# endif
612 bool result = true; // do not return on error immediatelly, changes must be coherent
613 unsigned bitmap_page_index = unit_index_to_bitmap_page_index(unit_index);
614 uint8_t* data_start = unit_index_to_address(chunk, unit_index);
615 if (chunk->num_bits_set[bitmap_page_index] == 0) {
616 // data page is not committed yet, do it;
617 // as long as it's the first allocated block, make sure data_start is aligned to page boundary
618 if (((ptrdiff_t) data_start) & (ptrdiff_t) sys_page_bitmask) {
619 ERR("Bad page address %p\n", data_start);
620 abort();
621 }
622 SAY("committing data page %p for chunk %u (%p - %p)\n",
623 data_start, chunk - chunks, chunk->reserved_start, chunk->reserved_end);
624 if (mprotect(data_start, sys_page_size, PROT_READ | PROT_WRITE)) {
625 ERR("mprotect: %s\n", strerror(errno));
626 result = false;
627 }
628//# ifdef DEBUG
629 else {
630 // munmap/mmap/mprotect may return dirty page, see https://mastodon.social/@thoughtful_pet/113940384388077319
631 explicit_bzero(data_start, sys_page_size);
632/*
633# ifdef DEBUG
634 // make sure the block is clean
635 uint8_t* ptr = data_start;
636 for (unsigned i = 0; i < sys_page_size; i++) {
637 if (*ptr++) {
638 ERR("mprotect block is not clean\n");
639 dump_hex(stderr, data_start, sys_page_size);
640 abort();
641 }
642 }
643*/
644# ifdef DEBUG
645 data_pages_committed++;
646# endif
647 }
648//# endif
649 } else {
650 // clean data area
651 explicit_bzero(data_start, units_to_bytes(num_units));
652 }
653 TRACE("adding %u to num_bits_set[%u] at %p\n",
654 num_units, bitmap_page_index, &chunk->num_bits_set[bitmap_page_index]);
655 chunk->num_bits_set[bitmap_page_index] += num_units;
656 return result;
657}
658
659static void release_page_units(Chunk* chunk, unsigned unit_index, unsigned num_units)
660/*
661 * unit_index is the index within chunk.
662 * num_units is not greater than bitmap page can hold
663 *
664 * Counterpart function of allocate_page_units. Does the opposite:
665 *
666 * Decrement num_bits_set.
667 * Reclaim page if not the last one, to avoid flapping commits/reclaims.
668 */
669{
670# ifdef DEBUG
671 if (num_units >= bitmap_page_units) {
672 ERR("num_units %u is bigger than %u limit\n", num_units, bitmap_page_units);
673 abort();
674 }
675 if ((unit_index & bitmap_page_units_bitmask) + num_units >= bitmap_page_units) {
676 ERR("unit_index %u is too big for page: %u + %u is bigger than %u limit\n",
677 unit_index, unit_index & bitmap_page_units_bitmask, num_units, bitmap_page_units);
678 abort();
679 }
680# endif
681 unsigned bitmap_page_index = unit_index_to_bitmap_page_index(unit_index);
682 if (chunk->num_bits_set[bitmap_page_index] < num_units) {
683 ERR("chunk %zd num_bits_set[%u] is lower than requested %u\n",
684 chunk - chunks, bitmap_page_index, num_units);
685 abort();
686 }
687 if (chunk->num_bits_set[bitmap_page_index] == num_units) {
688 // bitmap page is empty, reclaim data pages
689 uint8_t* page_addr = align_pointer_to_page(unit_index_to_address(chunk, unit_index));
690 SAY("reclaiming data page %p for chunk %u (%p - %p)\n",
691 page_addr, chunk - chunks, chunk->reserved_start, chunk->reserved_end);
692 if (munmap(page_addr, sys_page_size) == -1) {
693 ERR("munmap %p: %s\n", page_addr, strerror(errno));
694 }
695 if (mmap(page_addr, sys_page_size, PROT_NONE | MAP_FIXED, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0) == MAP_FAILED) {
696 ERR("mmap %p: %s\n", page_addr, strerror(errno));
697 }
698# ifdef DEBUG
699 else {
700 data_pages_committed--;
701 }
702# endif
703 }
704 TRACE("subtracting %u from num_bits_set[%u] at %p\n",
705 num_units, bitmap_page_index, &chunk->num_bits_set[bitmap_page_index]);
706 chunk->num_bits_set[bitmap_page_index] -= num_units;
707}
708
709static bool commit(Chunk* chunk, unsigned unit_index, unsigned num_units)
710/*
711 * Set bits in chunk's bitmap thus marking area as allocated.
712 * Add the number of bits set (i.e. num_units) to num_bits_set.
713 * Commit pages in the data area. We can't commit them on demand. XXX think this through?
714 * Return false if out of memory.
715 */
716{
717 uint8_t* bitmap_addr = ((uint8_t*) chunk->reserved_start) + (unit_index >> 3);
718/*
719 while (num_units > bitmap_page_units) {
720 if (!commit_page_units(bitmap_addr, unit_index, bitmap_page_units)) {
721 // XXX reclaim on error
722 return false;
723 }
724 unit_index += bitmap_page_units;
725 num_units -= bitmap_page_units;
726 bitmap_addr += sys_page_size;
727 }
728 if (num_units) {
729 if (!commit_page_units(bitmap_addr, unit_index, bitmap_page_units)) {
730 // XXX reclaim on error
731 return false;
732 }
733 }
734*/
735 TRACE("unit_index=%u, num_units=%u\n", unit_index, num_units);
736 bool result = true; // do not return on error immediatelly, changes must be coherent
737
738 // set leading bits
739 unsigned leading_bit_index = unit_index & 7;
740 if (leading_bit_index) {
741 TRACE("leading bit index %u\n", leading_bit_index);
742 uint8_t bitmask = ~((1 << leading_bit_index) - 1);
743 unsigned num_bits;
744 if (num_units <= 8 - leading_bit_index) {
745 bitmask &= (1 << (num_units + leading_bit_index)) - 1;
746 num_bits = num_units;
747 num_units = 0;
748 } else {
749 num_bits = 8 - leading_bit_index;
750 num_units -= num_bits;
751 }
752# if DEBUG
753 // make sure we do not doubly allocate
754 if (*bitmap_addr & bitmask) {
755 ERR("leading bits %x are already set\n", bitmask);
756 abort();
757 }
758# endif
759 *bitmap_addr++ |= bitmask;
760
761 result &= allocate_page_units(chunk, unit_index, num_bits);
762 unit_index += num_bits;
763 }
764
765 // set full bytes
766 unsigned num_bytes = num_units >> 3;
767 if (num_bytes) {
768 TRACE("%u full bytes\n", num_bytes);
769# if DEBUG
770 // make sure we do not doubly allocate
771 {
772 uint8_t *p = bitmap_addr;
773 for (unsigned i = 0; i < num_bytes; i++, p++) {
774 if (*p) {
775 ERR("middle byte %u is nonzero\n", i);
776 abort();
777 }
778 }
779 }
780# endif
781 memset(bitmap_addr, 0xFF, num_bytes);
782 bitmap_addr += num_bytes;
783
784 // update num_bits_set and commit data pages
785 unsigned leading_bytes = (bitmap_page_units - (unit_index & bitmap_page_units_bitmask)) >> 3;
786 if (leading_bytes > num_bytes) {
787 leading_bytes = num_bytes;
788 }
789 TRACE("unit index %u, %u leading bytes\n", unit_index, leading_bytes);
790 if (leading_bytes) {
791 unsigned num_bits;
792 if (leading_bytes < bitmap_page_units) {
793 num_bits = 8 * leading_bytes;
794 num_bytes -= leading_bytes;
795 } else {
796 num_bits = bitmap_page_units;
797 num_bytes -= sys_page_size;
798 }
799 result &= allocate_page_units(chunk, unit_index, num_bits);
800 unit_index += num_bits;
801 num_units -= num_bits;
802 }
803 while (num_bytes >= sys_page_size) {
804 result &= allocate_page_units(chunk, unit_index, bitmap_page_units);
805 unit_index += bitmap_page_units;
806 num_units -= bitmap_page_units;
807 num_bytes -= sys_page_size;
808 }
809 if (num_bytes) {
810 unsigned num_bits = 8 * num_bytes;
811 result &= allocate_page_units(chunk, unit_index, num_bits);
812 unit_index += num_bits;
813 num_units -= num_bits;
814 }
815 }
816
817 // finally, num_units contain trailing bits
818 if (num_units) {
819 TRACE("%u trailing bits\n", num_units);
820 uint8_t bitmask = (1 << num_units) - 1;
821# if DEBUG
822 // make sure we do not doubly allocate
823 if (*bitmap_addr & bitmask) {
824 ERR("trailing bits %x are already set\n", bitmask);
825 abort();
826 }
827# endif
828 *bitmap_addr |= bitmask;
829
830 result &= allocate_page_units(chunk, unit_index, num_units);
831 }
832
833 return result;
834}
835
836static void reclaim(Chunk* chunk, unsigned unit_index, unsigned num_units)
837/*
838 * Counterpart function of commit(). Does the opposite:
839 * Clear bits in chunk's bitmap thus marking area as free.
840 * Subtract the number of bits set (i.e. num_units) from num_bits_set.
841 * Reclaim pages from the data area.
842 */
843{
844 TRACE("unit_index=%u, num_units=%u\n", unit_index, num_units);
845 uint8_t* bitmap_addr = ((uint8_t*) chunk->reserved_start) + (unit_index >> 3);
846
847 // clear leading bits
848 unsigned leading_bit_index = unit_index & 7;
849 if (leading_bit_index) {
850 TRACE("leading bit index %u\n", leading_bit_index);
851 uint8_t bitmask = ~((1 << leading_bit_index) - 1);
852 unsigned num_bits;
853 if (num_units <= 8 - leading_bit_index) {
854 bitmask &= (1 << (num_units + leading_bit_index)) - 1;
855 num_bits = num_units;
856 num_units = 0;
857 } else {
858 num_bits = 8 - leading_bit_index;
859 num_units -= num_bits;
860 }
861# if DEBUG
862 // make sure we do not doubly free
863 if ((*bitmap_addr & bitmask) != bitmask) {
864 ERR("leading bits %x are already cleaned\n", bitmask);
865 abort();
866 }
867# endif
868 *bitmap_addr++ &= ~bitmask;
869
870 release_page_units(chunk, unit_index, num_bits);
871 unit_index += num_bits;
872 }
873
874 // set full bytes
875 unsigned num_bytes = num_units >> 3;
876 if (num_bytes) {
877 TRACE("%u full bytes\n", num_bytes);
878# if DEBUG
879 // make sure we do not doubly free
880 {
881 uint8_t *p = bitmap_addr;
882 for (unsigned i = 0; i < num_bytes; i++, p++) {
883 if (*p != 0xFF) {
884 ERR("middle byte %u is not all-ones\n", i);
885 abort();
886 }
887 }
888 }
889# endif
890 explicit_bzero(bitmap_addr, num_bytes);
891 bitmap_addr += num_bytes;
892
893 // update num_bits_set and commit data pages
894 unsigned leading_bytes = (bitmap_page_units - (unit_index & bitmap_page_units_bitmask)) >> 3;
895 if (leading_bytes > num_bytes) {
896 leading_bytes = num_bytes;
897 }
898 TRACE("unit index %u, %u leading bytes\n", unit_index, leading_bytes);
899 if (leading_bytes) {
900 unsigned num_bits;
901 if (leading_bytes <= sys_page_size) {
902 num_bits = 8 * leading_bytes;
903 num_bytes -= leading_bytes;
904 } else {
905 num_bits = bitmap_page_units;
906 num_bytes -= bitmap_page_units;
907 }
908 release_page_units(chunk, unit_index, num_bits);
909 unit_index += num_bits;
910 num_units -= num_bits;
911 }
912 while (num_bytes >= sys_page_size) {
913 release_page_units(chunk, unit_index, bitmap_page_units);
914 unit_index += bitmap_page_units;
915 num_units -= bitmap_page_units;
916 num_bytes -= sys_page_size;
917 }
918 if (num_bytes) {
919 unsigned num_bits = 8 * num_bytes;
920 release_page_units(chunk, unit_index, num_bits);
921 unit_index += num_bits;
922 num_units -= num_bits;
923 }
924 }
925
926 // finally, num_units contain trailing bits
927 if (num_units) {
928 TRACE("%u trailing bits\n", num_units);
929 uint8_t bitmask = (1 << num_units) - 1;
930# if DEBUG
931 // make sure we do not doubly free
932 if ((*bitmap_addr & bitmask) != bitmask) {
933 ERR("trailing bits %x are already cleaned\n", bitmask);
934 abort();
935 }
936# endif
937 *bitmap_addr &= ~bitmask;
938
939 release_page_units(chunk, unit_index, num_units);
940 }
941}
942
943static Chunk* find_available_chunk(unsigned nbytes, unsigned* chunk_index, unsigned* unit_index)
944/*
945 * unit_index: the bit index of unit in the bitmap
946 */
947{
948 unsigned index; // local copy of chunk index
949
950 /*
951 Find chunk with data size >= nbytes from which to start looking for free space.
952
953 Naive way:
954
955 unsigned index = 0;
956 while (chunk_data_size < nbytes) {
957 index++;
958 }
959 */
960 if (nbytes <= bitmap_page_bytes) {
961 // it's fast for small allocations
962 index = 0;
963 } else {
964 // allocations above sys_page_size require more work
965 // find how many pages the bitmap will take for nbytes:
966 // num_pages = nbytes / UNIT_SIZE / bitmap_page_units
967 unsigned lbi = first_leading_one(nbytes);
968 index = lbi - bitmap_page_bytes_pwr2;
969 if (nbytes > (1 << lbi)) {
970 index++;
971 }
972 TRACE("chose chunk %u for %u, leading bit %u\n", index, nbytes, lbi);
973 if (index >= MAX_CHUNKS) {
974 // serious error, actually
975 ERR("cannot get correct chunk index: %u is outside the maximum %u\n",
976 index, MAX_CHUNKS);
977 return nullptr;
978 }
979 }
980
981 // convert nbytes to units
982 unsigned num_units = bytes_to_units(nbytes);
983
984 // calculate how many bitmap pages this will take
985 unsigned full_pages = num_units >> bitmap_page_units_pwr2;
986 unsigned remaining_units = num_units & bitmap_page_units_bitmask;
987
988 // find chunk that has available space
989
990 Chunk* chunk = &chunks[index];
991
992 Chunk* fallback_chunk = nullptr;
993 unsigned fallback_index;
994
995 for (; index < MAX_CHUNKS; index++, chunk++) {
996 if (chunk->reserved_start == nullptr) {
997 // the chunk is not allocated yet
998 if (!fallback_chunk) {
999 // use it as fallback chunk if we won't find available space
1000 fallback_chunk = chunk;
1001 fallback_index = index;
1002 }
1003 // look for free space in allocated chunks only
1004 continue;
1005 }
1006 if (full_pages) {
1007 // requested block takes more than one bitmap page
1008
1009 TRACE("trying chunk %u for %u-bytes (%u-units) block spanning %u full pages and %u remaining units\n",
1010 index, nbytes, num_units, full_pages, remaining_units);
1011
1012 unsigned bitmap_page_index;
1013
1014 if (full_pages == 1) {
1015 if (!find_empty_bitmap_page(chunk, &bitmap_page_index)) {
1016 continue;
1017 }
1018 } else {
1019 if (!find_consecutive_empty_bitmap_pages(chunk, full_pages, &bitmap_page_index)) {
1020 continue;
1021 }
1022 }
1023
1024 if (remaining_units == 0) {
1025 // we've done!
1026 *chunk_index = index;
1027 *unit_index = bitmap_page_index_to_unit_index(bitmap_page_index);
1028 TRACE("success; unit index: %u\n", *unit_index);
1029 return chunk;
1030 }
1031
1032 // check surrounding pages
1033
1034 unsigned prev_units_avail = 0;
1035 unsigned next_units_avail = 0;
1036
1037 if (bitmap_page_index &&
1038 (sys_page_size - chunk->num_bits_set[bitmap_page_index - 1]) // have some units available
1039 ) {
1040 void* bitmap_page = bitmap_page_by_index(chunk, bitmap_page_index - 1);
1041 prev_units_avail = count_page_trailing_zeros(bitmap_page);
1042 }
1043
1044 if (remaining_units <= prev_units_avail) {
1045 // they'll fit on previous page
1046 // we've done!
1047 *chunk_index = index;
1048 *unit_index = bitmap_page_index_to_unit_index(bitmap_page_index) - remaining_units;
1049 TRACE("success with previous bitmap page; unit index: %u\n", *unit_index);
1050 return chunk;
1051 }
1052
1053 if (bitmap_page_index < chunk->num_bitmap_pages - 1 &&
1054 (sys_page_size - chunk->num_bits_set[bitmap_page_index + full_pages]) // have some units available
1055 ) {
1056 void* bitmap_page = bitmap_page_by_index(chunk, bitmap_page_index + 1);
1057 next_units_avail = count_page_leading_zeros(bitmap_page);
1058 }
1059
1060 if (remaining_units <= (prev_units_avail + next_units_avail)) {
1061 // we've done!
1062 *chunk_index = index;
1063 *unit_index = bitmap_page_index_to_unit_index(bitmap_page_index) - prev_units_avail;
1064 TRACE("success with surrounding pages; unit index: %u, %u prev units avail, %u next units avail\n",
1065 *unit_index, prev_units_avail, next_units_avail);
1066 return chunk;
1067 }
1068 } else {
1069
1070 TRACE("trying chunk %u for %u-bytes (%u-units) block\n",
1071 index, nbytes, num_units);
1072
1073 if (find_free_space(chunk, num_units, unit_index)) {
1074 *chunk_index = index;
1075 TRACE("success; unit index: %u\n", *unit_index);
1076 return chunk;
1077 }
1078 }
1079 }
1080 *chunk_index = fallback_index;
1081 *unit_index = 0;
1082 TRACE("using unallocated chunk %u for %u-bytes block (%u units)\n",
1083 fallback_index, nbytes, num_units);
1084 return fallback_chunk;
1085}
1086
1087static void* _allocate(unsigned nbytes)
1088{
1089 void* result = nullptr;
1090
1091 TRACE("nbytes=%u\n", nbytes);
1092
1093 if (nbytes == 0 || nbytes > max_chunk_data_size) {
1094 return result;
1095 }
1096
1097 enter_allocator();
1098
1099 // error handler for OOM in SIGSEGV handler
1100 if (sigsetjmp(jmptarget, 1)) {
1101 // out of memory
1102 TRACE("longjmp exit\n");
1103 result = nullptr;
1104 goto out;
1105 }
1106
1107 // Find chunk that has available space.
1108 unsigned chunk_index, unit_index;
1109 Chunk* chunk = find_available_chunk(nbytes, &chunk_index, &unit_index);
1110 if (!chunk) {
1111 // out of memory
1112 goto out;
1113 }
1114
1115 if (chunk->reserved_start == nullptr) {
1116 // allocate new chunk
1117 uint8_t* reserved_start = mmap(NULL, (size_t) chunk->reserved_end, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
1118 if (reserved_start == MAP_FAILED) {
1119 SAY("mmap: %s\n", strerror(errno));
1120 goto out;
1121 }
1122 chunk->reserved_start = reserved_start;
1123 chunk->reserved_end = reserved_start + (ptrdiff_t) chunk->reserved_end;
1124 SAY("reserved chunk %zd: %p - %p\n", chunk - chunks, chunk->reserved_start, chunk->reserved_end);
1125
1126 unit_index = 0;
1127 }
1128 unsigned num_units = bytes_to_units(nbytes);
1129 if (!commit(chunk, unit_index, num_units)) {
1130 // out of memory
1131 reclaim(chunk, unit_index, num_units);
1132 goto out;
1133 }
1134
1135 bitmap_allocator.blocks_allocated++;
1136
1137 result = unit_index_to_address(chunk, unit_index);
1138
1139out:
1140 exit_allocator();
1141 TRACE("result=%p\n", result);
1142# ifdef DEBUG
1143 if (result) {
1144 // make sure the block is clean
1145 uint8_t* ptr = result;
1146 for (unsigned i = 0; i < nbytes; i++) {
1147 if (*ptr++) {
1148 ERR("allocated block is not clean\n");
1149 dump_hex(stderr, result, nbytes);
1150 abort();
1151 }
1152 }
1153 }
1154# endif
1155 return result;
1156}
1157
1158static Chunk* chunk_by_addr(void* block, unsigned* unit_index)
1159{
1160 Chunk* chunk = chunks;
1161 for (unsigned i = 0; i < MAX_CHUNKS; i++, chunk++) {
1162 if (chunk->reserved_start) {
1163 void* data_start = chunk_data_start(chunk);
1164 if (data_start <= block && block < chunk->reserved_end) {
1165 *unit_index = (((uint8_t*) block) - ((uint8_t*) data_start)) >> UNIT_PWR2;
1166 TRACE("chunk %u unit %u for address %p\n", i, *unit_index, block);
1167 return chunk;
1168 }
1169 }
1170 }
1171 return nullptr;
1172}
1173
1174static void _shrink(void* block, unsigned old_nbytes, unsigned new_nbytes)
1175{
1176 TRACE("block=%p old_nbytes=%u new_nbytes=%u\n", block, old_nbytes, new_nbytes);
1177
1178 if (new_nbytes == 0) {
1179 // XXX might be a serious error, but it's a caller's problem
1180 ERR("called with zero new_nbytes\n");
1181 return;
1182 }
1183 if (((ptrdiff_t) block) & (UNIT_SIZE - 1)) {
1184 ERR("called with badly aligned address %p\n", block);
1185 abort();
1186 }
1187 if (old_nbytes <= new_nbytes) {
1188 return;
1189 }
1190
1191 enter_allocator();
1192
1193 unsigned unit_index;
1194 uint8_t* block_end = ((uint8_t*) block) + new_nbytes;
1195 Chunk* chunk = chunk_by_addr(block_end, &unit_index);
1196 if (!chunk) {
1197 ERR("called with wrong address %p or new_nbytes %u\n", block, new_nbytes);
1198 abort();
1199 } else {
1200 unsigned old_units = bytes_to_units(old_nbytes);
1201 unsigned new_units = bytes_to_units(new_nbytes);
1202 reclaim(chunk, unit_index, old_units - new_units);
1203 }
1204
1205 exit_allocator();
1206}
1207
1208static bool _grow(void* block, unsigned old_nbytes, unsigned new_nbytes)
1209{
1210 TRACE("block=%p old_nbytes=%u new_nbytes=%u\n", block, old_nbytes, new_nbytes);
1211
1212 if (old_nbytes == 0) {
1213 // XXX might be a serious error, but it's a caller's problem
1214 ERR("called with zero old_nbytes\n");
1215 return false;
1216 }
1217 if (((ptrdiff_t) block) & (UNIT_SIZE - 1)) {
1218 ERR("called with badly aligned address %p\n", block);
1219 abort();
1220 }
1221 if (new_nbytes < old_nbytes) {
1222 return false;
1223 }
1224 if (new_nbytes == old_nbytes) {
1225 return true;
1226 }
1227
1228 enter_allocator();
1229
1230 unsigned unit_index;
1231 Chunk* chunk = chunk_by_addr(block, &unit_index);
1232 if (!chunk) {
1233 ERR("called with wrong address %p\n", block);
1234 abort();
1235 }
1236
1237 unsigned old_units = bytes_to_units(old_nbytes);
1238 unsigned new_units = bytes_to_units(new_nbytes);
1239 unsigned num_units = new_units - old_units;
1240 unsigned num_units_avail = count_consecutive_zero_bits(chunk, unit_index + old_units, num_units);
1241 bool result = false;
1242 if (num_units_avail == num_units) {
1243 if (commit(chunk, unit_index + old_units, num_units)) {
1244 result = true;
1245 }
1246 }
1247 exit_allocator();
1248 return result;
1249}
1250
1251static void _release(void** block_var, unsigned nbytes)
1252{
1253 void* block = *block_var;
1254 if (!block) {
1255 return;
1256 }
1257
1258 TRACE("block=%p nbytes=%u\n", block, nbytes);
1259
1260 if (nbytes == 0) {
1261 // XXX might be a serious error, but it's a caller's problem
1262 ERR("called with zero nbytes\n");
1263 return;
1264 }
1265 if (((ptrdiff_t) block) & (UNIT_SIZE - 1)) {
1266 ERR("called with badly aligned address %p\n", block);
1267 abort();
1268 }
1269
1270 enter_allocator();
1271
1272 unsigned unit_index;
1273 Chunk* chunk = chunk_by_addr(block, &unit_index);
1274 if (!chunk) {
1275 ERR("called with wrong address %p\n", block);
1276 abort();
1277 }
1278 reclaim(chunk, unit_index, bytes_to_units(nbytes));
1279 bitmap_allocator.blocks_allocated--;
1280
1281 exit_allocator();
1282 *block_var = nullptr;
1283}
1284
1285extern bool _generic_reallocate(Allocator* allocator, void** block_var,
1286 unsigned old_nbytes, unsigned new_nbytes, bool* addr_changed);
1287
1288static bool _reallocate(void** block_var, unsigned old_nbytes, unsigned new_nbytes, bool* addr_changed)
1289{
1290 return _generic_reallocate(&bitmap_allocator, block_var, old_nbytes, new_nbytes, addr_changed);
1291}
1292
1293#ifdef DEBUG
1294
1295 void print_chunks()
1296 {
1297 fputs("chunk: reserved_start - reserved_end; num_bitmap_pages; num_bits_set\n", stderr);
1298 Chunk* chunk = chunks;
1299 for (unsigned i = 0; i < MAX_CHUNKS; i++, chunk++) {
1300 fprintf(stderr, "%u: %p - %p; %u; %p\n",
1301 i, chunk->reserved_start, chunk->reserved_end, chunk->num_bitmap_pages, chunk->num_bits_set);
1302 }
1303 }
1304
1305 void dump_chunk(Chunk* chunk)
1306 {
1307 fprintf(stderr, "chunk %zd: %p - %p num_bitmap_pages=%u\n", chunk - chunks,
1308 chunk->reserved_start, chunk->reserved_end, chunk->num_bitmap_pages);
1309 if (!chunk->reserved_start) {
1310 return;
1311 }
1312 fprintf(stderr, "nbs_pages_committed=%u bitmap_pages_committed=%u data_pages_committed=%u\n",
1313 nbs_pages_committed, bitmap_pages_committed, data_pages_committed);
1314 fputs("bitmap page -- num_bits_set\n", stderr);
1315 for (unsigned i = 0; i < chunk->num_bitmap_pages; i++) {
1316 unsigned nbs = chunk->num_bits_set[i];
1317 if (nbs) {
1318 fprintf(stderr, "%u -- %u\n", i, nbs);
1319 }
1320 }
1321 dump_bitmap(stderr, chunk->reserved_start, chunk->num_bitmap_pages << sys_page_pwr2);
1322 }
1323
1324 static void _dump()
1325 {
1326 Chunk* chunk = chunks;
1327 for (unsigned i = 0; i < MAX_CHUNKS; i++, chunk++) {
1328 dump_chunk(chunk);
1329 }
1330 fputc('\n', stderr);
1331 }
1332
1333#endif
1334
1335Allocator bitmap_allocator = {
1336 .init = _init,
1337 .allocate = _allocate,
1338 .shrink = _shrink,
1339 .grow = _grow,
1340 .reallocate = _reallocate,
1341 .release = _release,
1342 .verbose = false,
1343 .blocks_allocated = 0
1344
1345# ifdef DEBUG
1346 , .trace = false,
1347 .dump = _dump
1348# endif
1349};