diff --git a/py/gc.c b/py/gc.c index 0c1f3961df3b..93e83aaad5b8 100644 --- a/py/gc.c +++ b/py/gc.c @@ -74,17 +74,22 @@ #define ATB_2_IS_FREE(a) (((a) & ATB_MASK_2) == 0) #define ATB_3_IS_FREE(a) (((a) & ATB_MASK_3) == 0) +#if MICROPY_GC_SPLIT_HEAP +#define NEXT_AREA(area) (area->next) +#else +#define NEXT_AREA(area) (NULL) +#endif + #define BLOCK_SHIFT(block) (2 * ((block) & (BLOCKS_PER_ATB - 1))) -#define ATB_GET_KIND(block) ((MP_STATE_MEM(gc_alloc_table_start)[(block) / BLOCKS_PER_ATB] >> BLOCK_SHIFT(block)) & 3) -#define ATB_ANY_TO_FREE(block) do { MP_STATE_MEM(gc_alloc_table_start)[(block) / BLOCKS_PER_ATB] &= (~(AT_MARK << BLOCK_SHIFT(block))); } while (0) -#define ATB_FREE_TO_HEAD(block) do { MP_STATE_MEM(gc_alloc_table_start)[(block) / BLOCKS_PER_ATB] |= (AT_HEAD << BLOCK_SHIFT(block)); } while (0) -#define ATB_FREE_TO_TAIL(block) do { MP_STATE_MEM(gc_alloc_table_start)[(block) / BLOCKS_PER_ATB] |= (AT_TAIL << BLOCK_SHIFT(block)); } while (0) -#define ATB_HEAD_TO_MARK(block) do { MP_STATE_MEM(gc_alloc_table_start)[(block) / BLOCKS_PER_ATB] |= (AT_MARK << BLOCK_SHIFT(block)); } while (0) -#define ATB_MARK_TO_HEAD(block) do { MP_STATE_MEM(gc_alloc_table_start)[(block) / BLOCKS_PER_ATB] &= (~(AT_TAIL << BLOCK_SHIFT(block))); } while (0) +#define ATB_GET_KIND(area, block) (((area)->gc_alloc_table_start[(block) / BLOCKS_PER_ATB] >> BLOCK_SHIFT(block)) & 3) +#define ATB_ANY_TO_FREE(area, block) do { area->gc_alloc_table_start[(block) / BLOCKS_PER_ATB] &= (~(AT_MARK << BLOCK_SHIFT(block))); } while (0) +#define ATB_FREE_TO_HEAD(area, block) do { area->gc_alloc_table_start[(block) / BLOCKS_PER_ATB] |= (AT_HEAD << BLOCK_SHIFT(block)); } while (0) +#define ATB_FREE_TO_TAIL(area, block) do { area->gc_alloc_table_start[(block) / BLOCKS_PER_ATB] |= (AT_TAIL << BLOCK_SHIFT(block)); } while (0) +#define ATB_HEAD_TO_MARK(area, block) do { area->gc_alloc_table_start[(block) / BLOCKS_PER_ATB] |= (AT_MARK << BLOCK_SHIFT(block)); } while (0) +#define ATB_MARK_TO_HEAD(area, block) do { area->gc_alloc_table_start[(block) / BLOCKS_PER_ATB] &= (~(AT_TAIL << BLOCK_SHIFT(block))); } while (0) -#define BLOCK_FROM_PTR(ptr) (((byte *)(ptr) - MP_STATE_MEM(gc_pool_start)) / BYTES_PER_BLOCK) -#define PTR_FROM_BLOCK(block) (((block) * BYTES_PER_BLOCK + (uintptr_t)MP_STATE_MEM(gc_pool_start))) -#define ATB_FROM_BLOCK(bl) ((bl) / BLOCKS_PER_ATB) +#define BLOCK_FROM_PTR(area, ptr) (((byte *)(ptr) - area->gc_pool_start) / BYTES_PER_BLOCK) +#define PTR_FROM_BLOCK(area, block) (((block) * BYTES_PER_BLOCK + (uintptr_t)area->gc_pool_start)) #if MICROPY_ENABLE_FINALISER // FTB = finaliser table byte @@ -92,9 +97,9 @@ #define BLOCKS_PER_FTB (8) -#define FTB_GET(block) ((MP_STATE_MEM(gc_finaliser_table_start)[(block) / BLOCKS_PER_FTB] >> ((block) & 7)) & 1) -#define FTB_SET(block) do { MP_STATE_MEM(gc_finaliser_table_start)[(block) / BLOCKS_PER_FTB] |= (1 << ((block) & 7)); } while (0) -#define FTB_CLEAR(block) do { MP_STATE_MEM(gc_finaliser_table_start)[(block) / BLOCKS_PER_FTB] &= (~(1 << ((block) & 7))); } while (0) +#define FTB_GET(area, block) ((area->gc_finaliser_table_start[(block) / BLOCKS_PER_FTB] >> ((block) & 7)) & 1) +#define FTB_SET(area, block) do { area->gc_finaliser_table_start[(block) / BLOCKS_PER_FTB] |= (1 << ((block) & 7)); } while (0) +#define FTB_CLEAR(area, block) do { area->gc_finaliser_table_start[(block) / BLOCKS_PER_FTB] &= (~(1 << ((block) & 7))); } while (0) #endif #if MICROPY_PY_THREAD && !MICROPY_PY_THREAD_GIL @@ -106,11 +111,7 @@ #endif // TODO waste less memory; currently requires that all entries in alloc_table have a corresponding block in pool -void gc_init(void *start, void *end) { - // align end pointer on block boundary - end = (void *)((uintptr_t)end & (~(BYTES_PER_BLOCK - 1))); - DEBUG_printf("Initializing GC heap: %p..%p = " UINT_FMT " bytes\n", start, end, (byte *)end - (byte *)start); - +STATIC void gc_setup_area(mp_state_mem_area_t *area, void *start, void *end) { // calculate parameters for GC (T=total, A=alloc table, F=finaliser table, P=pool; all in bytes): // T = A + F + P // F = A * BLOCKS_PER_ATB / BLOCKS_PER_FTB @@ -118,36 +119,52 @@ void gc_init(void *start, void *end) { // => T = A * (1 + BLOCKS_PER_ATB / BLOCKS_PER_FTB + BLOCKS_PER_ATB * BYTES_PER_BLOCK) size_t total_byte_len = (byte *)end - (byte *)start; #if MICROPY_ENABLE_FINALISER - MP_STATE_MEM(gc_alloc_table_byte_len) = total_byte_len * MP_BITS_PER_BYTE / (MP_BITS_PER_BYTE + MP_BITS_PER_BYTE * BLOCKS_PER_ATB / BLOCKS_PER_FTB + MP_BITS_PER_BYTE * BLOCKS_PER_ATB * BYTES_PER_BLOCK); + area->gc_alloc_table_byte_len = total_byte_len * MP_BITS_PER_BYTE / (MP_BITS_PER_BYTE + MP_BITS_PER_BYTE * BLOCKS_PER_ATB / BLOCKS_PER_FTB + MP_BITS_PER_BYTE * BLOCKS_PER_ATB * BYTES_PER_BLOCK); #else - MP_STATE_MEM(gc_alloc_table_byte_len) = total_byte_len / (1 + MP_BITS_PER_BYTE / 2 * BYTES_PER_BLOCK); + area->gc_alloc_table_byte_len = total_byte_len / (1 + MP_BITS_PER_BYTE / 2 * BYTES_PER_BLOCK); #endif - MP_STATE_MEM(gc_alloc_table_start) = (byte *)start; + area->gc_alloc_table_start = (byte *)start; #if MICROPY_ENABLE_FINALISER - size_t gc_finaliser_table_byte_len = (MP_STATE_MEM(gc_alloc_table_byte_len) * BLOCKS_PER_ATB + BLOCKS_PER_FTB - 1) / BLOCKS_PER_FTB; - MP_STATE_MEM(gc_finaliser_table_start) = MP_STATE_MEM(gc_alloc_table_start) + MP_STATE_MEM(gc_alloc_table_byte_len); + size_t gc_finaliser_table_byte_len = (area->gc_alloc_table_byte_len * BLOCKS_PER_ATB + BLOCKS_PER_FTB - 1) / BLOCKS_PER_FTB; + area->gc_finaliser_table_start = area->gc_alloc_table_start + area->gc_alloc_table_byte_len; #endif - size_t gc_pool_block_len = MP_STATE_MEM(gc_alloc_table_byte_len) * BLOCKS_PER_ATB; - MP_STATE_MEM(gc_pool_start) = (byte *)end - gc_pool_block_len * BYTES_PER_BLOCK; - MP_STATE_MEM(gc_pool_end) = end; + size_t gc_pool_block_len = area->gc_alloc_table_byte_len * BLOCKS_PER_ATB; + area->gc_pool_start = (byte *)end - gc_pool_block_len * BYTES_PER_BLOCK; + area->gc_pool_end = end; #if MICROPY_ENABLE_FINALISER - assert(MP_STATE_MEM(gc_pool_start) >= MP_STATE_MEM(gc_finaliser_table_start) + gc_finaliser_table_byte_len); + assert(area->gc_pool_start >= area->gc_finaliser_table_start + gc_finaliser_table_byte_len); #endif // clear ATBs - memset(MP_STATE_MEM(gc_alloc_table_start), 0, MP_STATE_MEM(gc_alloc_table_byte_len)); + memset(area->gc_alloc_table_start, 0, area->gc_alloc_table_byte_len); #if MICROPY_ENABLE_FINALISER // clear FTBs - memset(MP_STATE_MEM(gc_finaliser_table_start), 0, gc_finaliser_table_byte_len); + memset(area->gc_finaliser_table_start, 0, gc_finaliser_table_byte_len); #endif + area->gc_last_free_atb_index = 0; + + #if MICROPY_GC_SPLIT_HEAP + area->next = NULL; + #endif +} + +void gc_init(void *start, void *end) { + // align end pointer on block boundary + end = (void *)((uintptr_t)end & (~(BYTES_PER_BLOCK - 1))); + DEBUG_printf("Initializing GC heap: %p..%p = " UINT_FMT " bytes\n", start, end, (byte *)end - (byte *)start); + + gc_setup_area(&MP_STATE_MEM(area), start, end); + // set last free ATB index to start of heap - MP_STATE_MEM(gc_last_free_atb_index) = 0; + #if MICROPY_GC_SPLIT_HEAP + MP_STATE_MEM(gc_last_free_area) = &MP_STATE_MEM(area); + #endif // unlock the GC MP_STATE_THREAD(gc_lock_depth) = 0; @@ -173,6 +190,29 @@ void gc_init(void *start, void *end) { DEBUG_printf(" pool at %p, length " UINT_FMT " bytes, " UINT_FMT " blocks\n", MP_STATE_MEM(gc_pool_start), gc_pool_block_len * BYTES_PER_BLOCK, gc_pool_block_len); } +#if MICROPY_GC_SPLIT_HEAP +void gc_add(void *start, void *end) { + // Place the area struct at the start of the area. + mp_state_mem_area_t *area = (mp_state_mem_area_t *)start; + start = (void *)((uintptr_t)start + sizeof(mp_state_mem_area_t)); + + end = (void *)((uintptr_t)end & (~(BYTES_PER_BLOCK - 1))); + DEBUG_printf("Adding GC heap: %p..%p = " UINT_FMT " bytes\n", start, end, (byte *)end - (byte *)start); + + // Init this area + gc_setup_area(area, start, end); + + // Find the last registered area in the linked list + mp_state_mem_area_t *prev_area = &MP_STATE_MEM(area); + while (prev_area->next != NULL) { + prev_area = prev_area->next; + } + + // Add this area to the linked list + prev_area->next = area; +} +#endif + void gc_lock(void) { // This does not need to be atomic or have the GC mutex because: // - each thread has its own gc_lock_depth so there are no races between threads; @@ -190,12 +230,20 @@ bool gc_is_locked(void) { return MP_STATE_THREAD(gc_lock_depth) != 0; } -// ptr should be of type void* -#define VERIFY_PTR(ptr) ( \ - ((uintptr_t)(ptr) & (BYTES_PER_BLOCK - 1)) == 0 /* must be aligned on a block */ \ - && ptr >= (void *)MP_STATE_MEM(gc_pool_start) /* must be above start of pool */ \ - && ptr < (void *)MP_STATE_MEM(gc_pool_end) /* must be below end of pool */ \ - ) +// Returns the area to which this pointer belongs, or NULL if it isn't +// allocated on the GC-managed heap. +STATIC inline mp_state_mem_area_t *gc_get_ptr_area(const void *ptr) { + if (((uintptr_t)(ptr) & (BYTES_PER_BLOCK - 1)) != 0) { // must be aligned on a block + return NULL; + } + for (mp_state_mem_area_t *area = &MP_STATE_MEM(area); area != NULL; area = NEXT_AREA(area)) { + if (ptr >= (void *)area->gc_pool_start // must be above start of pool + && ptr < (void *)area->gc_pool_end) { // must be below end of pool + return area; + } + } + return NULL; +} #ifndef TRACE_MARK #if DEBUG_PRINT @@ -209,45 +257,64 @@ bool gc_is_locked(void) { // children: mark the unmarked child blocks and put those newly marked // blocks on the stack. When all children have been checked, pop off the // topmost block on the stack and repeat with that one. -STATIC void gc_mark_subtree(size_t block) { - // Start with the block passed in the argument. +STATIC void gc_mark_subtree(mp_gc_stack_item_t item) { + // Start with the item passed in the argument. size_t sp = 0; for (;;) { MICROPY_GC_HOOK_LOOP + + #if MICROPY_GC_SPLIT_HEAP + mp_state_mem_area_t *area = item.area; + #else + mp_state_mem_area_t *area = &MP_STATE_MEM(area); + #endif + size_t block = item.block; + // work out number of consecutive blocks in the chain starting with this one size_t n_blocks = 0; do { n_blocks += 1; - } while (ATB_GET_KIND(block + n_blocks) == AT_TAIL); + } while (ATB_GET_KIND(area, block + n_blocks) == AT_TAIL); // check this block's children - void **ptrs = (void **)PTR_FROM_BLOCK(block); + void **ptrs = (void **)PTR_FROM_BLOCK(area, block); for (size_t i = n_blocks * BYTES_PER_BLOCK / sizeof(void *); i > 0; i--, ptrs++) { MICROPY_GC_HOOK_LOOP void *ptr = *ptrs; - if (VERIFY_PTR(ptr)) { - // Mark and push this pointer - size_t childblock = BLOCK_FROM_PTR(ptr); - if (ATB_GET_KIND(childblock) == AT_HEAD) { - // an unmarked head, mark it, and push it on gc stack - TRACE_MARK(childblock, ptr); - ATB_HEAD_TO_MARK(childblock); - if (sp < MICROPY_ALLOC_GC_STACK_SIZE) { - MP_STATE_MEM(gc_stack)[sp++] = childblock; - } else { - MP_STATE_MEM(gc_stack_overflow) = 1; - } - } + // If this is a heap pointer that hasn't been marked, mark it and push + // it's children to the stack. + mp_state_mem_area_t *ptr_area = gc_get_ptr_area(ptr); + if (!ptr_area) { + // Not a heap-allocated pointer (might even be random data). + continue; + } + size_t ptr_block = BLOCK_FROM_PTR(ptr_area, ptr); + if (ATB_GET_KIND(ptr_area, ptr_block) != AT_HEAD) { + // This block is already marked. + continue; + } + // An unmarked head. Mark it, and push it on gc stack. + TRACE_MARK(ptr_block, ptr); + ATB_HEAD_TO_MARK(ptr_area, ptr_block); + if (sp < MICROPY_ALLOC_GC_STACK_SIZE) { + #if MICROPY_GC_SPLIT_HEAP + mp_gc_stack_item_t ptr_item = {ptr_area, ptr_block}; + #else + mp_gc_stack_item_t ptr_item = {ptr_block}; + #endif + MP_STATE_MEM(gc_stack)[sp++] = ptr_item; + } else { + MP_STATE_MEM(gc_stack_overflow) = 1; } } - // Are there any blocks on the stack? + // Are there any items on the stack? if (sp == 0) { break; // No, stack is empty, we're done. } - // pop the next block off the stack - block = MP_STATE_MEM(gc_stack)[--sp]; + // pop the next item off the stack + item = MP_STATE_MEM(gc_stack)[--sp]; } } @@ -256,11 +323,20 @@ STATIC void gc_deal_with_stack_overflow(void) { MP_STATE_MEM(gc_stack_overflow) = 0; // scan entire memory looking for blocks which have been marked but not their children - for (size_t block = 0; block < MP_STATE_MEM(gc_alloc_table_byte_len) * BLOCKS_PER_ATB; block++) { - MICROPY_GC_HOOK_LOOP - // trace (again) if mark bit set - if (ATB_GET_KIND(block) == AT_MARK) { - gc_mark_subtree(block); + for (mp_state_mem_area_t *area = &MP_STATE_MEM(area); area != NULL; area = NEXT_AREA(area)) { + for (size_t block = 0; block < area->gc_alloc_table_byte_len * BLOCKS_PER_ATB; block++) { + MICROPY_GC_HOOK_LOOP + // trace (again) if mark bit set + if (ATB_GET_KIND(area, block) == AT_MARK) { + #if MICROPY_GC_SPLIT_HEAP + mp_gc_stack_item_t item = {area, block}; + #else + mp_gc_stack_item_t item = {block}; + #endif + // *MP_STATE_MEM(gc_sp)++ = item; + // gc_drain_stack(); + gc_mark_subtree(item); + } } } } @@ -272,53 +348,55 @@ STATIC void gc_sweep(void) { #endif // free unmarked heads and their tails int free_tail = 0; - for (size_t block = 0; block < MP_STATE_MEM(gc_alloc_table_byte_len) * BLOCKS_PER_ATB; block++) { - MICROPY_GC_HOOK_LOOP - switch (ATB_GET_KIND(block)) { - case AT_HEAD: - #if MICROPY_ENABLE_FINALISER - if (FTB_GET(block)) { - mp_obj_base_t *obj = (mp_obj_base_t *)PTR_FROM_BLOCK(block); - if (obj->type != NULL) { - // if the object has a type then see if it has a __del__ method - mp_obj_t dest[2]; - mp_load_method_maybe(MP_OBJ_FROM_PTR(obj), MP_QSTR___del__, dest); - if (dest[0] != MP_OBJ_NULL) { - // load_method returned a method, execute it in a protected environment - #if MICROPY_ENABLE_SCHEDULER - mp_sched_lock(); - #endif - mp_call_function_1_protected(dest[0], dest[1]); - #if MICROPY_ENABLE_SCHEDULER - mp_sched_unlock(); - #endif + for (mp_state_mem_area_t *area = &MP_STATE_MEM(area); area != NULL; area = NEXT_AREA(area)) { + for (size_t block = 0; block < area->gc_alloc_table_byte_len * BLOCKS_PER_ATB; block++) { + MICROPY_GC_HOOK_LOOP + switch (ATB_GET_KIND(area, block)) { + case AT_HEAD: + #if MICROPY_ENABLE_FINALISER + if (FTB_GET(area, block)) { + mp_obj_base_t *obj = (mp_obj_base_t *)PTR_FROM_BLOCK(area, block); + if (obj->type != NULL) { + // if the object has a type then see if it has a __del__ method + mp_obj_t dest[2]; + mp_load_method_maybe(MP_OBJ_FROM_PTR(obj), MP_QSTR___del__, dest); + if (dest[0] != MP_OBJ_NULL) { + // load_method returned a method, execute it in a protected environment + #if MICROPY_ENABLE_SCHEDULER + mp_sched_lock(); + #endif + mp_call_function_1_protected(dest[0], dest[1]); + #if MICROPY_ENABLE_SCHEDULER + mp_sched_unlock(); + #endif + } } + // clear finaliser flag + FTB_CLEAR(area, block); } - // clear finaliser flag - FTB_CLEAR(block); - } - #endif - free_tail = 1; - DEBUG_printf("gc_sweep(%p)\n", (void *)PTR_FROM_BLOCK(block)); - #if MICROPY_PY_GC_COLLECT_RETVAL - MP_STATE_MEM(gc_collected)++; - #endif - // fall through to free the head - MP_FALLTHROUGH - - case AT_TAIL: - if (free_tail) { - ATB_ANY_TO_FREE(block); - #if CLEAR_ON_SWEEP - memset((void *)PTR_FROM_BLOCK(block), 0, BYTES_PER_BLOCK); #endif - } - break; + free_tail = 1; + DEBUG_printf("gc_sweep(%p)\n", (void *)PTR_FROM_BLOCK(area, block)); + #if MICROPY_PY_GC_COLLECT_RETVAL + MP_STATE_MEM(gc_collected)++; + #endif + // fall through to free the head + MP_FALLTHROUGH + + case AT_TAIL: + if (free_tail) { + ATB_ANY_TO_FREE(area, block); + #if CLEAR_ON_SWEEP + memset((void *)PTR_FROM_BLOCK(area, block), 0, BYTES_PER_BLOCK); + #endif + } + break; - case AT_MARK: - ATB_MARK_TO_HEAD(block); - free_tail = 0; - break; + case AT_MARK: + ATB_MARK_TO_HEAD(area, block); + free_tail = 0; + break; + } } } } @@ -360,13 +438,18 @@ void gc_collect_root(void **ptrs, size_t len) { for (size_t i = 0; i < len; i++) { MICROPY_GC_HOOK_LOOP void *ptr = gc_get_ptr(ptrs, i); - if (VERIFY_PTR(ptr)) { - size_t block = BLOCK_FROM_PTR(ptr); - if (ATB_GET_KIND(block) == AT_HEAD) { + mp_state_mem_area_t *area = gc_get_ptr_area(ptr); + if (area) { + size_t block = BLOCK_FROM_PTR(area, ptr); + if (ATB_GET_KIND(area, block) == AT_HEAD) { // An unmarked head: mark it, and mark all its children - TRACE_MARK(block, ptr); - ATB_HEAD_TO_MARK(block); - gc_mark_subtree(block); + ATB_HEAD_TO_MARK(area, block); + #if MICROPY_GC_SPLIT_HEAP + mp_gc_stack_item_t item = {area, block}; + #else + mp_gc_stack_item_t item = {block}; + #endif + gc_mark_subtree(item); } } } @@ -375,7 +458,12 @@ void gc_collect_root(void **ptrs, size_t len) { void gc_collect_end(void) { gc_deal_with_stack_overflow(); gc_sweep(); - MP_STATE_MEM(gc_last_free_atb_index) = 0; + #if MICROPY_GC_SPLIT_HEAP + MP_STATE_MEM(gc_last_free_area) = &MP_STATE_MEM(area); + #endif + for (mp_state_mem_area_t *area = &MP_STATE_MEM(area); area != NULL; area = NEXT_AREA(area)) { + area->gc_last_free_atb_index = 0; + } MP_STATE_THREAD(gc_lock_depth)--; GC_EXIT(); } @@ -389,59 +477,62 @@ void gc_sweep_all(void) { void gc_info(gc_info_t *info) { GC_ENTER(); - info->total = MP_STATE_MEM(gc_pool_end) - MP_STATE_MEM(gc_pool_start); + info->total = 0; info->used = 0; info->free = 0; info->max_free = 0; info->num_1block = 0; info->num_2block = 0; info->max_block = 0; - bool finish = false; - for (size_t block = 0, len = 0, len_free = 0; !finish;) { - size_t kind = ATB_GET_KIND(block); - switch (kind) { - case AT_FREE: - info->free += 1; - len_free += 1; - len = 0; - break; - - case AT_HEAD: - info->used += 1; - len = 1; - break; - - case AT_TAIL: - info->used += 1; - len += 1; - break; - - case AT_MARK: - // shouldn't happen - break; - } - - block++; - finish = (block == MP_STATE_MEM(gc_alloc_table_byte_len) * BLOCKS_PER_ATB); - // Get next block type if possible - if (!finish) { - kind = ATB_GET_KIND(block); - } - - if (finish || kind == AT_FREE || kind == AT_HEAD) { - if (len == 1) { - info->num_1block += 1; - } else if (len == 2) { - info->num_2block += 1; + for (mp_state_mem_area_t *area = &MP_STATE_MEM(area); area != NULL; area = NEXT_AREA(area)) { + bool finish = false; + info->total += area->gc_pool_end - area->gc_pool_start; + for (size_t block = 0, len = 0, len_free = 0; !finish;) { + size_t kind = ATB_GET_KIND(area, block); + switch (kind) { + case AT_FREE: + info->free += 1; + len_free += 1; + len = 0; + break; + + case AT_HEAD: + info->used += 1; + len = 1; + break; + + case AT_TAIL: + info->used += 1; + len += 1; + break; + + case AT_MARK: + // shouldn't happen + break; } - if (len > info->max_block) { - info->max_block = len; + + block++; + finish = (block == area->gc_alloc_table_byte_len * BLOCKS_PER_ATB); + // Get next block type if possible + if (!finish) { + kind = ATB_GET_KIND(area, block); } - if (finish || kind == AT_HEAD) { - if (len_free > info->max_free) { - info->max_free = len_free; + + if (finish || kind == AT_FREE || kind == AT_HEAD) { + if (len == 1) { + info->num_1block += 1; + } else if (len == 2) { + info->num_2block += 1; + } + if (len > info->max_block) { + info->max_block = len; + } + if (finish || kind == AT_HEAD) { + if (len_free > info->max_free) { + info->max_free = len_free; + } + len_free = 0; } - len_free = 0; } } } @@ -468,6 +559,7 @@ void *gc_alloc(size_t n_bytes, unsigned int alloc_flags) { GC_ENTER(); + mp_state_mem_area_t *area; size_t i; size_t end_block; size_t start_block; @@ -485,16 +577,33 @@ void *gc_alloc(size_t n_bytes, unsigned int alloc_flags) { for (;;) { + #if MICROPY_GC_SPLIT_HEAP + area = MP_STATE_MEM(gc_last_free_area); + #else + area = &MP_STATE_MEM(area); + #endif + // look for a run of n_blocks available blocks - n_free = 0; - for (i = MP_STATE_MEM(gc_last_free_atb_index); i < MP_STATE_MEM(gc_alloc_table_byte_len); i++) { - byte a = MP_STATE_MEM(gc_alloc_table_start)[i]; - // *FORMAT-OFF* - if (ATB_0_IS_FREE(a)) { if (++n_free >= n_blocks) { i = i * BLOCKS_PER_ATB + 0; goto found; } } else { n_free = 0; } - if (ATB_1_IS_FREE(a)) { if (++n_free >= n_blocks) { i = i * BLOCKS_PER_ATB + 1; goto found; } } else { n_free = 0; } - if (ATB_2_IS_FREE(a)) { if (++n_free >= n_blocks) { i = i * BLOCKS_PER_ATB + 2; goto found; } } else { n_free = 0; } - if (ATB_3_IS_FREE(a)) { if (++n_free >= n_blocks) { i = i * BLOCKS_PER_ATB + 3; goto found; } } else { n_free = 0; } - // *FORMAT-ON* + for (; area != NULL; area = NEXT_AREA(area), i = 0) { + n_free = 0; + for (i = area->gc_last_free_atb_index; i < area->gc_alloc_table_byte_len; i++) { + byte a = area->gc_alloc_table_start[i]; + // *FORMAT-OFF* + if (ATB_0_IS_FREE(a)) { if (++n_free >= n_blocks) { i = i * BLOCKS_PER_ATB + 0; goto found; } } else { n_free = 0; } + if (ATB_1_IS_FREE(a)) { if (++n_free >= n_blocks) { i = i * BLOCKS_PER_ATB + 1; goto found; } } else { n_free = 0; } + if (ATB_2_IS_FREE(a)) { if (++n_free >= n_blocks) { i = i * BLOCKS_PER_ATB + 2; goto found; } } else { n_free = 0; } + if (ATB_3_IS_FREE(a)) { if (++n_free >= n_blocks) { i = i * BLOCKS_PER_ATB + 3; goto found; } } else { n_free = 0; } + // *FORMAT-ON* + } + + // No free blocks found on this heap. Mark this heap as + // filled, so we won't try to find free space here again until + // space is freed. + #if MICROPY_GC_SPLIT_HEAP + if (n_blocks == 1) { + area->gc_last_free_atb_index = (i + 1) / BLOCKS_PER_ATB; // or (size_t)-1 + } + #endif } GC_EXIT(); @@ -520,21 +629,24 @@ void *gc_alloc(size_t n_bytes, unsigned int alloc_flags) { // before this one. Also, whenever we free or shink a block we must check // if this index needs adjusting (see gc_realloc and gc_free). if (n_free == 1) { - MP_STATE_MEM(gc_last_free_atb_index) = (i + 1) / BLOCKS_PER_ATB; + #if MICROPY_GC_SPLIT_HEAP + MP_STATE_MEM(gc_last_free_area) = area; + #endif + area->gc_last_free_atb_index = (i + 1) / BLOCKS_PER_ATB; } // mark first block as used head - ATB_FREE_TO_HEAD(start_block); + ATB_FREE_TO_HEAD(area, start_block); // mark rest of blocks as used tail // TODO for a run of many blocks can make this more efficient for (size_t bl = start_block + 1; bl <= end_block; bl++) { - ATB_FREE_TO_TAIL(bl); + ATB_FREE_TO_TAIL(area, bl); } // get pointer to first block // we must create this pointer before unlocking the GC so a collection can find it - void *ret_ptr = (void *)(MP_STATE_MEM(gc_pool_start) + start_block * BYTES_PER_BLOCK); + void *ret_ptr = (void *)(area->gc_pool_start + start_block * BYTES_PER_BLOCK); DEBUG_printf("gc_alloc(%p)\n", ret_ptr); #if MICROPY_GC_ALLOC_THRESHOLD @@ -561,7 +673,7 @@ void *gc_alloc(size_t n_bytes, unsigned int alloc_flags) { ((mp_obj_base_t *)ret_ptr)->type = NULL; // set mp_obj flag only if it has a finaliser GC_ENTER(); - FTB_SET(start_block); + FTB_SET(area, start_block); GC_EXIT(); } #else @@ -598,46 +710,65 @@ void gc_free(void *ptr) { DEBUG_printf("gc_free(%p)\n", ptr); if (ptr == NULL) { + // free(NULL) is a no-op GC_EXIT(); - } else { - // get the GC block number corresponding to this pointer - assert(VERIFY_PTR(ptr)); - size_t block = BLOCK_FROM_PTR(ptr); - assert(ATB_GET_KIND(block) == AT_HEAD); - - #if MICROPY_ENABLE_FINALISER - FTB_CLEAR(block); - #endif + return; + } - // set the last_free pointer to this block if it's earlier in the heap - if (block / BLOCKS_PER_ATB < MP_STATE_MEM(gc_last_free_atb_index)) { - MP_STATE_MEM(gc_last_free_atb_index) = block / BLOCKS_PER_ATB; - } + // get the GC block number corresponding to this pointer + mp_state_mem_area_t *area = gc_get_ptr_area(ptr); + assert(area); + size_t block = BLOCK_FROM_PTR(area, ptr); + assert(ATB_GET_KIND(area, block) == AT_HEAD); - // free head and all of its tail blocks - do { - ATB_ANY_TO_FREE(block); - block += 1; - } while (ATB_GET_KIND(block) == AT_TAIL); + #if MICROPY_ENABLE_FINALISER + FTB_CLEAR(area, block); + #endif - GC_EXIT(); + #if MICROPY_GC_SPLIT_HEAP + if (MP_STATE_MEM(gc_last_free_area) != area) { + // We freed something but it isn't the current area. Reset the + // last free area to the start for a rescan. Note that this won't + // give much of a performance hit, since areas that are completely + // filled will likely be skipped (the gc_last_free_atb_index + // points to the last block). + // The reason why this is necessary is because it is not possible + // to see which area came first (like it is possible to adjust + // gc_last_free_atb_index based on whether the freed block is + // before the last free block). + MP_STATE_MEM(gc_last_free_area) = &MP_STATE_MEM(area); + } + #endif - #if EXTENSIVE_HEAP_PROFILING - gc_dump_alloc_table(); - #endif + // set the last_free pointer to this block if it's earlier in the heap + if (block / BLOCKS_PER_ATB < area->gc_last_free_atb_index) { + area->gc_last_free_atb_index = block / BLOCKS_PER_ATB; } + + // free head and all of its tail blocks + do { + ATB_ANY_TO_FREE(area, block); + block += 1; + } while (ATB_GET_KIND(area, block) == AT_TAIL); + + GC_EXIT(); + + #if EXTENSIVE_HEAP_PROFILING + gc_dump_alloc_table(); + #endif } size_t gc_nbytes(const void *ptr) { GC_ENTER(); - if (VERIFY_PTR(ptr)) { - size_t block = BLOCK_FROM_PTR(ptr); - if (ATB_GET_KIND(block) == AT_HEAD) { + mp_state_mem_area_t *area = gc_get_ptr_area(ptr); + if (area) { + size_t block = BLOCK_FROM_PTR(area, ptr); + if (ATB_GET_KIND(area, block) == AT_HEAD) { // work out number of consecutive blocks in the chain starting with this on size_t n_blocks = 0; do { n_blocks += 1; - } while (ATB_GET_KIND(block + n_blocks) == AT_TAIL); + } while (ATB_GET_KIND(area, block + n_blocks) == AT_TAIL); GC_EXIT(); return n_blocks * BYTES_PER_BLOCK; } @@ -698,9 +829,10 @@ void *gc_realloc(void *ptr_in, size_t n_bytes, bool allow_move) { GC_ENTER(); // get the GC block number corresponding to this pointer - assert(VERIFY_PTR(ptr)); - size_t block = BLOCK_FROM_PTR(ptr); - assert(ATB_GET_KIND(block) == AT_HEAD); + mp_state_mem_area_t *area = gc_get_ptr_area(ptr); + assert(area); + size_t block = BLOCK_FROM_PTR(area, ptr); + assert(ATB_GET_KIND(area, block) == AT_HEAD); // compute number of new blocks that are requested size_t new_blocks = (n_bytes + BYTES_PER_BLOCK - 1) / BYTES_PER_BLOCK; @@ -713,9 +845,9 @@ void *gc_realloc(void *ptr_in, size_t n_bytes, bool allow_move) { // efficiently shrink it (see below for shrinking code). size_t n_free = 0; size_t n_blocks = 1; // counting HEAD block - size_t max_block = MP_STATE_MEM(gc_alloc_table_byte_len) * BLOCKS_PER_ATB; + size_t max_block = area->gc_alloc_table_byte_len * BLOCKS_PER_ATB; for (size_t bl = block + n_blocks; bl < max_block; bl++) { - byte block_type = ATB_GET_KIND(bl); + byte block_type = ATB_GET_KIND(area, bl); if (block_type == AT_TAIL) { n_blocks++; continue; @@ -741,12 +873,19 @@ void *gc_realloc(void *ptr_in, size_t n_bytes, bool allow_move) { if (new_blocks < n_blocks) { // free unneeded tail blocks for (size_t bl = block + new_blocks, count = n_blocks - new_blocks; count > 0; bl++, count--) { - ATB_ANY_TO_FREE(bl); + ATB_ANY_TO_FREE(area, bl); } + #if MICROPY_GC_SPLIT_HEAP + if (MP_STATE_MEM(gc_last_free_area) != area) { + // See comment in gc_free. + MP_STATE_MEM(gc_last_free_area) = &MP_STATE_MEM(area); + } + #endif + // set the last_free pointer to end of this block if it's earlier in the heap - if ((block + new_blocks) / BLOCKS_PER_ATB < MP_STATE_MEM(gc_last_free_atb_index)) { - MP_STATE_MEM(gc_last_free_atb_index) = (block + new_blocks) / BLOCKS_PER_ATB; + if ((block + new_blocks) / BLOCKS_PER_ATB < area->gc_last_free_atb_index) { + area->gc_last_free_atb_index = (block + new_blocks) / BLOCKS_PER_ATB; } GC_EXIT(); @@ -762,8 +901,8 @@ void *gc_realloc(void *ptr_in, size_t n_bytes, bool allow_move) { if (new_blocks <= n_blocks + n_free) { // mark few more blocks as used tail for (size_t bl = block + n_blocks; bl < block + new_blocks; bl++) { - assert(ATB_GET_KIND(bl) == AT_FREE); - ATB_FREE_TO_TAIL(bl); + assert(ATB_GET_KIND(area, bl) == AT_FREE); + ATB_FREE_TO_TAIL(area, bl); } GC_EXIT(); @@ -784,7 +923,7 @@ void *gc_realloc(void *ptr_in, size_t n_bytes, bool allow_move) { } #if MICROPY_ENABLE_FINALISER - bool ftb_state = FTB_GET(block); + bool ftb_state = FTB_GET(area, block); #else bool ftb_state = false; #endif @@ -823,129 +962,131 @@ void gc_dump_info(void) { void gc_dump_alloc_table(void) { GC_ENTER(); static const size_t DUMP_BYTES_PER_LINE = 64; - #if !EXTENSIVE_HEAP_PROFILING - // When comparing heap output we don't want to print the starting - // pointer of the heap because it changes from run to run. - mp_printf(&mp_plat_print, "GC memory layout; from %p:", MP_STATE_MEM(gc_pool_start)); - #endif - for (size_t bl = 0; bl < MP_STATE_MEM(gc_alloc_table_byte_len) * BLOCKS_PER_ATB; bl++) { - if (bl % DUMP_BYTES_PER_LINE == 0) { - // a new line of blocks - { - // check if this line contains only free blocks - size_t bl2 = bl; - while (bl2 < MP_STATE_MEM(gc_alloc_table_byte_len) * BLOCKS_PER_ATB && ATB_GET_KIND(bl2) == AT_FREE) { - bl2++; - } - if (bl2 - bl >= 2 * DUMP_BYTES_PER_LINE) { - // there are at least 2 lines containing only free blocks, so abbreviate their printing - mp_printf(&mp_plat_print, "\n (%u lines all free)", (uint)(bl2 - bl) / DUMP_BYTES_PER_LINE); - bl = bl2 & (~(DUMP_BYTES_PER_LINE - 1)); - if (bl >= MP_STATE_MEM(gc_alloc_table_byte_len) * BLOCKS_PER_ATB) { - // got to end of heap - break; - } - } - } - // print header for new line of blocks - // (the cast to uint32_t is for 16-bit ports) - // mp_printf(&mp_plat_print, "\n%05x: ", (uint)(PTR_FROM_BLOCK(bl) & (uint32_t)0xfffff)); - mp_printf(&mp_plat_print, "\n%05x: ", (uint)((bl * BYTES_PER_BLOCK) & (uint32_t)0xfffff)); - } - int c = ' '; - switch (ATB_GET_KIND(bl)) { - case AT_FREE: - c = '.'; - break; - /* this prints out if the object is reachable from BSS or STACK (for unix only) - case AT_HEAD: { - c = 'h'; - void **ptrs = (void**)(void*)&mp_state_ctx; - mp_uint_t len = offsetof(mp_state_ctx_t, vm.stack_top) / sizeof(mp_uint_t); - for (mp_uint_t i = 0; i < len; i++) { - mp_uint_t ptr = (mp_uint_t)ptrs[i]; - if (VERIFY_PTR(ptr) && BLOCK_FROM_PTR(ptr) == bl) { - c = 'B'; - break; + for (mp_state_mem_area_t *area = &MP_STATE_MEM(area); area != NULL; area = NEXT_AREA(area)) { + #if !EXTENSIVE_HEAP_PROFILING + // When comparing heap output we don't want to print the starting + // pointer of the heap because it changes from run to run. + mp_printf(&mp_plat_print, "GC memory layout; from %p:", area->gc_pool_start); + #endif + for (size_t bl = 0; bl < area->gc_alloc_table_byte_len * BLOCKS_PER_ATB; bl++) { + if (bl % DUMP_BYTES_PER_LINE == 0) { + // a new line of blocks + { + // check if this line contains only free blocks + size_t bl2 = bl; + while (bl2 < area->gc_alloc_table_byte_len * BLOCKS_PER_ATB && ATB_GET_KIND(area, bl2) == AT_FREE) { + bl2++; } - } - if (c == 'h') { - ptrs = (void**)&c; - len = ((mp_uint_t)MP_STATE_THREAD(stack_top) - (mp_uint_t)&c) / sizeof(mp_uint_t); - for (mp_uint_t i = 0; i < len; i++) { - mp_uint_t ptr = (mp_uint_t)ptrs[i]; - if (VERIFY_PTR(ptr) && BLOCK_FROM_PTR(ptr) == bl) { - c = 'S'; + if (bl2 - bl >= 2 * DUMP_BYTES_PER_LINE) { + // there are at least 2 lines containing only free blocks, so abbreviate their printing + mp_printf(&mp_plat_print, "\n (%u lines all free)", (uint)(bl2 - bl) / DUMP_BYTES_PER_LINE); + bl = bl2 & (~(DUMP_BYTES_PER_LINE - 1)); + if (bl >= area->gc_alloc_table_byte_len * BLOCKS_PER_ATB) { + // got to end of heap break; } } } - break; + // print header for new line of blocks + // (the cast to uint32_t is for 16-bit ports) + // mp_printf(&mp_plat_print, "\n%05x: ", (uint)(PTR_FROM_BLOCK(area, bl) & (uint32_t)0xfffff)); + mp_printf(&mp_plat_print, "\n%05x: ", (uint)((bl * BYTES_PER_BLOCK) & (uint32_t)0xfffff)); } - */ - /* this prints the uPy object type of the head block */ - case AT_HEAD: { - void **ptr = (void **)(MP_STATE_MEM(gc_pool_start) + bl * BYTES_PER_BLOCK); - if (*ptr == &mp_type_tuple) { - c = 'T'; - } else if (*ptr == &mp_type_list) { - c = 'L'; - } else if (*ptr == &mp_type_dict) { - c = 'D'; - } else if (*ptr == &mp_type_str || *ptr == &mp_type_bytes) { - c = 'S'; - } - #if MICROPY_PY_BUILTINS_BYTEARRAY - else if (*ptr == &mp_type_bytearray) { - c = 'A'; - } - #endif - #if MICROPY_PY_ARRAY - else if (*ptr == &mp_type_array) { - c = 'A'; - } - #endif - #if MICROPY_PY_BUILTINS_FLOAT - else if (*ptr == &mp_type_float) { - c = 'F'; - } - #endif - else if (*ptr == &mp_type_fun_bc) { - c = 'B'; - } else if (*ptr == &mp_type_module) { - c = 'M'; - } else { + int c = ' '; + switch (ATB_GET_KIND(area, bl)) { + case AT_FREE: + c = '.'; + break; + /* this prints out if the object is reachable from BSS or STACK (for unix only) + case AT_HEAD: { c = 'h'; - #if 0 - // This code prints "Q" for qstr-pool data, and "q" for qstr-str - // data. It can be useful to see how qstrs are being allocated, - // but is disabled by default because it is very slow. - for (const qstr_pool_t *pool = MP_STATE_VM(last_pool); c == 'h' && pool != NULL; pool = pool->prev) { - if ((const qstr_pool_t *)ptr == pool) { - c = 'Q'; + void **ptrs = (void**)(void*)&mp_state_ctx; + mp_uint_t len = offsetof(mp_state_ctx_t, vm.stack_top) / sizeof(mp_uint_t); + for (mp_uint_t i = 0; i < len; i++) { + mp_uint_t ptr = (mp_uint_t)ptrs[i]; + if (gc_get_ptr_area(ptr) && BLOCK_FROM_PTR(ptr) == bl) { + c = 'B'; break; } - for (const char *const *q = pool->qstrs, *const *q_top = pool->qstrs + pool->len; q < q_top; q++) { - if ((const char *)ptr == *q) { - c = 'q'; + } + if (c == 'h') { + ptrs = (void**)&c; + len = ((mp_uint_t)MP_STATE_THREAD(stack_top) - (mp_uint_t)&c) / sizeof(mp_uint_t); + for (mp_uint_t i = 0; i < len; i++) { + mp_uint_t ptr = (mp_uint_t)ptrs[i]; + if (gc_get_ptr_area(ptr) && BLOCK_FROM_PTR(ptr) == bl) { + c = 'S'; break; } } } + break; + } + */ + /* this prints the uPy object type of the head block */ + case AT_HEAD: { + void **ptr = (void **)(area->gc_pool_start + bl * BYTES_PER_BLOCK); + if (*ptr == &mp_type_tuple) { + c = 'T'; + } else if (*ptr == &mp_type_list) { + c = 'L'; + } else if (*ptr == &mp_type_dict) { + c = 'D'; + } else if (*ptr == &mp_type_str || *ptr == &mp_type_bytes) { + c = 'S'; + } + #if MICROPY_PY_BUILTINS_BYTEARRAY + else if (*ptr == &mp_type_bytearray) { + c = 'A'; + } #endif + #if MICROPY_PY_ARRAY + else if (*ptr == &mp_type_array) { + c = 'A'; + } + #endif + #if MICROPY_PY_BUILTINS_FLOAT + else if (*ptr == &mp_type_float) { + c = 'F'; + } + #endif + else if (*ptr == &mp_type_fun_bc) { + c = 'B'; + } else if (*ptr == &mp_type_module) { + c = 'M'; + } else { + c = 'h'; + #if 0 + // This code prints "Q" for qstr-pool data, and "q" for qstr-str + // data. It can be useful to see how qstrs are being allocated, + // but is disabled by default because it is very slow. + for (qstr_pool_t *pool = MP_STATE_VM(last_pool); c == 'h' && pool != NULL; pool = pool->prev) { + if ((qstr_pool_t *)ptr == pool) { + c = 'Q'; + break; + } + for (const byte **q = pool->qstrs, **q_top = pool->qstrs + pool->len; q < q_top; q++) { + if ((const byte *)ptr == *q) { + c = 'q'; + break; + } + } + } + #endif + } + break; } - break; + case AT_TAIL: + c = '='; + break; + case AT_MARK: + c = 'm'; + break; } - case AT_TAIL: - c = '='; - break; - case AT_MARK: - c = 'm'; - break; + mp_printf(&mp_plat_print, "%c", c); } - mp_printf(&mp_plat_print, "%c", c); + mp_print_str(&mp_plat_print, "\n"); } - mp_print_str(&mp_plat_print, "\n"); GC_EXIT(); } diff --git a/py/gc.h b/py/gc.h index 5aef27c006b9..bb4204b06f43 100644 --- a/py/gc.h +++ b/py/gc.h @@ -28,9 +28,15 @@ #include #include +#include "py/mpconfig.h" void gc_init(void *start, void *end); +#if MICROPY_GC_SPLIT_HEAP +// Used to add additional memory areas to the heap. +void gc_add(void *start, void *end); +#endif + // These lock/unlock functions can be nested. // They can be used to prevent the GC from allocating/freeing. void gc_lock(void); diff --git a/py/mpconfig.h b/py/mpconfig.h index ee0123be34ea..25f37ff8b3a2 100644 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -606,6 +606,11 @@ #define MICROPY_ENABLE_GC (0) #endif +// Whether the garbage-collected heap can be split over multiple memory areas. +#ifndef MICROPY_GC_SPLIT_HEAP +#define MICROPY_GC_SPLIT_HEAP (0) +#endif + // Hook to run code during time consuming garbage collector operations #ifndef MICROPY_GC_HOOK_LOOP #define MICROPY_GC_HOOK_LOOP diff --git a/py/mpstate.h b/py/mpstate.h index 1b6e623ca9f6..ea069c24dd1b 100644 --- a/py/mpstate.h +++ b/py/mpstate.h @@ -71,12 +71,11 @@ typedef struct _mp_sched_item_t { mp_obj_t arg; } mp_sched_item_t; -// This structure hold information about the memory allocation system. -typedef struct _mp_state_mem_t { - #if MICROPY_MEM_STATS - size_t total_bytes_allocated; - size_t current_bytes_allocated; - size_t peak_bytes_allocated; +// This structure holds information about a single contiguous area of +// memory reserved for the memory manager. +typedef struct _mp_state_mem_area_t { + #if MICROPY_GC_SPLIT_HEAP + struct _mp_state_mem_area_t *next; #endif byte *gc_alloc_table_start; @@ -87,8 +86,30 @@ typedef struct _mp_state_mem_t { byte *gc_pool_start; byte *gc_pool_end; + size_t gc_last_free_atb_index; +} mp_state_mem_area_t; + +// This structure holds a single stacked block and the area it is on. Used +// during garbage collection. +typedef struct { + #if MICROPY_GC_SPLIT_HEAP + mp_state_mem_area_t *area; + #endif + size_t block; +} mp_gc_stack_item_t; + +// This structure hold information about the memory allocation system. +typedef struct _mp_state_mem_t { + #if MICROPY_MEM_STATS + size_t total_bytes_allocated; + size_t current_bytes_allocated; + size_t peak_bytes_allocated; + #endif + + mp_state_mem_area_t area; + int gc_stack_overflow; - MICROPY_GC_STACK_ENTRY_TYPE gc_stack[MICROPY_ALLOC_GC_STACK_SIZE]; + mp_gc_stack_item_t gc_stack[MICROPY_ALLOC_GC_STACK_SIZE]; // This variable controls auto garbage collection. If set to 0 then the // GC won't automatically run when gc_alloc can't find enough blocks. But @@ -100,7 +121,9 @@ typedef struct _mp_state_mem_t { size_t gc_alloc_threshold; #endif - size_t gc_last_free_atb_index; + #if MICROPY_GC_SPLIT_HEAP + mp_state_mem_area_t *gc_last_free_area; + #endif #if MICROPY_PY_GC_COLLECT_RETVAL size_t gc_collected;