test performance of upstream linked list PR
d-netto committed Jul 11, 2024
1 parent c8f56c6 commit 96d6356
Showing 4 changed files with 106 additions and 113 deletions.
8 changes: 4 additions & 4 deletions src/gc-debug.c
@@ -132,7 +132,7 @@ static void clear_mark(int bits)
}
bigval_t *v;
for (int i = 0; i < gc_n_threads; i++) {
v = gc_all_tls_states[i]->heap.big_objects;
v = gc_all_tls_states[i]->heap.young_generation_of_bigvals;
while (v != NULL) {
void *gcv = &v->header;
if (!gc_verifying)
@@ -142,7 +142,7 @@ static void clear_mark(int bits)
}
}

v = big_objects_marked;
v = oldest_generation_of_bigvals;
while (v != NULL) {
void *gcv = &v->header;
if (!gc_verifying)
@@ -994,15 +994,15 @@ void gc_stats_big_obj(void)
size_t nused=0, nbytes=0, nused_old=0, nbytes_old=0;
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
bigval_t *v = ptls2->heap.big_objects;
bigval_t *v = ptls2->heap.young_generation_of_bigvals;
while (v != NULL) {
if (gc_marked(v->bits.gc)) {
nused++;
nbytes += v->sz & ~3;
}
v = v->next;
}
v = big_objects_marked;
v = oldest_generation_of_bigvals;
while (v != NULL) {
if (gc_marked(v->bits.gc)) {
nused_old++;
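
Annotation (not part of the diff): `big_objects` becomes the per-thread
`young_generation_of_bigvals`, and the global `big_objects_marked` becomes
`oldest_generation_of_bigvals`. As the gc.c hunks below show, each list now
begins with a sentinel node, and the sweeps there start at `sentinel->next`
to skip it. A minimal sketch of that traversal pattern, with the struct
reduced to the fields these walkers touch:

    #include <stddef.h>

    typedef struct bigval_t {
        struct bigval_t *next;
        size_t sz;
    } bigval_t;

    /* The sentinel carries no object, so a walk begins at its successor. */
    static size_t count_bigvals(const bigval_t *sentinel)
    {
        size_t n = 0;
        for (const bigval_t *v = sentinel->next; v != NULL; v = v->next)
            n++;
        return n;
    }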
162 changes: 78 additions & 84 deletions src/gc.c
@@ -34,6 +34,8 @@ uv_cond_t gc_threads_cond;
uv_sem_t gc_sweep_assists_needed;
// Mutex used to coordinate entry of GC threads in the mark loop
uv_mutex_t gc_queue_observer_lock;
// Tag for sentinel nodes in bigval list
uintptr_t gc_bigval_sentinel_tag;

// Linked list of callback functions

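Annotation (not part of the diff): the sentinel's `header` word is set to a
process-wide tag so code that lands on the node can tell it apart from a real
object. A hedged sketch of that check; the types are simplified and
`is_sentinel` is a hypothetical helper, not something the commit defines:

    #include <stdint.h>

    typedef struct bigval_t {
        struct bigval_t *next;
        uintptr_t header; /* a real object's type-tag word, or the sentinel tag */
    } bigval_t;

    static uintptr_t sentinel_tag; /* stand-in for gc_bigval_sentinel_tag */

    static int is_sentinel(const bigval_t *v)
    {
        return v->header == sentinel_tag;
    }
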
@@ -150,7 +152,6 @@ JL_DLLEXPORT void jl_gc_set_cb_notify_gc_pressure(jl_gc_cb_notify_gc_pressure_t
// is going to realloc the buffer (of its own list) or accessing the
// list of another thread
static jl_mutex_t finalizers_lock;
static uv_mutex_t gc_cache_lock;

// mutex for gc-heap-snapshot.
jl_mutex_t heapsnapshot_lock;
@@ -201,8 +202,8 @@ JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT
return jl_buff_tag;
}

// List of marked big objects. Not per-thread. Accessed only by master thread.
bigval_t *big_objects_marked = NULL;
// List of big objects in oldest generation (`GC_OLD_MARKED`). Not per-thread. Accessed only by master thread.
bigval_t *oldest_generation_of_bigvals = NULL;

// -- Finalization --
// `ptls->finalizers` and `finalizer_list_marked` might have tagged pointers.
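
Annotation (not part of the diff): big objects now live on doubly linked
lists headed by a sentinel, so neither insertion nor removal ever has to
patch a list-head pointer. A minimal sketch of what `gc_big_object_link(n, v)`
and `gc_big_object_unlink(v)` plausibly do under this design (the real
`bigval_t` also carries the object header and size; helper names here are
illustrative):

    #include <stddef.h>

    typedef struct bigval_t {
        struct bigval_t *next;
        struct bigval_t *prev;
    } bigval_t;

    /* Insert v immediately after n; n may be the sentinel or any real node. */
    static void big_object_link(bigval_t *n, bigval_t *v)
    {
        v->next = n->next;
        v->prev = n;
        if (n->next != NULL)
            n->next->prev = v;
        n->next = v;
    }

    /* Remove v in O(1). The sentinel guarantees v->prev is a real node, so
     * the first element needs no special case. */
    static void big_object_unlink(bigval_t *v)
    {
        v->prev->next = v->next;
        if (v->next != NULL)
            v->next->prev = v->prev;
        v->next = NULL;
        v->prev = NULL;
    }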
@@ -759,60 +760,25 @@ static int64_t t_start = 0; // Time GC starts;
static int64_t last_trim_maxrss = 0;
#endif

static void gc_sync_cache_nolock(jl_ptls_t ptls, jl_gc_mark_cache_t *gc_cache) JL_NOTSAFEPOINT
static void gc_sync_cache(jl_ptls_t ptls, jl_gc_mark_cache_t *gc_cache) JL_NOTSAFEPOINT
{
const int nbig = gc_cache->nbig_obj;
for (int i = 0; i < nbig; i++) {
void *ptr = gc_cache->big_obj[i];
bigval_t *hdr = (bigval_t*)gc_ptr_clear_tag(ptr, 1);
gc_big_object_unlink(hdr);
if (gc_ptr_tag(ptr, 1)) {
gc_big_object_link(hdr, &ptls->heap.big_objects);
}
else {
// Move hdr from `big_objects` list to `big_objects_marked list`
gc_big_object_link(hdr, &big_objects_marked);
}
}
gc_cache->nbig_obj = 0;
perm_scanned_bytes += gc_cache->perm_scanned_bytes;
scanned_bytes += gc_cache->scanned_bytes;
gc_cache->perm_scanned_bytes = 0;
gc_cache->scanned_bytes = 0;
}

static void gc_sync_cache(jl_ptls_t ptls) JL_NOTSAFEPOINT
{
uv_mutex_lock(&gc_cache_lock);
gc_sync_cache_nolock(ptls, &ptls->gc_cache);
uv_mutex_unlock(&gc_cache_lock);
}

// No other threads can be running marking at the same time
static void gc_sync_all_caches_nolock(jl_ptls_t ptls)
static void gc_sync_all_caches(jl_ptls_t ptls)
{
assert(gc_n_threads);
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
if (ptls2 != NULL)
gc_sync_cache_nolock(ptls, &ptls2->gc_cache);
gc_sync_cache(ptls, &ptls2->gc_cache);
}
}

STATIC_INLINE void gc_queue_big_marked(jl_ptls_t ptls, bigval_t *hdr,
int toyoung) JL_NOTSAFEPOINT
{
const int nentry = sizeof(ptls->gc_cache.big_obj) / sizeof(void*);
size_t nobj = ptls->gc_cache.nbig_obj;
if (__unlikely(nobj >= nentry)) {
gc_sync_cache(ptls);
nobj = 0;
}
uintptr_t v = (uintptr_t)hdr;
ptls->gc_cache.big_obj[nobj] = (void*)(toyoung ? (v | 1) : v);
ptls->gc_cache.nbig_obj = nobj + 1;
}

// Atomically set the mark bit for object and return whether it was previously unmarked
FORCE_INLINE int gc_try_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode) JL_NOTSAFEPOINT
{
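
Annotation (not part of the diff): with promotions now done by direct list
surgery during marking, the per-thread cache no longer batches big-object
pointers, so `gc_cache_lock` and the `_nolock` naming disappear. All that
remains of the flush is counter accumulation, sketched here with simplified
types:

    #include <stddef.h>

    typedef struct {
        size_t scanned_bytes;
        size_t perm_scanned_bytes;
    } mark_cache_t;

    static size_t scanned_bytes, perm_scanned_bytes;

    /* Fold one thread's byte counters into the globals and reset them;
     * called only when no marking is in flight, hence no lock. */
    static void sync_cache(mark_cache_t *c)
    {
        perm_scanned_bytes += c->perm_scanned_bytes;
        scanned_bytes += c->scanned_bytes;
        c->perm_scanned_bytes = 0;
        c->scanned_bytes = 0;
    }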
@@ -849,16 +815,14 @@ STATIC_INLINE void gc_setmark_big(jl_ptls_t ptls, jl_taggedvalue_t *o,
bigval_t *hdr = bigval_header(o);
if (mark_mode == GC_OLD_MARKED) {
ptls->gc_cache.perm_scanned_bytes += hdr->sz;
gc_queue_big_marked(ptls, hdr, 0);
}
else {
ptls->gc_cache.scanned_bytes += hdr->sz;
// We can't easily tell if the object is old or being promoted
// from the gc bits but if the `age` is `0` then the object
// must be already on a young list.
if (mark_reset_age) {
assert(jl_atomic_load(&gc_n_threads_marking) == 0); // `mark_reset_age` is only used during single-threaded marking
// Reset the object as if it was just allocated
gc_queue_big_marked(ptls, hdr, 1);
gc_big_object_unlink(hdr);
gc_big_object_link(ptls->heap.young_generation_of_bigvals, hdr);
}
}
}
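
Annotation (not part of the diff): the deleted `gc_queue_big_marked` deferred
list moves through the cache because several threads may mark concurrently;
the new code unlinks and relinks in place, which the added assert justifies
(`mark_reset_age` only occurs during single-threaded marking). A
self-contained toy program exercising the three moves an object can make:
allocation onto the young list, promotion to the oldest list during sweep,
and demotion back via `mark_reset_age`. All names are illustrative:

    #include <assert.h>
    #include <stddef.h>

    typedef struct node { struct node *next, *prev; } node;

    static void link_after(node *n, node *v)
    {
        v->next = n->next;
        v->prev = n;
        if (n->next != NULL)
            n->next->prev = v;
        n->next = v;
    }

    static void unlink_node(node *v)
    {
        v->prev->next = v->next;
        if (v->next != NULL)
            v->next->prev = v->prev;
        v->next = v->prev = NULL;
    }

    int main(void)
    {
        node young = {0}, oldest = {0}; /* the two sentinels */
        node obj = {0};
        link_after(&young, &obj);  /* jl_gc_big_alloc: onto the young list */
        unlink_node(&obj);
        link_after(&oldest, &obj); /* sweep: reached the oldest generation */
        unlink_node(&obj);
        link_after(&young, &obj);  /* mark_reset_age: young again */
        assert(young.next == &obj && obj.prev == &young && obj.next == NULL);
        return 0;
    }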
@@ -1023,7 +987,7 @@ STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz)
memset(v, 0xee, allocsz);
#endif
v->sz = allocsz;
gc_big_object_link(v, &ptls->heap.big_objects);
gc_big_object_link(ptls->heap.young_generation_of_bigvals, v);
return jl_valueof(&v->header);
}

@@ -1049,60 +1013,85 @@ jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t sz) {
return jl_gc_big_alloc_inner(ptls, sz);
}

// Sweep list rooted at *pv, removing and freeing any unmarked objects.
// Return pointer to last `next` field in the culled list.
static bigval_t **sweep_big_list(int sweep_full, bigval_t **pv) JL_NOTSAFEPOINT
FORCE_INLINE void sweep_unlink_and_free(bigval_t *v) JL_NOTSAFEPOINT
{
gc_big_object_unlink(v);
gc_num.freed += v->sz;
#ifdef MEMDEBUG
memset(v, 0xbb, v->sz);
#endif
gc_invoke_callbacks(jl_gc_cb_notify_external_free_t, gc_cblist_notify_external_free, (v));
jl_free_aligned(v);
}

static bigval_t *sweep_list_of_young_bigvals(bigval_t *young) JL_NOTSAFEPOINT
{
bigval_t *v = *pv;
bigval_t *last_node = young;
bigval_t *v = young->next; // skip the sentinel
bigval_t *old = oldest_generation_of_bigvals;
int sweep_full = current_sweep_full; // don't load the global in the hot loop
while (v != NULL) {
bigval_t *nxt = v->next;
int bits = v->bits.gc;
int old_bits = bits;
if (gc_marked(bits)) {
pv = &v->next;
if (sweep_full || bits == GC_MARKED) {
bits = GC_OLD;
last_node = v;
}
else { // `bits == GC_OLD_MARKED`
assert(bits == GC_OLD_MARKED);
// reached oldest generation, move from young list to old list
gc_big_object_unlink(v);
gc_big_object_link(old, v);
}
v->bits.gc = bits;
}
else {
// Remove v from list and free it
*pv = nxt;
if (nxt)
nxt->prev = pv;
gc_num.freed += v->sz;
#ifdef MEMDEBUG
memset(v, 0xbb, v->sz);
#endif
gc_invoke_callbacks(jl_gc_cb_notify_external_free_t,
gc_cblist_notify_external_free, (v));
jl_free_aligned(v);
sweep_unlink_and_free(v);
}
gc_time_count_big(old_bits, bits);
v = nxt;
}
return pv;
return last_node;
}

static void sweep_list_of_oldest_bigvals(bigval_t *young) JL_NOTSAFEPOINT
{
bigval_t *v = oldest_generation_of_bigvals->next; // skip the sentinel
while (v != NULL) {
bigval_t *nxt = v->next;
assert(v->bits.gc == GC_OLD_MARKED);
v->bits.gc = GC_OLD;
gc_time_count_big(GC_OLD_MARKED, GC_OLD);
v = nxt;
}
}

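Annotation (not part of the diff): the young-list sweep above returns the
last node it keeps so that `sweep_big` can later splice another list onto the
tail in O(1). A runnable sketch of that return-the-tail pattern; promotion to
the oldest generation is omitted for brevity, and a plain `marked` flag
stands in for the GC bits:

    #include <stdlib.h>

    typedef struct node {
        struct node *next, *prev;
        int marked;
    } node;

    /* Free unmarked nodes; return the last survivor, which may be the
     * sentinel itself when everything was swept. */
    static node *sweep_young(node *sentinel)
    {
        node *last = sentinel;
        node *v = sentinel->next; /* skip the sentinel */
        while (v != NULL) {
            node *nxt = v->next;
            if (v->marked) {
                v->marked = 0;
                last = v;
            }
            else {
                v->prev->next = v->next;
                if (v->next != NULL)
                    v->next->prev = v->prev;
                free(v);
            }
            v = nxt;
        }
        return last;
    }
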
static void sweep_big(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT
static void sweep_big(jl_ptls_t ptls) JL_NOTSAFEPOINT
{
gc_time_big_start();
assert(gc_n_threads);
bigval_t *last_node_in_my_list = NULL;
for (int i = 0; i < gc_n_threads; i++) {
jl_ptls_t ptls2 = gc_all_tls_states[i];
if (ptls2 != NULL)
sweep_big_list(sweep_full, &ptls2->heap.big_objects);
if (ptls2 != NULL) {
bigval_t *last_node = sweep_list_of_young_bigvals(ptls2->heap.young_generation_of_bigvals);
if (ptls == ptls2) {
last_node_in_my_list = last_node;
}
}
}
if (sweep_full) {
bigval_t **last_next = sweep_big_list(sweep_full, &big_objects_marked);
// Move all survivors from big_objects_marked list to the big_objects list of this thread.
if (ptls->heap.big_objects)
ptls->heap.big_objects->prev = last_next;
*last_next = ptls->heap.big_objects;
ptls->heap.big_objects = big_objects_marked;
if (ptls->heap.big_objects)
ptls->heap.big_objects->prev = &ptls->heap.big_objects;
big_objects_marked = NULL;
if (current_sweep_full) {
sweep_list_of_oldest_bigvals(ptls->heap.young_generation_of_bigvals);
// move all nodes in `oldest_generation_of_bigvals` to my list of bigvals
assert(last_node_in_my_list != NULL);
assert(last_node_in_my_list->next == NULL);
last_node_in_my_list->next = oldest_generation_of_bigvals->next; // skip the sentinel
if (oldest_generation_of_bigvals->next != NULL) {
oldest_generation_of_bigvals->next->prev = last_node_in_my_list;
}
oldest_generation_of_bigvals->next = NULL;
}
gc_time_big_end();
}
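
Annotation (not part of the diff): on a full sweep the global oldest list is
also swept (its bits age from GC_OLD_MARKED back to GC_OLD) and is then
drained into the sweeping thread's young list by stitching it onto the tail
saved from that thread's own young-list sweep, in constant time regardless of
list length. A sketch of the splice, mirroring the pointer updates above
(names illustrative):

    #include <stddef.h>

    typedef struct node { struct node *next, *prev; } node;

    /* Append everything after src's sentinel to tail (whose next must be
     * NULL), leaving the source list empty. */
    static void splice_onto_tail(node *tail, node *src_sentinel)
    {
        tail->next = src_sentinel->next;
        if (src_sentinel->next != NULL)
            src_sentinel->next->prev = tail;
        src_sentinel->next = NULL;
    }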
@@ -1548,10 +1537,10 @@ STATIC_INLINE void gc_sweep_pool_page(gc_page_profiler_serializer_t *s, jl_gc_pa
}

// sweep over all memory that is being used and not in a pool
static void gc_sweep_other(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT
static void gc_sweep_other(jl_ptls_t ptls) JL_NOTSAFEPOINT
{
sweep_malloced_arrays();
sweep_big(ptls, sweep_full);
sweep_big(ptls);
}

static void gc_pool_sync_nfree(jl_gc_pagemeta_t *pg, jl_taggedvalue_t *last) JL_NOTSAFEPOINT
@@ -3537,7 +3526,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
// marking is over

// Flush everything in mark cache
gc_sync_all_caches_nolock(ptls);
gc_sync_all_caches(ptls);

int64_t live_sz_ub = live_bytes + actual_allocd;
int64_t live_sz_est = scanned_bytes + perm_scanned_bytes;
@@ -3625,7 +3614,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
sweep_weak_refs();
sweep_stack_pools();
gc_sweep_foreign_objs();
gc_sweep_other(ptls, sweep_full);
gc_sweep_other(ptls);
gc_scrub();
gc_verify_tags();
gc_sweep_pool();
@@ -3876,15 +3865,16 @@ void jl_init_thread_heap(jl_ptls_t ptls)
small_arraylist_new(&heap->free_stacks[i], 0);
heap->mallocarrays = NULL;
heap->mafreelist = NULL;
heap->big_objects = NULL;
heap->young_generation_of_bigvals = (bigval_t*)calloc_s(sizeof(bigval_t)); // sentinel
assert(gc_bigval_sentinel_tag != 0); // make sure the sentinel is initialized
heap->young_generation_of_bigvals->header = gc_bigval_sentinel_tag;
arraylist_new(&heap->remset, 0);
arraylist_new(&ptls->finalizers, 0);
arraylist_new(&ptls->sweep_objs, 0);

jl_gc_mark_cache_t *gc_cache = &ptls->gc_cache;
gc_cache->perm_scanned_bytes = 0;
gc_cache->scanned_bytes = 0;
gc_cache->nbig_obj = 0;

// Initialize GC mark-queue
jl_gc_markqueue_t *mq = &ptls->mark_queue;
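
Annotation (not part of the diff): each thread heap now starts with one
zeroed sentinel whose header is the global tag, so the young list is never
empty and every real node has a non-NULL prev. A sketch of that setup;
`new_sentinel_list` is hypothetical, and `calloc_s` in the commit aborts on
allocation failure, approximated here:

    #include <stdint.h>
    #include <stdlib.h>

    typedef struct bigval_t {
        struct bigval_t *next;
        struct bigval_t *prev;
        uintptr_t header;
    } bigval_t;

    static bigval_t *new_sentinel_list(uintptr_t sentinel_tag)
    {
        bigval_t *s = (bigval_t *)calloc(1, sizeof(bigval_t));
        if (s == NULL)
            abort();
        s->header = sentinel_tag; /* mark the node as a sentinel */
        return s; /* next == prev == NULL: an empty list */
    }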
@@ -3910,12 +3900,16 @@ void jl_gc_init(void)
JL_MUTEX_INIT(&heapsnapshot_lock, "heapsnapshot_lock");
JL_MUTEX_INIT(&finalizers_lock, "finalizers_lock");
uv_mutex_init(&page_profile_lock);
uv_mutex_init(&gc_cache_lock);
uv_mutex_init(&gc_perm_lock);
uv_mutex_init(&gc_threads_lock);
uv_cond_init(&gc_threads_cond);
uv_sem_init(&gc_sweep_assists_needed, 0);
uv_mutex_init(&gc_queue_observer_lock);
void *_addr = (void*)calloc_s(1); // dummy allocation to get the sentinel tag
uintptr_t addr = (uintptr_t)_addr;
gc_bigval_sentinel_tag = addr;
oldest_generation_of_bigvals = (bigval_t*)calloc_s(sizeof(bigval_t)); // sentinel
oldest_generation_of_bigvals->header = gc_bigval_sentinel_tag;

jl_gc_init_page();
jl_gc_debug_init();
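
Annotation (not part of the diff): the sentinel tag itself only needs to be
a value distinct from any real object's header; the commit obtains one by
taking the address of a dummy heap allocation, which stays unique as long as
it is never freed. Sketched:

    #include <stdint.h>
    #include <stdlib.h>

    static uintptr_t make_sentinel_tag(void)
    {
        void *p = calloc(1, 1); /* never freed, so never handed out again */
        if (p == NULL)
            abort();
        return (uintptr_t)p;
    }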
(diff for the remaining 2 changed files not shown)
