Skip to content

Commit

Permalink
Add ee_alloc_context
Browse files Browse the repository at this point in the history
This change is preparatory refactoring for the randomized allocation sampling feature. We need to add more state to the allocation context, but we don't want to make a breaking change to the GC interface. The new state only needs to be visible to the EE, but we want it physically near the existing alloc context state for good cache locality. To accomplish this we created a new ee_alloc_context struct which contains an instance of gc_alloc_context within it.

The new ee_alloc_context.combined_limit field should be used by fast allocation helpers to determine when to go down the slow path. Most of the time combined_limit has the same value as alloc_limit, but periodically we need to emit an allocation sampling event on an object that is somewhere in the middle of an AC. Using combined_limit rather than alloc_limit as the slow path trigger allows us to keep all the sampling event logic in the slow path.
  • Loading branch information
noahfalk committed Jul 14, 2024
1 parent 42b2b19 commit da44466
Show file tree
Hide file tree
Showing 23 changed files with 206 additions and 81 deletions.
7 changes: 4 additions & 3 deletions src/coreclr/debug/daccess/dacdbiimpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6551,10 +6551,11 @@ HRESULT DacHeapWalker::Init(CORDB_ADDRESS start, CORDB_ADDRESS end)
j++;
}
}
if ((&g_global_alloc_context)->alloc_ptr != nullptr)
gc_alloc_context globalCtx = ((ee_alloc_context)g_global_alloc_context).gc_allocation_context;
if (globalCtx.alloc_ptr != nullptr)
{
mAllocInfo[j].Ptr = (CORDB_ADDRESS)(&g_global_alloc_context)->alloc_ptr;
mAllocInfo[j].Limit = (CORDB_ADDRESS)(&g_global_alloc_context)->alloc_limit;
mAllocInfo[j].Ptr = (CORDB_ADDRESS)globalCtx.alloc_ptr;
mAllocInfo[j].Limit = (CORDB_ADDRESS)globalCtx.alloc_limit;
}

mThreadCount = j;
Expand Down
5 changes: 3 additions & 2 deletions src/coreclr/debug/daccess/request.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5493,8 +5493,9 @@ HRESULT ClrDataAccess::GetGlobalAllocationContext(
}

SOSDacEnter();
*allocPtr = (CLRDATA_ADDRESS)((&g_global_alloc_context)->alloc_ptr);
*allocLimit = (CLRDATA_ADDRESS)((&g_global_alloc_context)->alloc_limit);
gc_alloc_context global_alloc_context = ((ee_alloc_context)g_global_alloc_context).gc_allocation_context;
*allocPtr = (CLRDATA_ADDRESS)global_alloc_context.alloc_ptr;
*allocLimit = (CLRDATA_ADDRESS)global_alloc_context.alloc_limit;
SOSDacLeave();
return hr;
}
Expand Down
7 changes: 6 additions & 1 deletion src/coreclr/debug/runtimeinfo/datadescriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,14 @@ CDAC_TYPE_END(ThreadStore)

CDAC_TYPE_BEGIN(RuntimeThreadLocals)
CDAC_TYPE_INDETERMINATE(RuntimeThreadLocals)
CDAC_TYPE_FIELD(RuntimeThreadLocals, AllocContext, AllocContext, offsetof(RuntimeThreadLocals, alloc_context))
CDAC_TYPE_FIELD(RuntimeThreadLocals, /*EEAllocContext*/, AllocContext, offsetof(RuntimeThreadLocals, alloc_context))
CDAC_TYPE_END(RuntimeThreadLocals)

// Data descriptor entry for the EE-side allocation context (ee_alloc_context).
// It records a single field, GCAllocationContext, whose offset is that of the
// gc_alloc_context embedded in the struct (ee_alloc_context::gc_allocation_context).
// The inline /*GCAllocContext*/ annotation names the descriptor type of that field.
// NOTE(review): CDAC_TYPE_INDETERMINATE presumably means the type's size is not
// published - confirm against the macro's definition.
CDAC_TYPE_BEGIN(EEAllocContext)
CDAC_TYPE_INDETERMINATE(EEAllocContext)
CDAC_TYPE_FIELD(EEAllocContext, /*GCAllocContext*/, GCAllocationContext, offsetof(ee_alloc_context, gc_allocation_context))
CDAC_TYPE_END(EEAllocContext)

CDAC_TYPE_BEGIN(GCAllocContext)
CDAC_TYPE_INDETERMINATE(GCAllocContext)
CDAC_TYPE_FIELD(GCAllocContext, /*pointer*/, Pointer, offsetof(gc_alloc_context, alloc_ptr))
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/inc/dacvars.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ DEFINE_DACVAR(ProfControlBlock, dac__g_profControlBlock, ::g_profControlBlock)
DEFINE_DACVAR(PTR_DWORD, dac__g_card_table, ::g_card_table)
DEFINE_DACVAR(PTR_BYTE, dac__g_lowest_address, ::g_lowest_address)
DEFINE_DACVAR(PTR_BYTE, dac__g_highest_address, ::g_highest_address)
DEFINE_DACVAR(gc_alloc_context, dac__g_global_alloc_context, ::g_global_alloc_context)
DEFINE_DACVAR(ee_alloc_context, dac__g_global_alloc_context, ::g_global_alloc_context)

DEFINE_DACVAR(IGCHeap, dac__g_pGCHeap, ::g_pGCHeap)

Expand Down
30 changes: 15 additions & 15 deletions src/coreclr/vm/amd64/JitHelpers_Slow.asm
Original file line number Diff line number Diff line change
Expand Up @@ -180,15 +180,15 @@ LEAF_ENTRY JIT_TrialAllocSFastSP, _TEXT
inc [g_global_alloc_lock]
jnz JIT_NEW

mov rax, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr] ; alloc_ptr
mov r10, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_limit] ; limit_ptr
mov rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] ; alloc_ptr
mov r10, [g_global_alloc_context + OFFSETOF__ee_alloc_context__combined_limit] ; combined_limit

add r8, rax

cmp r8, r10
ja AllocFailed

mov qword ptr [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr], r8 ; update the alloc ptr
mov qword ptr [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 ; update the alloc ptr
mov [rax], rcx
mov [g_global_alloc_lock], -1

Expand All @@ -208,8 +208,8 @@ NESTED_ENTRY JIT_BoxFastUP, _TEXT
inc [g_global_alloc_lock]
jnz JIT_Box

mov rax, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr] ; alloc_ptr
mov r10, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_limit] ; limit_ptr
mov rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] ; alloc_ptr
mov r10, [g_global_alloc_context + OFFSETOF__ee_alloc_context__combined_limit] ; combined_limit

add r8, rax

Expand All @@ -219,7 +219,7 @@ NESTED_ENTRY JIT_BoxFastUP, _TEXT
test rdx, rdx
je NullRef

mov qword ptr [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr], r8 ; update the alloc ptr
mov qword ptr [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 ; update the alloc ptr
mov [rax], rcx
mov [g_global_alloc_lock], -1

Expand Down Expand Up @@ -287,15 +287,15 @@ LEAF_ENTRY AllocateStringFastUP, _TEXT
inc [g_global_alloc_lock]
jnz FramedAllocateString

mov rax, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr] ; alloc_ptr
mov r10, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_limit] ; limit_ptr
mov rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] ; alloc_ptr
mov r10, [g_global_alloc_context + OFFSETOF__ee_alloc_context__combined_limit] ; combined_limit

add r8, rax

cmp r8, r10
ja AllocFailed

mov qword ptr [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr], r8 ; update the alloc ptr
mov qword ptr [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 ; update the alloc ptr
mov [rax], r11
mov [g_global_alloc_lock], -1

Expand Down Expand Up @@ -343,16 +343,16 @@ LEAF_ENTRY JIT_NewArr1VC_UP, _TEXT
inc [g_global_alloc_lock]
jnz JIT_NewArr1

mov rax, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr] ; alloc_ptr
mov r10, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_limit] ; limit_ptr
mov rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] ; alloc_ptr
mov r10, [g_global_alloc_context + OFFSETOF__ee_alloc_context__combined_limit] ; combined_limit

add r8, rax
jc AllocFailed

cmp r8, r10
ja AllocFailed

mov qword ptr [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr], r8 ; update the alloc ptr
mov qword ptr [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 ; update the alloc ptr
mov [rax], rcx
mov [g_global_alloc_lock], -1

Expand Down Expand Up @@ -396,15 +396,15 @@ LEAF_ENTRY JIT_NewArr1OBJ_UP, _TEXT
inc [g_global_alloc_lock]
jnz JIT_NewArr1

mov rax, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr] ; alloc_ptr
mov r10, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_limit] ; limit_ptr
mov rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] ; alloc_ptr
mov r10, [g_global_alloc_context + OFFSETOF__ee_alloc_context__combined_limit] ; combined_limit

add r8, rax

cmp r8, r10
ja AllocFailed

mov qword ptr [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr], r8 ; update the alloc ptr
mov qword ptr [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 ; update the alloc ptr
mov [rax], rcx
mov [g_global_alloc_lock], -1

Expand Down
9 changes: 5 additions & 4 deletions src/coreclr/vm/amd64/asmconstants.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,12 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__Thread__m_pFrame
#define Thread_m_pFrame OFFSETOF__Thread__m_pFrame


#define OFFSETOF__gc_alloc_context__alloc_ptr 0x0
ASMCONSTANT_OFFSETOF_ASSERT(gc_alloc_context, alloc_ptr);
#define OFFSETOF__ee_alloc_context__alloc_ptr 0x8
ASMCONSTANTS_C_ASSERT(OFFSETOF__ee_alloc_context__alloc_ptr == offsetof(ee_alloc_context, gc_allocation_context) +
offsetof(gc_alloc_context, alloc_ptr));

#define OFFSETOF__gc_alloc_context__alloc_limit 0x8
ASMCONSTANT_OFFSETOF_ASSERT(gc_alloc_context, alloc_limit);
#define OFFSETOF__ee_alloc_context__combined_limit 0x0
ASMCONSTANTS_C_ASSERT(OFFSETOF__ee_alloc_context__combined_limit == offsetof(ee_alloc_context, combined_limit));

#define OFFSETOF__ThreadExceptionState__m_pCurrentTracker 0x000
ASMCONSTANTS_C_ASSERT(OFFSETOF__ThreadExceptionState__m_pCurrentTracker
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/vm/comutilnative.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -848,7 +848,7 @@ FCIMPL0(INT64, GCInterface::GetAllocatedBytesForCurrentThread)

INT64 currentAllocated = 0;
Thread *pThread = GetThread();
gc_alloc_context* ac = &t_runtime_thread_locals.alloc_context;
gc_alloc_context* ac = &t_runtime_thread_locals.alloc_context.gc_allocation_context;
currentAllocated = ac->alloc_bytes + ac->alloc_bytes_uoh - (ac->alloc_limit - ac->alloc_ptr);

return currentAllocated;
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/vm/gccover.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1834,7 +1834,7 @@ void DoGcStress (PCONTEXT regs, NativeCodeVersion nativeCodeVersion)
// BUG(github #10318) - when not using allocation contexts, the alloc lock
// must be acquired here. Until fixed, this assert prevents random heap corruption.
assert(GCHeapUtilities::UseThreadAllocationContexts());
GCHeapUtilities::GetGCHeap()->StressHeap(&t_runtime_thread_locals.alloc_context);
GCHeapUtilities::GetGCHeap()->StressHeap(&t_runtime_thread_locals.alloc_context.gc_allocation_context);

// StressHeap can exit early w/o forcing a SuspendEE to trigger the instruction update
// We can not rely on the return code to determine if the instruction update happened
Expand Down
15 changes: 11 additions & 4 deletions src/coreclr/vm/gcenv.ee.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,14 @@ gc_alloc_context * GCToEEInterface::GetAllocContext()
return nullptr;
}

return &t_runtime_thread_locals.alloc_context;
return &t_runtime_thread_locals.alloc_context.gc_allocation_context;
}

// Applies a GC allocation-context callback to an EE allocation context.
//
//   pEEAllocContext - the EE-side context whose embedded gc_alloc_context is handed to fn
//   fn              - callback taking a gc_alloc_context* and the opaque param
//   param           - passed through to fn untouched
//
// After the callback returns, combined_limit is recomputed from the embedded context.
// NOTE(review): presumably needed because fn may modify alloc_limit - confirm against
// the callbacks the GC actually passes in.
void InvokeGCAllocCallback(ee_alloc_context* pEEAllocContext, enum_alloc_context_func* fn, void* param)
{
    // The callback works on gc_alloc_context, so give it the embedded instance rather
    // than the EE wrapper.
    fn(&pEEAllocContext->gc_allocation_context, param);

    pEEAllocContext->UpdateCombinedLimit();
}

void GCToEEInterface::GcEnumAllocContexts(enum_alloc_context_func* fn, void* param)
Expand All @@ -460,16 +467,16 @@ void GCToEEInterface::GcEnumAllocContexts(enum_alloc_context_func* fn, void* par
Thread * pThread = NULL;
while ((pThread = ThreadStore::GetThreadList(pThread)) != NULL)
{
gc_alloc_context* palloc_context = pThread->GetAllocContext();
ee_alloc_context* palloc_context = pThread->GetEEAllocContext();
if (palloc_context != nullptr)
{
fn(palloc_context, param);
InvokeGCAllocCallback(palloc_context, fn, param);
}
}
}
else
{
fn(&g_global_alloc_context, param);
InvokeGCAllocCallback(&g_global_alloc_context, fn, param);
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/vm/gcheaputilities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ bool g_sw_ww_enabled_for_gc_heap = false;

#endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP

GVAL_IMPL_INIT(gc_alloc_context, g_global_alloc_context, {});
GVAL_IMPL_INIT(ee_alloc_context, g_global_alloc_context, {});

enum GC_LOAD_STATUS {
GC_LOAD_STATUS_BEFORE_START,
Expand Down
66 changes: 65 additions & 1 deletion src/coreclr/vm/gcheaputilities.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,69 @@ GPTR_DECL(IGCHeap, g_pGCHeap);
#ifndef DACCESS_COMPILE
extern "C" {
#endif // !DACCESS_COMPILE

// This struct allows adding some state that is only visible to the EE onto the standard gc_alloc_context
// Wraps the standard gc_alloc_context with additional state that is only visible to the EE.
//
// Layout note: combined_limit is deliberately the first field, followed by the embedded
// gc_alloc_context. Assembly allocation helpers address these fields through hard-coded
// offsets (see OFFSETOF__ee_alloc_context__combined_limit and
// OFFSETOF__ee_alloc_context__alloc_ptr in asmconstants.h), so do not reorder or insert
// fields ahead of them without updating those constants.
struct ee_alloc_context
{
    // Any allocation that would overlap combined_limit needs to be handled by the allocation slow path.
    // combined_limit is the minimum of:
    //  - gc_alloc_context.alloc_limit (the end of the current AC)
    //  - the sampling_limit
    //
    // In the simple case that randomized sampling is disabled, combined_limit is always equal to alloc_limit.
    //
    // There are two different useful interpretations for the sampling_limit. One is to treat the sampling_limit
    // as an address and when we allocate an object that overlaps that address we should emit a sampling event.
    // The other is that we can treat (sampling_limit - alloc_ptr) as a budget of how many bytes we can allocate
    // before emitting a sampling event. If we always allocated objects contiguously in the AC and incremented
    // alloc_ptr by the size of the object, these two interpretations would be equivalent. However, when objects
    // don't fit in the AC we allocate them in some other address range. The budget interpretation is more
    // flexible to handle those cases.
    //
    // The sampling_limit isn't stored in any separate field explicitly, instead it is implied:
    //  - if combined_limit == alloc_limit there is no sampled byte in the AC. In the budget interpretation
    //    we can allocate (alloc_limit - alloc_ptr) unsampled bytes. We'll need a new random number after
    //    that to determine whether future allocated bytes should be sampled.
    //    This occurs either because the sampling feature is disabled, or because the randomized selection
    //    of sampled bytes didn't select a byte in this AC.
    //  - if combined_limit < alloc_limit there is a sampling limit in the AC and
    //    sampling_limit == combined_limit.
    uint8_t* combined_limit;
    gc_alloc_context gc_allocation_context;

    // Resets combined_limit to null and re-initializes the embedded gc_alloc_context
    // by calling its own init().
    void init()
    {
        LIMITED_METHOD_CONTRACT;
        combined_limit = nullptr;
        gc_allocation_context.init();
    }

    // Returns the current combined_limit without modifying the context.
    uint8_t* getCombinedLimit() const
    {
        LIMITED_METHOD_CONTRACT;
        return combined_limit;
    }

    // Byte offset of gc_allocation_context.alloc_ptr measured from the start of ee_alloc_context.
    static size_t getAllocPtrFieldOffset()
    {
        LIMITED_METHOD_CONTRACT;
        return offsetof(ee_alloc_context, gc_allocation_context) + offsetof(gc_alloc_context, alloc_ptr);
    }

    // Byte offset of combined_limit measured from the start of ee_alloc_context.
    static size_t getCombinedLimitFieldOffset()
    {
        LIMITED_METHOD_CONTRACT;
        return offsetof(ee_alloc_context, combined_limit);
    }

    // Regenerate the randomized sampling limit and update the combined_limit field.
    inline void UpdateCombinedLimit()
    {
        // The randomized sampling feature is being submitted in stages. At this point the sampling is never
        // activated so combined_limit is always equal to alloc_limit.
        combined_limit = gc_allocation_context.alloc_limit;
    }
};

GPTR_DECL(uint8_t,g_lowest_address);
GPTR_DECL(uint8_t,g_highest_address);
GPTR_DECL(uint32_t,g_card_table);
Expand All @@ -21,7 +84,8 @@ GVAL_DECL(GCHeapType, g_heap_type);
// for all allocations. In order to avoid extra indirections in assembly
// allocation helpers, the EE owns the global allocation context and the
// GC will update it when it needs to.
GVAL_DECL(gc_alloc_context, g_global_alloc_context);
GVAL_DECL(ee_alloc_context, g_global_alloc_context);

#ifndef DACCESS_COMPILE
}
#endif // !DACCESS_COMPILE
Expand Down
17 changes: 10 additions & 7 deletions src/coreclr/vm/gchelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
//
//========================================================================

inline gc_alloc_context* GetThreadAllocContext()
inline ee_alloc_context* GetThreadEEAllocContext()
{
WRAPPER_NO_CONTRACT;

Expand Down Expand Up @@ -222,16 +222,19 @@ inline Object* Alloc(size_t size, GC_ALLOC_FLAGS flags)

if (GCHeapUtilities::UseThreadAllocationContexts())
{
gc_alloc_context *threadContext = GetThreadAllocContext();
GCStress<gc_on_alloc>::MaybeTrigger(threadContext);
retVal = GCHeapUtilities::GetGCHeap()->Alloc(threadContext, size, flags);
ee_alloc_context *threadContext = GetThreadEEAllocContext();
GCStress<gc_on_alloc>::MaybeTrigger(&threadContext->gc_allocation_context);
retVal = GCHeapUtilities::GetGCHeap()->Alloc(&threadContext->gc_allocation_context, size, flags);
threadContext->UpdateCombinedLimit();

}
else
{
GlobalAllocLockHolder holder(&g_global_alloc_lock);
gc_alloc_context *globalContext = &g_global_alloc_context;
GCStress<gc_on_alloc>::MaybeTrigger(globalContext);
retVal = GCHeapUtilities::GetGCHeap()->Alloc(globalContext, size, flags);
ee_alloc_context *globalContext = &g_global_alloc_context;
GCStress<gc_on_alloc>::MaybeTrigger(&globalContext->gc_allocation_context);
retVal = GCHeapUtilities::GetGCHeap()->Alloc(&globalContext->gc_allocation_context, size, flags);
globalContext->UpdateCombinedLimit();
}


Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/vm/gcstress.h
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ namespace _GCStress
// BUG(github #10318) - when not using allocation contexts, the alloc lock
// must be acquired here. Until fixed, this assert prevents random heap corruption.
_ASSERTE(GCHeapUtilities::UseThreadAllocationContexts());
GCHeapUtilities::GetGCHeap()->StressHeap(&t_runtime_thread_locals.alloc_context);
GCHeapUtilities::GetGCHeap()->StressHeap(&t_runtime_thread_locals.alloc_context.gc_allocation_context);
}

FORCEINLINE
Expand Down
Loading

0 comments on commit da44466

Please sign in to comment.