From da44466ae2b95bd5e522d9a6966913fa0407558d Mon Sep 17 00:00:00 2001 From: Noah Falk Date: Fri, 12 Jul 2024 22:30:14 -0700 Subject: [PATCH] Add ee_alloc_context This change is some preparatory refactoring for the randomized allocation sampling feature. We need to add more state onto the allocation context, but we don't want to make a breaking change to the GC interface. The new state only needs to be visible to the EE, but we want it physically near the existing alloc context state for good cache locality. To accomplish this we created a new ee_alloc_context struct which contains an instance of gc_alloc_context within it. The new ee_alloc_context.combined_limit field should be used by fast allocation helpers to determine when to go down the slow path. Most of the time combined_limit has the same value as alloc_limit, but periodically we need to emit an allocation sampling event on an object that is somewhere in the middle of an AC. Using combined_limit rather than alloc_limit as the slow-path trigger allows us to keep all the sampling event logic in the slow path. 
--- src/coreclr/debug/daccess/dacdbiimpl.cpp | 7 +- src/coreclr/debug/daccess/request.cpp | 5 +- .../debug/runtimeinfo/datadescriptor.h | 7 +- src/coreclr/inc/dacvars.h | 2 +- src/coreclr/vm/amd64/JitHelpers_Slow.asm | 30 ++++----- src/coreclr/vm/amd64/asmconstants.h | 9 +-- src/coreclr/vm/comutilnative.cpp | 2 +- src/coreclr/vm/gccover.cpp | 2 +- src/coreclr/vm/gcenv.ee.cpp | 15 +++-- src/coreclr/vm/gcheaputilities.cpp | 2 +- src/coreclr/vm/gcheaputilities.h | 66 ++++++++++++++++++- src/coreclr/vm/gchelpers.cpp | 17 +++-- src/coreclr/vm/gcstress.h | 2 +- src/coreclr/vm/i386/jitinterfacex86.cpp | 28 ++++---- src/coreclr/vm/i386/stublinkerx86.cpp | 2 +- src/coreclr/vm/jithelpers.cpp | 35 +++++----- src/coreclr/vm/threads.cpp | 5 +- src/coreclr/vm/threads.h | 22 ++++++- src/coreclr/vm/threadsuspend.cpp | 2 +- .../cdacreader/src/Contracts/Thread.cs | 4 +- .../cdacreader/src/Data/EEAllocContext.cs | 18 +++++ .../src/Data/RuntimeThreadLocals.cs | 4 +- src/native/managed/cdacreader/src/DataType.cs | 1 + 23 files changed, 206 insertions(+), 81 deletions(-) create mode 100644 src/native/managed/cdacreader/src/Data/EEAllocContext.cs diff --git a/src/coreclr/debug/daccess/dacdbiimpl.cpp b/src/coreclr/debug/daccess/dacdbiimpl.cpp index a6dda591278557..a3de4aa57e4fdb 100644 --- a/src/coreclr/debug/daccess/dacdbiimpl.cpp +++ b/src/coreclr/debug/daccess/dacdbiimpl.cpp @@ -6551,10 +6551,11 @@ HRESULT DacHeapWalker::Init(CORDB_ADDRESS start, CORDB_ADDRESS end) j++; } } - if ((&g_global_alloc_context)->alloc_ptr != nullptr) + gc_alloc_context globalCtx = ((ee_alloc_context)g_global_alloc_context).gc_allocation_context; + if (globalCtx.alloc_ptr != nullptr) { - mAllocInfo[j].Ptr = (CORDB_ADDRESS)(&g_global_alloc_context)->alloc_ptr; - mAllocInfo[j].Limit = (CORDB_ADDRESS)(&g_global_alloc_context)->alloc_limit; + mAllocInfo[j].Ptr = (CORDB_ADDRESS)globalCtx.alloc_ptr; + mAllocInfo[j].Limit = (CORDB_ADDRESS)globalCtx.alloc_limit; } mThreadCount = j; diff --git 
a/src/coreclr/debug/daccess/request.cpp b/src/coreclr/debug/daccess/request.cpp index 2dc737db2e7007..9096195f16e0a8 100644 --- a/src/coreclr/debug/daccess/request.cpp +++ b/src/coreclr/debug/daccess/request.cpp @@ -5493,8 +5493,9 @@ HRESULT ClrDataAccess::GetGlobalAllocationContext( } SOSDacEnter(); - *allocPtr = (CLRDATA_ADDRESS)((&g_global_alloc_context)->alloc_ptr); - *allocLimit = (CLRDATA_ADDRESS)((&g_global_alloc_context)->alloc_limit); + gc_alloc_context global_alloc_context = ((ee_alloc_context)g_global_alloc_context).gc_allocation_context; + *allocPtr = (CLRDATA_ADDRESS)global_alloc_context.alloc_ptr; + *allocLimit = (CLRDATA_ADDRESS)global_alloc_context.alloc_limit; SOSDacLeave(); return hr; } diff --git a/src/coreclr/debug/runtimeinfo/datadescriptor.h b/src/coreclr/debug/runtimeinfo/datadescriptor.h index a0c71736fd6f9a..5589991d46e34b 100644 --- a/src/coreclr/debug/runtimeinfo/datadescriptor.h +++ b/src/coreclr/debug/runtimeinfo/datadescriptor.h @@ -132,9 +132,14 @@ CDAC_TYPE_END(ThreadStore) CDAC_TYPE_BEGIN(RuntimeThreadLocals) CDAC_TYPE_INDETERMINATE(RuntimeThreadLocals) -CDAC_TYPE_FIELD(RuntimeThreadLocals, AllocContext, AllocContext, offsetof(RuntimeThreadLocals, alloc_context)) +CDAC_TYPE_FIELD(RuntimeThreadLocals, /*EEAllocContext*/, AllocContext, offsetof(RuntimeThreadLocals, alloc_context)) CDAC_TYPE_END(RuntimeThreadLocals) +CDAC_TYPE_BEGIN(EEAllocContext) +CDAC_TYPE_INDETERMINATE(EEAllocContext) +CDAC_TYPE_FIELD(EEAllocContext, /*GCAllocContext*/, GCAllocationContext, offsetof(ee_alloc_context, gc_allocation_context)) +CDAC_TYPE_END(EEAllocContext) + CDAC_TYPE_BEGIN(GCAllocContext) CDAC_TYPE_INDETERMINATE(GCAllocContext) CDAC_TYPE_FIELD(GCAllocContext, /*pointer*/, Pointer, offsetof(gc_alloc_context, alloc_ptr)) diff --git a/src/coreclr/inc/dacvars.h b/src/coreclr/inc/dacvars.h index 03995176313c24..c090c621ac2299 100644 --- a/src/coreclr/inc/dacvars.h +++ b/src/coreclr/inc/dacvars.h @@ -140,7 +140,7 @@ DEFINE_DACVAR(ProfControlBlock, 
dac__g_profControlBlock, ::g_profControlBlock) DEFINE_DACVAR(PTR_DWORD, dac__g_card_table, ::g_card_table) DEFINE_DACVAR(PTR_BYTE, dac__g_lowest_address, ::g_lowest_address) DEFINE_DACVAR(PTR_BYTE, dac__g_highest_address, ::g_highest_address) -DEFINE_DACVAR(gc_alloc_context, dac__g_global_alloc_context, ::g_global_alloc_context) +DEFINE_DACVAR(ee_alloc_context, dac__g_global_alloc_context, ::g_global_alloc_context) DEFINE_DACVAR(IGCHeap, dac__g_pGCHeap, ::g_pGCHeap) diff --git a/src/coreclr/vm/amd64/JitHelpers_Slow.asm b/src/coreclr/vm/amd64/JitHelpers_Slow.asm index 6d322248cdeeec..8ccb624901100b 100644 --- a/src/coreclr/vm/amd64/JitHelpers_Slow.asm +++ b/src/coreclr/vm/amd64/JitHelpers_Slow.asm @@ -180,15 +180,15 @@ LEAF_ENTRY JIT_TrialAllocSFastSP, _TEXT inc [g_global_alloc_lock] jnz JIT_NEW - mov rax, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr] ; alloc_ptr - mov r10, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_limit] ; limit_ptr + mov rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] ; alloc_ptr + mov r10, [g_global_alloc_context + OFFSETOF__ee_alloc_context__combined_limit] ; combined_limit add r8, rax cmp r8, r10 ja AllocFailed - mov qword ptr [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr], r8 ; update the alloc ptr + mov qword ptr [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 ; update the alloc ptr mov [rax], rcx mov [g_global_alloc_lock], -1 @@ -208,8 +208,8 @@ NESTED_ENTRY JIT_BoxFastUP, _TEXT inc [g_global_alloc_lock] jnz JIT_Box - mov rax, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr] ; alloc_ptr - mov r10, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_limit] ; limit_ptr + mov rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] ; alloc_ptr + mov r10, [g_global_alloc_context + OFFSETOF__ee_alloc_context__combined_limit] ; combined_limit add r8, rax @@ -219,7 +219,7 @@ NESTED_ENTRY JIT_BoxFastUP, _TEXT test 
rdx, rdx je NullRef - mov qword ptr [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr], r8 ; update the alloc ptr + mov qword ptr [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 ; update the alloc ptr mov [rax], rcx mov [g_global_alloc_lock], -1 @@ -287,15 +287,15 @@ LEAF_ENTRY AllocateStringFastUP, _TEXT inc [g_global_alloc_lock] jnz FramedAllocateString - mov rax, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr] ; alloc_ptr - mov r10, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_limit] ; limit_ptr + mov rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] ; alloc_ptr + mov r10, [g_global_alloc_context + OFFSETOF__ee_alloc_context__combined_limit] ; combined_limit add r8, rax cmp r8, r10 ja AllocFailed - mov qword ptr [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr], r8 ; update the alloc ptr + mov qword ptr [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 ; update the alloc ptr mov [rax], r11 mov [g_global_alloc_lock], -1 @@ -343,8 +343,8 @@ LEAF_ENTRY JIT_NewArr1VC_UP, _TEXT inc [g_global_alloc_lock] jnz JIT_NewArr1 - mov rax, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr] ; alloc_ptr - mov r10, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_limit] ; limit_ptr + mov rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] ; alloc_ptr + mov r10, [g_global_alloc_context + OFFSETOF__ee_alloc_context__combined_limit] ; combined_limit add r8, rax jc AllocFailed @@ -352,7 +352,7 @@ LEAF_ENTRY JIT_NewArr1VC_UP, _TEXT cmp r8, r10 ja AllocFailed - mov qword ptr [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr], r8 ; update the alloc ptr + mov qword ptr [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 ; update the alloc ptr mov [rax], rcx mov [g_global_alloc_lock], -1 @@ -396,15 +396,15 @@ LEAF_ENTRY JIT_NewArr1OBJ_UP, _TEXT inc [g_global_alloc_lock] jnz JIT_NewArr1 - mov rax, 
[g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr] ; alloc_ptr - mov r10, [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_limit] ; limit_ptr + mov rax, [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr] ; alloc_ptr + mov r10, [g_global_alloc_context + OFFSETOF__ee_alloc_context__combined_limit] ; combined_limit add r8, rax cmp r8, r10 ja AllocFailed - mov qword ptr [g_global_alloc_context + OFFSETOF__gc_alloc_context__alloc_ptr], r8 ; update the alloc ptr + mov qword ptr [g_global_alloc_context + OFFSETOF__ee_alloc_context__alloc_ptr], r8 ; update the alloc ptr mov [rax], rcx mov [g_global_alloc_lock], -1 diff --git a/src/coreclr/vm/amd64/asmconstants.h b/src/coreclr/vm/amd64/asmconstants.h index 524e1fd40b7ae8..82724018775a04 100644 --- a/src/coreclr/vm/amd64/asmconstants.h +++ b/src/coreclr/vm/amd64/asmconstants.h @@ -111,11 +111,12 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__Thread__m_pFrame #define Thread_m_pFrame OFFSETOF__Thread__m_pFrame -#define OFFSETOF__gc_alloc_context__alloc_ptr 0x0 -ASMCONSTANT_OFFSETOF_ASSERT(gc_alloc_context, alloc_ptr); +#define OFFSETOF__ee_alloc_context__alloc_ptr 0x8 +ASMCONSTANTS_C_ASSERT(OFFSETOF__ee_alloc_context__alloc_ptr == offsetof(ee_alloc_context, gc_allocation_context) + + offsetof(gc_alloc_context, alloc_ptr)); -#define OFFSETOF__gc_alloc_context__alloc_limit 0x8 -ASMCONSTANT_OFFSETOF_ASSERT(gc_alloc_context, alloc_limit); +#define OFFSETOF__ee_alloc_context__combined_limit 0x0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__ee_alloc_context__combined_limit == offsetof(ee_alloc_context, combined_limit)); #define OFFSETOF__ThreadExceptionState__m_pCurrentTracker 0x000 ASMCONSTANTS_C_ASSERT(OFFSETOF__ThreadExceptionState__m_pCurrentTracker diff --git a/src/coreclr/vm/comutilnative.cpp b/src/coreclr/vm/comutilnative.cpp index a281ac7505d089..eca0a8b80803b0 100644 --- a/src/coreclr/vm/comutilnative.cpp +++ b/src/coreclr/vm/comutilnative.cpp @@ -848,7 +848,7 @@ FCIMPL0(INT64, 
GCInterface::GetAllocatedBytesForCurrentThread) INT64 currentAllocated = 0; Thread *pThread = GetThread(); - gc_alloc_context* ac = &t_runtime_thread_locals.alloc_context; + gc_alloc_context* ac = &t_runtime_thread_locals.alloc_context.gc_allocation_context; currentAllocated = ac->alloc_bytes + ac->alloc_bytes_uoh - (ac->alloc_limit - ac->alloc_ptr); return currentAllocated; diff --git a/src/coreclr/vm/gccover.cpp b/src/coreclr/vm/gccover.cpp index b7ae97613d507d..ab564c6ba17730 100644 --- a/src/coreclr/vm/gccover.cpp +++ b/src/coreclr/vm/gccover.cpp @@ -1834,7 +1834,7 @@ void DoGcStress (PCONTEXT regs, NativeCodeVersion nativeCodeVersion) // BUG(github #10318) - when not using allocation contexts, the alloc lock // must be acquired here. Until fixed, this assert prevents random heap corruption. assert(GCHeapUtilities::UseThreadAllocationContexts()); - GCHeapUtilities::GetGCHeap()->StressHeap(&t_runtime_thread_locals.alloc_context); + GCHeapUtilities::GetGCHeap()->StressHeap(&t_runtime_thread_locals.alloc_context.gc_allocation_context); // StressHeap can exit early w/o forcing a SuspendEE to trigger the instruction update // We can not rely on the return code to determine if the instruction update happened diff --git a/src/coreclr/vm/gcenv.ee.cpp b/src/coreclr/vm/gcenv.ee.cpp index 852c655cf9591e..3efba53f2cbabd 100644 --- a/src/coreclr/vm/gcenv.ee.cpp +++ b/src/coreclr/vm/gcenv.ee.cpp @@ -443,7 +443,14 @@ gc_alloc_context * GCToEEInterface::GetAllocContext() return nullptr; } - return &t_runtime_thread_locals.alloc_context; + return &t_runtime_thread_locals.alloc_context.gc_allocation_context; +} + +void InvokeGCAllocCallback(ee_alloc_context* pEEAllocContext, enum_alloc_context_func* fn, void* param) +{ + gc_alloc_context* pAllocContext = &pEEAllocContext->gc_allocation_context; + fn(pAllocContext, param); + pEEAllocContext->UpdateCombinedLimit(); } void GCToEEInterface::GcEnumAllocContexts(enum_alloc_context_func* fn, void* param) @@ -460,16 +467,16 @@ void 
GCToEEInterface::GcEnumAllocContexts(enum_alloc_context_func* fn, void* par Thread * pThread = NULL; while ((pThread = ThreadStore::GetThreadList(pThread)) != NULL) { - gc_alloc_context* palloc_context = pThread->GetAllocContext(); + ee_alloc_context* palloc_context = pThread->GetEEAllocContext(); if (palloc_context != nullptr) { - fn(palloc_context, param); + InvokeGCAllocCallback(palloc_context, fn, param); } } } else { - fn(&g_global_alloc_context, param); + InvokeGCAllocCallback(&g_global_alloc_context, fn, param); } } diff --git a/src/coreclr/vm/gcheaputilities.cpp b/src/coreclr/vm/gcheaputilities.cpp index cd0259eef45d83..32ea33a91cc3ce 100644 --- a/src/coreclr/vm/gcheaputilities.cpp +++ b/src/coreclr/vm/gcheaputilities.cpp @@ -41,7 +41,7 @@ bool g_sw_ww_enabled_for_gc_heap = false; #endif // FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP -GVAL_IMPL_INIT(gc_alloc_context, g_global_alloc_context, {}); +GVAL_IMPL_INIT(ee_alloc_context, g_global_alloc_context, {}); enum GC_LOAD_STATUS { GC_LOAD_STATUS_BEFORE_START, diff --git a/src/coreclr/vm/gcheaputilities.h b/src/coreclr/vm/gcheaputilities.h index c652cc52bf417c..bbefdc9cd7bc32 100644 --- a/src/coreclr/vm/gcheaputilities.h +++ b/src/coreclr/vm/gcheaputilities.h @@ -12,6 +12,69 @@ GPTR_DECL(IGCHeap, g_pGCHeap); #ifndef DACCESS_COMPILE extern "C" { #endif // !DACCESS_COMPILE + +// This struct allows adding some state that is only visible to the EE onto the standard gc_alloc_context +struct ee_alloc_context +{ + // Any allocation that would overlap combined_limit needs to be handled by the allocation slow path. + // combined_limit is the minimum of: + // - gc_alloc_context.alloc_limit (the end of the current AC) + // - the sampling_limit + // + // In the simple case that randomized sampling is disabled, combined_limit is always equal to alloc_limit. + // + // There are two different useful interpretations for the sampling_limit. 
One is to treat the sampling_limit + // as an address and when we allocate an object that overlaps that address we should emit a sampling event. + // The other is that we can treat (sampling_limit - alloc_ptr) as a budget of how many bytes we can allocate + // before emitting a sampling event. If we always allocated objects contiguously in the AC and incremented + // alloc_ptr by the size of the object, these two interpretations would be equivalent. However, when objects + // don't fit in the AC we allocate them in some other address range. The budget interpretation is more + // flexible to handle those cases. + // + // The sampling limit isn't stored in any separate field explicitly, instead it is implied: + // - if combined_limit == alloc_limit there is no sampled byte in the AC. In the budget interpretation + // we can allocate (alloc_limit - alloc_ptr) unsampled bytes. We'll need a new random number after + // that to determine whether future allocated bytes should be sampled. + // This occurs either because the sampling feature is disabled, or because the randomized selection + // of sampled bytes didn't select a byte in this AC. + // - if combined_limit < alloc_limit there is a sample limit in the AC. sample_limit = combined_limit. + uint8_t* combined_limit; + gc_alloc_context gc_allocation_context; + + void init() + { + LIMITED_METHOD_CONTRACT; + combined_limit = 0; + gc_allocation_context.init(); + } + + uint8_t* getCombinedLimit() + { + LIMITED_METHOD_CONTRACT; + return combined_limit; + } + + static size_t getAllocPtrFieldOffset() + { + LIMITED_METHOD_CONTRACT; + return offsetof(ee_alloc_context, gc_allocation_context) + offsetof(gc_alloc_context, alloc_ptr); + } + + static size_t getCombinedLimitFieldOffset() + { + LIMITED_METHOD_CONTRACT; + return offsetof(ee_alloc_context, combined_limit); + } + + // Regenerate the randomized sampling limit and update the combined_limit field. 
+ inline void UpdateCombinedLimit() + { + // The randomized sampling feature is being submitted in stages. At this point the sampling is never + // activated so combined_limit is always equal to alloc_limit. + combined_limit = gc_allocation_context.alloc_limit; + } +}; + GPTR_DECL(uint8_t,g_lowest_address); GPTR_DECL(uint8_t,g_highest_address); GPTR_DECL(uint32_t,g_card_table); @@ -21,7 +84,8 @@ GVAL_DECL(GCHeapType, g_heap_type); // for all allocations. In order to avoid extra indirections in assembly // allocation helpers, the EE owns the global allocation context and the // GC will update it when it needs to. -GVAL_DECL(gc_alloc_context, g_global_alloc_context); +GVAL_DECL(ee_alloc_context, g_global_alloc_context); + #ifndef DACCESS_COMPILE } #endif // !DACCESS_COMPILE diff --git a/src/coreclr/vm/gchelpers.cpp b/src/coreclr/vm/gchelpers.cpp index 335bd3cb25caba..cab5986ecd66a3 100644 --- a/src/coreclr/vm/gchelpers.cpp +++ b/src/coreclr/vm/gchelpers.cpp @@ -40,7 +40,7 @@ // //======================================================================== -inline gc_alloc_context* GetThreadAllocContext() +inline ee_alloc_context* GetThreadEEAllocContext() { WRAPPER_NO_CONTRACT; @@ -222,16 +222,19 @@ inline Object* Alloc(size_t size, GC_ALLOC_FLAGS flags) if (GCHeapUtilities::UseThreadAllocationContexts()) { - gc_alloc_context *threadContext = GetThreadAllocContext(); - GCStress::MaybeTrigger(threadContext); - retVal = GCHeapUtilities::GetGCHeap()->Alloc(threadContext, size, flags); + ee_alloc_context *threadContext = GetThreadEEAllocContext(); + GCStress::MaybeTrigger(&threadContext->gc_allocation_context); + retVal = GCHeapUtilities::GetGCHeap()->Alloc(&threadContext->gc_allocation_context, size, flags); + threadContext->UpdateCombinedLimit(); + } else { GlobalAllocLockHolder holder(&g_global_alloc_lock); - gc_alloc_context *globalContext = &g_global_alloc_context; - GCStress::MaybeTrigger(globalContext); - retVal = GCHeapUtilities::GetGCHeap()->Alloc(globalContext, 
size, flags); + ee_alloc_context *globalContext = &g_global_alloc_context; + GCStress::MaybeTrigger(&globalContext->gc_allocation_context); + retVal = GCHeapUtilities::GetGCHeap()->Alloc(&globalContext->gc_allocation_context, size, flags); + globalContext->UpdateCombinedLimit(); } diff --git a/src/coreclr/vm/gcstress.h b/src/coreclr/vm/gcstress.h index 23b11d9989fcf6..a5626da1b6961c 100644 --- a/src/coreclr/vm/gcstress.h +++ b/src/coreclr/vm/gcstress.h @@ -298,7 +298,7 @@ namespace _GCStress // BUG(github #10318) - when not using allocation contexts, the alloc lock // must be acquired here. Until fixed, this assert prevents random heap corruption. _ASSERTE(GCHeapUtilities::UseThreadAllocationContexts()); - GCHeapUtilities::GetGCHeap()->StressHeap(&t_runtime_thread_locals.alloc_context); + GCHeapUtilities::GetGCHeap()->StressHeap(&t_runtime_thread_locals.alloc_context.gc_allocation_context); } FORCEINLINE diff --git a/src/coreclr/vm/i386/jitinterfacex86.cpp b/src/coreclr/vm/i386/jitinterfacex86.cpp index 3807b00a8ca6e1..fc98ef46c4e746 100644 --- a/src/coreclr/vm/i386/jitinterfacex86.cpp +++ b/src/coreclr/vm/i386/jitinterfacex86.cpp @@ -237,8 +237,8 @@ void JIT_TrialAlloc::EmitCore(CPUSTUBLINKER *psl, CodeLabel *noLock, CodeLabel * if (flags & (ALIGN8 | SIZE_IN_EAX | ALIGN8OBJ)) { - // MOV EBX, [edx]gc_alloc_context.alloc_ptr - psl->X86EmitOffsetModRM(0x8B, kEBX, kEDX, offsetof(gc_alloc_context, alloc_ptr)); + // MOV EBX, [edx]alloc_context.gc_allocation_context.alloc_ptr + psl->X86EmitOffsetModRM(0x8B, kEBX, kEDX, ee_alloc_context::getAllocPtrFieldOffset()); // add EAX, EBX psl->Emit16(0xC303); if (flags & ALIGN8) @@ -246,20 +246,20 @@ void JIT_TrialAlloc::EmitCore(CPUSTUBLINKER *psl, CodeLabel *noLock, CodeLabel * } else { - // add eax, [edx]gc_alloc_context.alloc_ptr - psl->X86EmitOffsetModRM(0x03, kEAX, kEDX, offsetof(gc_alloc_context, alloc_ptr)); + // add eax, [edx]alloc_context.gc_allocation_context.alloc_ptr + psl->X86EmitOffsetModRM(0x03, kEAX, kEDX, 
ee_alloc_context::getAllocPtrFieldOffset()); } - // cmp eax, [edx]gc_alloc_context.alloc_limit - psl->X86EmitOffsetModRM(0x3b, kEAX, kEDX, offsetof(gc_alloc_context, alloc_limit)); + // cmp eax, [edx]alloc_context.combined_limit + psl->X86EmitOffsetModRM(0x3b, kEAX, kEDX, ee_alloc_context::getCombinedLimitFieldOffset()); // ja noAlloc psl->X86EmitCondJump(noAlloc, X86CondCode::kJA); // Fill in the allocation and get out. - // mov [edx]gc_alloc_context.alloc_ptr, eax - psl->X86EmitIndexRegStore(kEDX, offsetof(gc_alloc_context, alloc_ptr), kEAX); + // mov [edx]alloc_context.gc_allocation_context.alloc_ptr, eax + psl->X86EmitIndexRegStore(kEDX, ee_alloc_context::getAllocPtrFieldOffset(), kEAX); if (flags & (ALIGN8 | SIZE_IN_EAX | ALIGN8OBJ)) { @@ -301,9 +301,9 @@ void JIT_TrialAlloc::EmitCore(CPUSTUBLINKER *psl, CodeLabel *noLock, CodeLabel * psl->X86EmitIndexRegLoad(kEDX, kECX, offsetof(MethodTable, m_BaseSize)); } - // mov eax, dword ptr [g_global_alloc_context] + // mov eax, dword ptr [g_global_alloc_context.gc_allocation_context.alloc_ptr] psl->Emit8(0xA1); - psl->Emit32((int)(size_t)&g_global_alloc_context); + psl->Emit32((int)(size_t)&g_global_alloc_context + ee_alloc_context::getAllocPtrFieldOffset()); // Try the allocation. // add edx, eax @@ -312,17 +312,17 @@ void JIT_TrialAlloc::EmitCore(CPUSTUBLINKER *psl, CodeLabel *noLock, CodeLabel * if (flags & (ALIGN8 | ALIGN8OBJ)) EmitAlignmentRoundup(psl, kEAX, kEDX, flags); // bump up EDX size by 12 if EAX unaligned (so that we are aligned) - // cmp edx, dword ptr [g_global_alloc_context+4] + // cmp edx, dword ptr [g_global_alloc_context.combined_limit] psl->Emit16(0x153b); - psl->Emit32((int)(size_t)&g_global_alloc_context + 4); + psl->Emit32((int)(size_t)&g_global_alloc_context + ee_alloc_context::getCombinedLimitFieldOffset()); // ja noAlloc psl->X86EmitCondJump(noAlloc, X86CondCode::kJA); // Fill in the allocation and get out. 
- // mov dword ptr [g_global_alloc_context], edx + // mov dword ptr [g_global_alloc_context.gc_allocation_context.alloc_ptr], edx psl->Emit16(0x1589); - psl->Emit32((int)(size_t)&g_global_alloc_context); + psl->Emit32((int)(size_t)&g_global_alloc_context + ee_alloc_context::getAllocPtrFieldOffset()); if (flags & (ALIGN8 | ALIGN8OBJ)) EmitDummyObject(psl, kEAX, flags); diff --git a/src/coreclr/vm/i386/stublinkerx86.cpp b/src/coreclr/vm/i386/stublinkerx86.cpp index cfe9eec74af2e5..6a607635c4cb3d 100644 --- a/src/coreclr/vm/i386/stublinkerx86.cpp +++ b/src/coreclr/vm/i386/stublinkerx86.cpp @@ -2434,7 +2434,7 @@ namespace { gc_alloc_context* STDCALL GetAllocContextHelper() { - return &t_runtime_thread_locals.alloc_context; + return &t_runtime_thread_locals.alloc_context.gc_allocation_context; } } #endif diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index 1bfeaf2b039289..c94754031711a5 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -1668,7 +1668,8 @@ HCIMPL1_RAW(Object*, JIT_NewS_MP_FastPortable, CORINFO_CLASS_HANDLE typeHnd_) } CONTRACTL_END; _ASSERTE(GCHeapUtilities::UseThreadAllocationContexts()); - gc_alloc_context *allocContext = &t_runtime_thread_locals.alloc_context; + ee_alloc_context *eeAllocContext = &t_runtime_thread_locals.alloc_context; + gc_alloc_context *allocContext = &eeAllocContext->gc_allocation_context; TypeHandle typeHandle(typeHnd_); _ASSERTE(!typeHandle.IsTypeDesc()); // heap objects must have method tables @@ -1678,8 +1679,8 @@ HCIMPL1_RAW(Object*, JIT_NewS_MP_FastPortable, CORINFO_CLASS_HANDLE typeHnd_) _ASSERTE(size % DATA_ALIGNMENT == 0); BYTE *allocPtr = allocContext->alloc_ptr; - _ASSERTE(allocPtr <= allocContext->alloc_limit); - if (size > static_cast(allocContext->alloc_limit - allocPtr)) + _ASSERTE(allocPtr <= eeAllocContext->getCombinedLimit()); + if (size > static_cast(eeAllocContext->getCombinedLimit() - allocPtr)) { // Tail call to the slow helper return HCCALL1(JIT_New, 
typeHnd_); @@ -1785,7 +1786,8 @@ HCIMPL1_RAW(StringObject*, AllocateString_MP_FastPortable, DWORD stringLength) return HCCALL1(FramedAllocateString, stringLength); } - gc_alloc_context *allocContext = &t_runtime_thread_locals.alloc_context; + ee_alloc_context *eeAllocContext = &t_runtime_thread_locals.alloc_context; + gc_alloc_context *allocContext = &eeAllocContext->gc_allocation_context; SIZE_T totalSize = StringObject::GetSize(stringLength); @@ -1798,8 +1800,8 @@ HCIMPL1_RAW(StringObject*, AllocateString_MP_FastPortable, DWORD stringLength) totalSize = alignedTotalSize; BYTE *allocPtr = allocContext->alloc_ptr; - _ASSERTE(allocPtr <= allocContext->alloc_limit); - if (totalSize > static_cast(allocContext->alloc_limit - allocPtr)) + _ASSERTE(allocPtr <= eeAllocContext->getCombinedLimit()); + if (totalSize > static_cast(eeAllocContext->getCombinedLimit() - allocPtr)) { // Tail call to the slow helper return HCCALL1(FramedAllocateString, stringLength); @@ -1901,7 +1903,8 @@ HCIMPL2_RAW(Object*, JIT_NewArr1VC_MP_FastPortable, CORINFO_CLASS_HANDLE arrayMT return HCCALL2(JIT_NewArr1, arrayMT, size); } - gc_alloc_context *allocContext = &t_runtime_thread_locals.alloc_context; + ee_alloc_context* eeAllocContext = &t_runtime_thread_locals.alloc_context; + gc_alloc_context* allocContext = &eeAllocContext->gc_allocation_context; MethodTable *pArrayMT = (MethodTable *)arrayMT; @@ -1919,8 +1922,8 @@ HCIMPL2_RAW(Object*, JIT_NewArr1VC_MP_FastPortable, CORINFO_CLASS_HANDLE arrayMT totalSize = alignedTotalSize; BYTE *allocPtr = allocContext->alloc_ptr; - _ASSERTE(allocPtr <= allocContext->alloc_limit); - if (totalSize > static_cast(allocContext->alloc_limit - allocPtr)) + _ASSERTE(allocPtr <= eeAllocContext->getCombinedLimit()); + if (totalSize > static_cast(eeAllocContext->getCombinedLimit() - allocPtr)) { // Tail call to the slow helper return HCCALL2(JIT_NewArr1, arrayMT, size); @@ -1970,10 +1973,11 @@ HCIMPL2_RAW(Object*, JIT_NewArr1OBJ_MP_FastPortable, CORINFO_CLASS_HANDLE 
arrayM _ASSERTE(ALIGN_UP(totalSize, DATA_ALIGNMENT) == totalSize); - gc_alloc_context *allocContext = &t_runtime_thread_locals.alloc_context; + ee_alloc_context* eeAllocContext = &t_runtime_thread_locals.alloc_context; + gc_alloc_context* allocContext = &eeAllocContext->gc_allocation_context; BYTE *allocPtr = allocContext->alloc_ptr; - _ASSERTE(allocPtr <= allocContext->alloc_limit); - if (totalSize > static_cast(allocContext->alloc_limit - allocPtr)) + _ASSERTE(allocPtr <= eeAllocContext->getCombinedLimit()); + if (totalSize > static_cast(eeAllocContext->getCombinedLimit() - allocPtr)) { // Tail call to the slow helper return HCCALL2(JIT_NewArr1, arrayMT, size); @@ -2120,7 +2124,8 @@ HCIMPL2_RAW(Object*, JIT_Box_MP_FastPortable, CORINFO_CLASS_HANDLE type, void* u } _ASSERTE(GCHeapUtilities::UseThreadAllocationContexts()); - gc_alloc_context *allocContext = &t_runtime_thread_locals.alloc_context; + ee_alloc_context* eeAllocContext = &t_runtime_thread_locals.alloc_context; + gc_alloc_context* allocContext = &eeAllocContext->gc_allocation_context; TypeHandle typeHandle(type); _ASSERTE(!typeHandle.IsTypeDesc()); // heap objects must have method tables @@ -2139,8 +2144,8 @@ HCIMPL2_RAW(Object*, JIT_Box_MP_FastPortable, CORINFO_CLASS_HANDLE type, void* u _ASSERTE(size % DATA_ALIGNMENT == 0); BYTE *allocPtr = allocContext->alloc_ptr; - _ASSERTE(allocPtr <= allocContext->alloc_limit); - if (size > static_cast(allocContext->alloc_limit - allocPtr)) + _ASSERTE(allocPtr <= eeAllocContext->getCombinedLimit()); + if (size > static_cast(eeAllocContext->getCombinedLimit() - allocPtr)) { // Tail call to the slow helper return HCCALL2(JIT_Box, type, unboxedData); diff --git a/src/coreclr/vm/threads.cpp b/src/coreclr/vm/threads.cpp index f98a5cf58a2251..c3bfab86b66331 100644 --- a/src/coreclr/vm/threads.cpp +++ b/src/coreclr/vm/threads.cpp @@ -2763,11 +2763,12 @@ void Thread::CooperativeCleanup() if (GCHeapUtilities::IsGCHeapInitialized()) { + gc_alloc_context* gc_alloc_context = 
GetAllocContext(); // If the GC heap is initialized, we need to fix the alloc context for this detaching thread. // GetTotalAllocatedBytes reads dead_threads_non_alloc_bytes, but will suspend EE, being in COOP mode we cannot race with that // however, there could be other threads terminating and doing the same Add. - InterlockedExchangeAdd64((LONG64*)&dead_threads_non_alloc_bytes, t_runtime_thread_locals.alloc_context.alloc_limit - t_runtime_thread_locals.alloc_context.alloc_ptr); - GCHeapUtilities::GetGCHeap()->FixAllocContext(&t_runtime_thread_locals.alloc_context, NULL, NULL); + InterlockedExchangeAdd64((LONG64*)&dead_threads_non_alloc_bytes, gc_alloc_context->alloc_limit - gc_alloc_context->alloc_ptr); + GCHeapUtilities::GetGCHeap()->FixAllocContext(gc_alloc_context, NULL, NULL); t_runtime_thread_locals.alloc_context.init(); // re-initialize the context. // Clear out the alloc context pointer for this thread. When TLS is gone, this pointer will point into freed memory. diff --git a/src/coreclr/vm/threads.h b/src/coreclr/vm/threads.h index 429031cf5493a1..2f43379e5fe368 100644 --- a/src/coreclr/vm/threads.h +++ b/src/coreclr/vm/threads.h @@ -453,7 +453,7 @@ struct RuntimeThreadLocals { // on MP systems, each thread has its own allocation chunk so we can avoid // lock prefixes and expensive MP cache snooping stuff - gc_alloc_context alloc_context; + ee_alloc_context alloc_context; }; #ifdef _MSC_VER @@ -971,7 +971,25 @@ class Thread public: inline void InitRuntimeThreadLocals() { LIMITED_METHOD_CONTRACT; m_pRuntimeThreadLocals = PTR_RuntimeThreadLocals(&t_runtime_thread_locals); } - inline PTR_gc_alloc_context GetAllocContext() { LIMITED_METHOD_CONTRACT; return PTR_gc_alloc_context(&m_pRuntimeThreadLocals->alloc_context); } + inline ee_alloc_context* GetEEAllocContext() + { + LIMITED_METHOD_CONTRACT; + if (m_pRuntimeThreadLocals == nullptr) + { + return nullptr; + } + return &m_pRuntimeThreadLocals->alloc_context; + } + + inline PTR_gc_alloc_context 
GetAllocContext() + { + LIMITED_METHOD_CONTRACT; + if (m_pRuntimeThreadLocals == nullptr) + { + return nullptr; + } + return PTR_gc_alloc_context(&m_pRuntimeThreadLocals->alloc_context.gc_allocation_context); + } // This is the type handle of the first object in the alloc context at the time // we fire the AllocationTick event. It's only for tooling purpose. diff --git a/src/coreclr/vm/threadsuspend.cpp b/src/coreclr/vm/threadsuspend.cpp index 9cdb8689984339..2908ab76bf335e 100644 --- a/src/coreclr/vm/threadsuspend.cpp +++ b/src/coreclr/vm/threadsuspend.cpp @@ -2363,7 +2363,7 @@ void Thread::PerformPreemptiveGC() // BUG(github #10318) - when not using allocation contexts, the alloc lock // must be acquired here. Until fixed, this assert prevents random heap corruption. _ASSERTE(GCHeapUtilities::UseThreadAllocationContexts()); - GCHeapUtilities::GetGCHeap()->StressHeap(&t_runtime_thread_locals.alloc_context); + GCHeapUtilities::GetGCHeap()->StressHeap(GetAllocContext()); m_bGCStressing = FALSE; } m_GCOnTransitionsOK = TRUE; diff --git a/src/native/managed/cdacreader/src/Contracts/Thread.cs b/src/native/managed/cdacreader/src/Contracts/Thread.cs index 212807870990c1..bc055c0e856816 100644 --- a/src/native/managed/cdacreader/src/Contracts/Thread.cs +++ b/src/native/managed/cdacreader/src/Contracts/Thread.cs @@ -122,8 +122,8 @@ ThreadData IThread.GetThreadData(TargetPointer threadPointer) thread.OSId, (ThreadState)thread.State, (thread.PreemptiveGCDisabled & 0x1) != 0, - thread.RuntimeThreadLocals?.AllocContext.Pointer ?? TargetPointer.Null, - thread.RuntimeThreadLocals?.AllocContext.Limit ?? TargetPointer.Null, + thread.RuntimeThreadLocals?.AllocContext.GCAllocationContext.Pointer ?? TargetPointer.Null, + thread.RuntimeThreadLocals?.AllocContext.GCAllocationContext.Limit ?? 
TargetPointer.Null, thread.Frame, firstNestedException, thread.TEB, diff --git a/src/native/managed/cdacreader/src/Data/EEAllocContext.cs b/src/native/managed/cdacreader/src/Data/EEAllocContext.cs new file mode 100644 index 00000000000000..0b7f26d0307b8c --- /dev/null +++ b/src/native/managed/cdacreader/src/Data/EEAllocContext.cs @@ -0,0 +1,18 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +namespace Microsoft.Diagnostics.DataContractReader.Data; + +internal sealed class EEAllocContext : IData +{ + static EEAllocContext IData.Create(Target target, TargetPointer address) + => new EEAllocContext(target, address); + + public EEAllocContext(Target target, TargetPointer address) + { + Target.TypeInfo type = target.GetTypeInfo(DataType.EEAllocContext); + GCAllocationContext = target.ProcessedData.GetOrAdd(address + (ulong)type.Fields[nameof(GCAllocationContext)].Offset); + } + + public GCAllocContext GCAllocationContext { get; init; } +} diff --git a/src/native/managed/cdacreader/src/Data/RuntimeThreadLocals.cs b/src/native/managed/cdacreader/src/Data/RuntimeThreadLocals.cs index 2d7f92cb4cb247..2841d70417cdb6 100644 --- a/src/native/managed/cdacreader/src/Data/RuntimeThreadLocals.cs +++ b/src/native/managed/cdacreader/src/Data/RuntimeThreadLocals.cs @@ -11,8 +11,8 @@ static RuntimeThreadLocals IData.Create(Target target, Targ public RuntimeThreadLocals(Target target, TargetPointer address) { Target.TypeInfo type = target.GetTypeInfo(DataType.RuntimeThreadLocals); - AllocContext = target.ProcessedData.GetOrAdd(address + (ulong)type.Fields[nameof(AllocContext)].Offset); + AllocContext = target.ProcessedData.GetOrAdd(address + (ulong)type.Fields[nameof(AllocContext)].Offset); } - public GCAllocContext AllocContext { get; init; } + public EEAllocContext AllocContext { get; init; } } diff --git a/src/native/managed/cdacreader/src/DataType.cs 
b/src/native/managed/cdacreader/src/DataType.cs index 3f20bdf7b095bc..d5dd45f2d54155 100644 --- a/src/native/managed/cdacreader/src/DataType.cs +++ b/src/native/managed/cdacreader/src/DataType.cs @@ -23,6 +23,7 @@ public enum DataType Thread, ThreadStore, GCAllocContext, + EEAllocContext, Exception, ExceptionInfo, RuntimeThreadLocals,