Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[NativeAOT/ARM64] Generate frames compatible with Apple compact unwinding #107766

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -656,6 +656,7 @@ class CodeGen final : public CodeGenInterface
virtual bool IsSaveFpLrWithAllCalleeSavedRegisters() const;
bool genSaveFpLrWithAllCalleeSavedRegisters;
bool genForceFuncletFrameType5;
bool genReverseAndPairCalleeSavedRegisters;
#endif // TARGET_ARM64

//-------------------------------------------------------------------------
Expand Down
46 changes: 38 additions & 8 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -845,12 +845,19 @@ void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, i

for (int i = 0; i < regStack.Height(); ++i)
{
RegPair regPair = regStack.Bottom(i);
RegPair regPair = genReverseAndPairCalleeSavedRegisters ? regStack.Top(i) : regStack.Bottom(i);
if (regPair.reg2 != REG_NA)
{
// We can use a STP instruction.
genPrologSaveRegPair(regPair.reg1, regPair.reg2, spOffset, spDelta, regPair.useSaveNextPair, REG_IP0,
nullptr);
if (genReverseAndPairCalleeSavedRegisters)
{
genPrologSaveRegPair(regPair.reg2, regPair.reg1, spOffset, spDelta, false, REG_IP0, nullptr);
}
else
{
genPrologSaveRegPair(regPair.reg1, regPair.reg2, spOffset, spDelta, regPair.useSaveNextPair, REG_IP0,
nullptr);
}

spOffset += 2 * slotSize;
}
Expand Down Expand Up @@ -926,8 +933,9 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe

// Save integer registers at higher addresses than floating-point registers.

regMaskTP maskSaveRegsFrame = regsToSaveMask & (RBM_FP | RBM_LR);
regMaskTP maskSaveRegsFloat = regsToSaveMask & RBM_ALLFLOAT;
regMaskTP maskSaveRegsInt = regsToSaveMask & ~maskSaveRegsFloat;
regMaskTP maskSaveRegsInt = regsToSaveMask & ~maskSaveRegsFloat & ~maskSaveRegsFrame;

if (maskSaveRegsFloat != RBM_NONE)
{
Expand All @@ -939,6 +947,13 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe
if (maskSaveRegsInt != RBM_NONE)
{
genSaveCalleeSavedRegisterGroup(maskSaveRegsInt, spDelta, lowestCalleeSavedOffset);
spDelta = 0;
lowestCalleeSavedOffset += genCountBits(maskSaveRegsInt) * FPSAVE_REGSIZE_BYTES;
}

if (maskSaveRegsFrame != RBM_NONE)
{
genPrologSaveRegPair(REG_FP, REG_LR, lowestCalleeSavedOffset, spDelta, false, REG_IP0, nullptr);
// No need to update spDelta, lowestCalleeSavedOffset since they're not used after this.
}
}
Expand Down Expand Up @@ -970,13 +985,20 @@ void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta
stackDelta = spDelta;
}

RegPair regPair = regStack.Top(i);
RegPair regPair = genReverseAndPairCalleeSavedRegisters ? regStack.Bottom(i) : regStack.Top(i);
if (regPair.reg2 != REG_NA)
{
spOffset -= 2 * slotSize;

genEpilogRestoreRegPair(regPair.reg1, regPair.reg2, spOffset, stackDelta, regPair.useSaveNextPair, REG_IP1,
nullptr);
if (genReverseAndPairCalleeSavedRegisters)
{
genEpilogRestoreRegPair(regPair.reg2, regPair.reg1, spOffset, stackDelta, false, REG_IP1, nullptr);
}
else
{
genEpilogRestoreRegPair(regPair.reg1, regPair.reg2, spOffset, stackDelta, regPair.useSaveNextPair,
REG_IP1, nullptr);
}
}
else
{
Expand Down Expand Up @@ -1043,11 +1065,19 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in

// Save integer registers at higher addresses than floating-point registers.

regMaskTP maskRestoreRegsFrame = regsToRestoreMask & (RBM_FP | RBM_LR);
regMaskTP maskRestoreRegsFloat = regsToRestoreMask & RBM_ALLFLOAT;
regMaskTP maskRestoreRegsInt = regsToRestoreMask & ~maskRestoreRegsFloat;
regMaskTP maskRestoreRegsInt = regsToRestoreMask & ~maskRestoreRegsFloat & ~maskRestoreRegsFrame;

// Restore in the opposite order of saving.

if (maskRestoreRegsFrame != RBM_NONE)
{
int spFrameDelta = (maskRestoreRegsFloat != RBM_NONE || maskRestoreRegsInt != RBM_NONE) ? 0 : spDelta;
spOffset -= 2 * REGSIZE_BYTES;
genEpilogRestoreRegPair(REG_FP, REG_LR, spOffset, spFrameDelta, false, REG_IP1, nullptr);
}

if (maskRestoreRegsInt != RBM_NONE)
{
int spIntDelta = (maskRestoreRegsFloat != RBM_NONE) ? 0 : spDelta; // should we delay the SP adjustment?
Expand Down
24 changes: 24 additions & 0 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@ CodeGen::CodeGen(Compiler* theCompiler)
#ifdef TARGET_ARM64
genSaveFpLrWithAllCalleeSavedRegisters = false;
genForceFuncletFrameType5 = false;
genReverseAndPairCalleeSavedRegisters = false;
#endif // TARGET_ARM64
}

Expand Down Expand Up @@ -4846,6 +4847,29 @@ void CodeGen::genFinalizeFrame()
}
#endif // TARGET_ARM

#ifdef TARGET_ARM64
if (compiler->IsTargetAbi(CORINFO_NATIVEAOT_ABI) && TargetOS::IsApplePlatform)
{
JITDUMP("Setting genReverseAndPairCalleeSavedRegisters = true");

genReverseAndPairCalleeSavedRegisters = true;

// Make sure we push the registers in pairs if possible. If we only allocate a contiguous
// block of registers, this adds at most one integer and at most one floating-point
// register to the list. The stack has to be 16-byte aligned, so in the worst case (an odd
// number of integer registers and an odd number of FP registers were occupied) this
// allocates 16 more bytes of stack space. The same number of instructions is generated;
// the STR instructions are just replaced with STP (store pair).
regMaskTP maskModifiedRegs = regSet.rsGetModifiedRegsMask();
regMaskTP maskPairRegs = ((maskModifiedRegs & (RBM_V8 | RBM_V10 | RBM_V12 | RBM_V14)).getLow() << 1) |
((maskModifiedRegs & (RBM_R19 | RBM_R21 | RBM_R23 | RBM_R25 | RBM_R27)).getLow() << 1);
if (maskPairRegs != RBM_NONE)
{
regSet.rsSetRegsModified(maskPairRegs);
}
}
#endif

#ifdef DEBUG
if (verbose)
{
Expand Down
10 changes: 10 additions & 0 deletions src/coreclr/jit/compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2807,6 +2807,16 @@ inline
{
*pBaseReg = REG_SPBASE;
}
#elif defined(TARGET_ARM64)
if (FPbased && !codeGen->isFramePointerRequired() && varOffset < 0 &&
lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT && codeGen->IsSaveFpLrWithAllCalleeSavedRegisters())
{
int spVarOffset = varOffset + codeGen->genSPtoFPdelta();
JITDUMP("lvaFrameAddress optimization for V%02u: [FP-%d] -> [SP+%d]\n", varNum, -varOffset, spVarOffset);
FPbased = false;
varOffset = spVarOffset;
}
*pFPbased = FPbased;
#else
*pFPbased = FPbased;
#endif
Expand Down
139 changes: 85 additions & 54 deletions src/coreclr/jit/lclvars.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5648,7 +5648,9 @@ void Compiler::lvaFixVirtualFrameOffsets()
#endif

// The delta to be added to virtual offset to adjust it relative to frame pointer or SP
int delta = 0;
int delta = 0;
int frameLocalsDelta = 0;
int frameBoundary = 0;

#ifdef TARGET_XARCH
delta += REGSIZE_BYTES; // pushed PC (return address) for x86/x64
Expand All @@ -5673,7 +5675,25 @@ void Compiler::lvaFixVirtualFrameOffsets()
// We set FP to be after LR, FP
delta += 2 * REGSIZE_BYTES;
}
#elif defined(TARGET_AMD64) || defined(TARGET_ARM64)
#elif defined(TARGET_ARM64)
else
{
// FP is used.
delta += codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta();

// If we placed FP/LR at the bottom of the frame we need to shift all the variables
// on the new frame to account for it. See lvaAssignVirtualFrameOffsetsToLocals.
if (!codeGen->IsSaveFpLrWithAllCalleeSavedRegisters())
{
// We set FP to be after LR, FP
frameLocalsDelta = 2 * REGSIZE_BYTES;
frameBoundary = opts.IsOSR() ? -info.compPatchpointInfo->TotalFrameSize() : 0;
if (info.compIsVarArgs)
frameBoundary -= MAX_REG_ARG * REGSIZE_BYTES;
}
JITDUMP("--- delta bump %d for FP frame, %d inside frame for FP/LR relocation\n", delta, frameLocalsDelta);
}
#elif defined(TARGET_AMD64)
else
{
// FP is used.
Expand Down Expand Up @@ -5741,7 +5761,7 @@ void Compiler::lvaFixVirtualFrameOffsets()

#if defined(TARGET_X86)
// On x86, we set the stack offset for a promoted field
// to match a struct parameter in lvAssignFrameOffsetsToPromotedStructs.
// to match a struct parameter in lvaAssignFrameOffsetsToPromotedStructs.
if ((!varDsc->lvIsParam || parentvarDsc->lvIsParam) && promotionType == PROMOTION_TYPE_DEPENDENT)
#else
if (!varDsc->lvIsParam && promotionType == PROMOTION_TYPE_DEPENDENT)
Expand All @@ -5761,15 +5781,23 @@ void Compiler::lvaFixVirtualFrameOffsets()

if (doAssignStkOffs)
{
JITDUMP("-- V%02u was %d, now %d\n", lclNum, varDsc->GetStackOffset(), varDsc->GetStackOffset() + delta);
varDsc->SetStackOffset(varDsc->GetStackOffset() + delta);
int localDelta = delta;

if (frameLocalsDelta != 0 && varDsc->GetStackOffset() < frameBoundary)
{
localDelta += frameLocalsDelta;
}

JITDUMP("-- V%02u was %d, now %d\n", lclNum, varDsc->GetStackOffset(),
varDsc->GetStackOffset() + localDelta);
varDsc->SetStackOffset(varDsc->GetStackOffset() + localDelta);

#if DOUBLE_ALIGN
if (genDoubleAlign() && !codeGen->isFramePointerUsed())
{
if (varDsc->lvFramePointerBased)
{
varDsc->SetStackOffset(varDsc->GetStackOffset() - delta);
varDsc->SetStackOffset(varDsc->GetStackOffset() - localDelta);

// We need to re-adjust the offsets of the parameters so they are EBP
// relative rather than stack/frame pointer relative
Expand All @@ -5791,9 +5819,13 @@ void Compiler::lvaFixVirtualFrameOffsets()
assert(codeGen->regSet.tmpAllFree());
for (TempDsc* temp = codeGen->regSet.tmpListBeg(); temp != nullptr; temp = codeGen->regSet.tmpListNxt(temp))
{
temp->tdAdjustTempOffs(delta);
temp->tdAdjustTempOffs(delta + frameLocalsDelta);
}

if (lvaCachedGenericContextArgOffs < frameBoundary)
{
lvaCachedGenericContextArgOffs += frameLocalsDelta;
}
lvaCachedGenericContextArgOffs += delta;

#if FEATURE_FIXED_OUT_ARGS
Expand Down Expand Up @@ -6050,30 +6082,6 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
codeGen->setFramePointerUsed(codeGen->isFramePointerRequired());
}

#ifdef TARGET_ARM64
// Decide where to save FP and LR registers. We store FP/LR registers at the bottom of the frame if there is
// a frame pointer used (so we get positive offsets from the frame pointer to access locals), but not if we
// need a GS cookie AND localloc is used, since we need the GS cookie to protect the saved return value,
// and also the saved frame pointer. See CodeGen::genPushCalleeSavedRegisters() for more details about the
// frame types. Since saving FP/LR at high addresses is a relatively rare case, force using it during stress.
// (It should be legal to use these frame types for every frame).

if (opts.compJitSaveFpLrWithCalleeSavedRegisters == 0)
{
// Default configuration
codeGen->SetSaveFpLrWithAllCalleeSavedRegisters((getNeedsGSSecurityCookie() && compLocallocUsed) ||
opts.compDbgEnC || compStressCompile(STRESS_GENERIC_VARN, 20));
}
else if (opts.compJitSaveFpLrWithCalleeSavedRegisters == 1)
{
codeGen->SetSaveFpLrWithAllCalleeSavedRegisters(false); // Disable using new frames
}
else if ((opts.compJitSaveFpLrWithCalleeSavedRegisters == 2) || (opts.compJitSaveFpLrWithCalleeSavedRegisters == 3))
{
codeGen->SetSaveFpLrWithAllCalleeSavedRegisters(true); // Force using new frames
}
#endif // TARGET_ARM64

#ifdef TARGET_XARCH
// On x86/amd64, the return address has already been pushed by the call instruction in the caller.
stkOffs -= TARGET_POINTER_SIZE; // return address;
Expand Down Expand Up @@ -6122,9 +6130,13 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
#endif // !TARGET_ARM

#ifdef TARGET_ARM64
// If the frame pointer is used, then we'll save FP/LR at the bottom of the stack.
// Otherwise, we won't store FP, and we'll store LR at the top, with the other callee-save
// registers (if any).
// If the frame pointer is used, then we'll save FP/LR either at the bottom of the stack
// or at the top of the stack, depending on the frame type. We make the decision after
// assigning the variables on the frame and then fix up the offsets in
// lvaFixVirtualFrameOffsets. For now, we proceed as if FP/LR were saved with the
// callee-saved registers. If we later decide to move FP/LR to the bottom of the frame,
// all the assigned variables and temporaries shift by 16 bytes. The largest alignment we
// currently make is 16 bytes (for SIMD), so this shift preserves the alignment of every
// local.

int initialStkOffs = 0;
if (info.compIsVarArgs)
Expand All @@ -6135,17 +6147,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
stkOffs -= initialStkOffs;
}

if (codeGen->IsSaveFpLrWithAllCalleeSavedRegisters() || !isFramePointerUsed()) // Note that currently we always have
// a frame pointer
{
stkOffs -= compCalleeRegsPushed * REGSIZE_BYTES;
}
else
{
// Subtract off FP and LR.
assert(compCalleeRegsPushed >= 2);
stkOffs -= (compCalleeRegsPushed - 2) * REGSIZE_BYTES;
}
stkOffs -= compCalleeRegsPushed * REGSIZE_BYTES;

#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)

Expand Down Expand Up @@ -6815,15 +6817,6 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
}
#endif // TARGET_AMD64

#ifdef TARGET_ARM64
if (!codeGen->IsSaveFpLrWithAllCalleeSavedRegisters() && isFramePointerUsed()) // Note that currently we always have
// a frame pointer
{
// Create space for saving FP and LR.
stkOffs -= 2 * REGSIZE_BYTES;
}
#endif // TARGET_ARM64

#if FEATURE_FIXED_OUT_ARGS
if (lvaOutgoingArgSpaceSize > 0)
{
Expand Down Expand Up @@ -6861,6 +6854,44 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()

noway_assert(compLclFrameSize + originalFrameSize ==
(unsigned)-(stkOffs + (pushedCount * (int)TARGET_POINTER_SIZE)));

#ifdef TARGET_ARM64
// Decide where to save FP and LR registers. We store FP/LR registers at the bottom of the frame if there is
// a frame pointer used (so we get positive offsets from the frame pointer to access locals), but not if we
// need a GS cookie AND localloc is used, since we need the GS cookie to protect the saved return value,
// and also the saved frame pointer. See CodeGen::genPushCalleeSavedRegisters() for more details about the
// frame types. Since saving FP/LR at high addresses is a relatively rare case, force using it during stress.
// (It should be legal to use these frame types for every frame).
//
// For the Apple NativeAOT ABI we try to save the FP/LR registers at the top of the frame to get a
// canonical frame layout that can be represented with compact unwinding information. To maintain code
// quality we only do this when we can use SP-based addressing (!isFramePointerRequired) through the
// lvaFrameAddress optimization, or when the whole frame is small enough that negative FP-based
// addressing can reach all of it.

if (opts.compJitSaveFpLrWithCalleeSavedRegisters == 0)
{
if (IsTargetAbi(CORINFO_NATIVEAOT_ABI) && TargetOS::IsApplePlatform &&
(!codeGen->isFramePointerRequired() || codeGen->genTotalFrameSize() < 0x100))
{
codeGen->SetSaveFpLrWithAllCalleeSavedRegisters(true);
}
else
{
// Default configuration
codeGen->SetSaveFpLrWithAllCalleeSavedRegisters((getNeedsGSSecurityCookie() && compLocallocUsed) ||
opts.compDbgEnC ||
compStressCompile(Compiler::STRESS_GENERIC_VARN, 20));
}
}
else if (opts.compJitSaveFpLrWithCalleeSavedRegisters == 1)
{
codeGen->SetSaveFpLrWithAllCalleeSavedRegisters(false); // Disable using new frames
}
else if ((opts.compJitSaveFpLrWithCalleeSavedRegisters == 2) || (opts.compJitSaveFpLrWithCalleeSavedRegisters == 3))
{
codeGen->SetSaveFpLrWithAllCalleeSavedRegisters(true); // Force using new frames
}
#endif // TARGET_ARM64
}

//------------------------------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,5 +120,18 @@ internal static class MachNative
public const uint PLATFORM_TVOSSIMULATOR = 8;
public const uint PLATFORM_WATCHOSSIMULATOR = 9;
public const uint PLATFORM_DRIVERKIT = 10;

public const uint UNWIND_ARM64_MODE_FRAMELESS = 0x02000000;
public const uint UNWIND_ARM64_MODE_DWARF = 0x03000000;
public const uint UNWIND_ARM64_MODE_FRAME = 0x04000000;
public const uint UNWIND_ARM64_FRAME_X19_X20_PAIR = 0x00000001;
public const uint UNWIND_ARM64_FRAME_X21_X22_PAIR = 0x00000002;
public const uint UNWIND_ARM64_FRAME_X23_X24_PAIR = 0x00000004;
public const uint UNWIND_ARM64_FRAME_X25_X26_PAIR = 0x00000008;
public const uint UNWIND_ARM64_FRAME_X27_X28_PAIR = 0x00000010;
public const uint UNWIND_ARM64_FRAME_D8_D9_PAIR = 0x00000100;
public const uint UNWIND_ARM64_FRAME_D10_D11_PAIR = 0x00000200;
public const uint UNWIND_ARM64_FRAME_D12_D13_PAIR = 0x00000400;
public const uint UNWIND_ARM64_FRAME_D14_D15_PAIR = 0x00000800;
}
}
Loading
Loading