Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.

Preserve Vector Arg registers on Arm64 #22257

Merged
merged 2 commits into from
Feb 1, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 17 additions & 17 deletions src/pal/inc/unixasmmacrosarm64.inc
Original file line number Diff line number Diff line change
Expand Up @@ -137,14 +137,14 @@ C_FUNC(\Name\()_End):
// ArgumentRegisters::x2
// ArgumentRegisters::x1
// ArgumentRegisters::x0
// FloatRegisters::d7
// FloatRegisters::d6
// FloatRegisters::d5
// FloatRegisters::d4
// FloatRegisters::d3
// FloatRegisters::d2
// FloatRegisters::d1
// FloatRegisters::d0
// FloatRegisters::q7
// FloatRegisters::q6
// FloatRegisters::q5
// FloatRegisters::q4
// FloatRegisters::q3
// FloatRegisters::q2
// FloatRegisters::q1
// FloatRegisters::q0
.macro PROLOG_WITH_TRANSITION_BLOCK extraLocals = 0, SaveFPArgs = 1

__PWTB_FloatArgumentRegisters = \extraLocals
Expand Down Expand Up @@ -200,13 +200,13 @@ C_FUNC(\Name\()_End):

.endm

// Reserve 64 bytes of memory before calling SAVE_FLOAT_ARGUMENT_REGISTERS
// Reserve 128 bytes of memory before calling SAVE_FLOAT_ARGUMENT_REGISTERS
.macro SAVE_FLOAT_ARGUMENT_REGISTERS reg, ofs

stp d0, d1, [\reg, #(\ofs)]
stp d2, d3, [\reg, #(\ofs + 16)]
stp d4, d5, [\reg, #(\ofs + 32)]
stp d6, d7, [\reg, #(\ofs + 48)]
stp q0, q1, [\reg, #(\ofs)]
stp q2, q3, [\reg, #(\ofs + 32)]
stp q4, q5, [\reg, #(\ofs + 64)]
stp q6, q7, [\reg, #(\ofs + 96)]

.endm

Expand All @@ -222,10 +222,10 @@ C_FUNC(\Name\()_End):

.macro RESTORE_FLOAT_ARGUMENT_REGISTERS reg, ofs

ldp d0, d1, [\reg, #(\ofs)]
ldp d2, d3, [\reg, #(\ofs + 16)]
ldp d4, d5, [\reg, #(\ofs + 32)]
ldp d6, d7, [\reg, #(\ofs + 48)]
ldp q0, q1, [\reg, #(\ofs)]
ldp q2, q3, [\reg, #(\ofs + 32)]
ldp q4, q5, [\reg, #(\ofs + 64)]
ldp q6, q7, [\reg, #(\ofs + 96)]

.endm

Expand Down
21 changes: 9 additions & 12 deletions src/vm/argdestination.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,20 +65,17 @@ class ArgDestination

int floatRegCount = m_argLocDescForStructInRegs->m_cFloatReg;
bool typeFloat = m_argLocDescForStructInRegs->m_isSinglePrecision;
void* dest = this->GetDestinationAddress();
UINT64* dest = (UINT64*) this->GetDestinationAddress();

if (typeFloat)
for (int i = 0; i < floatRegCount; ++i)
{
for (int i = 0; i < floatRegCount; ++i)
{
// Copy 4 bytes on 8 bytes alignment
*((UINT64*)dest + i) = *((UINT32*)src + i);
}
}
else
{
// We can just do a memcpy.
memcpyNoGCRefs(dest, src, fieldBytes);
// Copy 4 or 8 bytes from src.
UINT64 val = typeFloat ? *((UINT32*)src + i) : *((UINT64*)src + i);
// Always store 8 bytes
*(dest++) = val;
// For now, always zero the next 8 bytes.
// (When HVAs are supported we will get the next 8 bytes from src.)
*(dest++) = 0;
}
}

Expand Down
8 changes: 4 additions & 4 deletions src/vm/arm64/CallDescrWorkerARM64.asm
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,10 @@ Ldonestack
;; given in x9.
ldr x9, [x19,#CallDescrData__pFloatArgumentRegisters]
cbz x9, LNoFloatingPoint
ldp d0, d1, [x9]
ldp d2, d3, [x9, #16]
ldp d4, d5, [x9, #32]
ldp d6, d7, [x9, #48]
ldp q0, q1, [x9]
ldp q2, q3, [x9, #32]
ldp q4, q5, [x9, #64]
ldp q6, q7, [x9, #96]
LNoFloatingPoint

;; Copy [pArgumentRegisters, ..., pArgumentRegisters + 56]
Expand Down
3 changes: 2 additions & 1 deletion src/vm/arm64/asmconstants.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ ASMCONSTANTS_C_ASSERT(AppDomain__m_dwId == offsetof(AppDomain, m_dwId));
#define SIZEOF__ArgumentRegisters 0x40
ASMCONSTANTS_C_ASSERT(SIZEOF__ArgumentRegisters == sizeof(ArgumentRegisters))

#define SIZEOF__FloatArgumentRegisters 0x40
// There are 8 128-bit registers in FloatArgumentRegisters
#define SIZEOF__FloatArgumentRegisters 0x80
ASMCONSTANTS_C_ASSERT(SIZEOF__FloatArgumentRegisters == sizeof(FloatArgumentRegisters))

#define CallDescrData__pSrc 0x00
Expand Down
24 changes: 12 additions & 12 deletions src/vm/arm64/asmhelpers.S
Original file line number Diff line number Diff line change
Expand Up @@ -121,18 +121,18 @@ LEAF_END HelperMethodFrameRestoreState, _TEXT
// The call in ndirect import precode points to this function.
NESTED_ENTRY NDirectImportThunk, _TEXT, NoHandler

PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -160
PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -224
SAVE_ARGUMENT_REGISTERS sp, 16
SAVE_FLOAT_ARGUMENT_REGISTERS sp, 88
SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96

mov x0, x12
bl NDirectImportWorker
mov x12, x0

// pop the stack and restore original register state
RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 88
RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96
RESTORE_ARGUMENT_REGISTERS sp, 16
EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 160
EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 224

// If we got back from NDirectImportWorker, the MD has been successfully
// linked. Proceed to execute the original DLL call.
Expand Down Expand Up @@ -481,9 +481,9 @@ WRITE_BARRIER_END JIT_WriteBarrier
NESTED_ENTRY VirtualMethodFixupStub, _TEXT, NoHandler

// Save arguments and return address
PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -160
PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -224
SAVE_ARGUMENT_REGISTERS sp, 16
SAVE_FLOAT_ARGUMENT_REGISTERS sp, 88
SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96

// Refer to ZapImportVirtualThunk::Save
// for details on this.
Expand All @@ -500,8 +500,8 @@ NESTED_ENTRY VirtualMethodFixupStub, _TEXT, NoHandler

// pop the stack and restore original register state
RESTORE_ARGUMENT_REGISTERS sp, 16
RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 88
EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 160
RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96
EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 224

PATCH_LABEL VirtualMethodFixupPatchLabel

Expand Down Expand Up @@ -711,9 +711,9 @@ COMToCLRDispatchHelper_RegSetup
NESTED_ENTRY TheUMEntryPrestub, _TEXT, UnhandledExceptionHandlerUnix

// Save arguments and return address
PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -160
PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -224
SAVE_ARGUMENT_REGISTERS sp, 16
SAVE_FLOAT_ARGUMENT_REGISTERS sp, 88
SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96

mov x0, x12
bl C_FUNC(TheUMEntryPrestubWorker)
Expand All @@ -723,8 +723,8 @@ NESTED_ENTRY TheUMEntryPrestub, _TEXT, UnhandledExceptionHandlerUnix

// pop the stack and restore original register state
RESTORE_ARGUMENT_REGISTERS sp, 16
RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 88
EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 160
RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96
EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 224

// and tailcall to the actual method
EPILOG_BRANCH_REG x12
Expand Down
29 changes: 16 additions & 13 deletions src/vm/arm64/asmhelpers.asm
Original file line number Diff line number Diff line change
Expand Up @@ -184,18 +184,18 @@ Done
; The call in ndirect import precode points to this function.
NESTED_ENTRY NDirectImportThunk

PROLOG_SAVE_REG_PAIR fp, lr, #-160!
PROLOG_SAVE_REG_PAIR fp, lr, #-224!
SAVE_ARGUMENT_REGISTERS sp, 16
SAVE_FLOAT_ARGUMENT_REGISTERS sp, 88
SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96

mov x0, x12
bl NDirectImportWorker
mov x12, x0

; pop the stack and restore original register state
RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 88
RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96
RESTORE_ARGUMENT_REGISTERS sp, 16
EPILOG_RESTORE_REG_PAIR fp, lr, #160!
EPILOG_RESTORE_REG_PAIR fp, lr, #224!

; If we got back from NDirectImportWorker, the MD has been successfully
; linked. Proceed to execute the original DLL call.
Expand Down Expand Up @@ -437,9 +437,9 @@ Exit
NESTED_ENTRY VirtualMethodFixupStub

; Save arguments and return address
PROLOG_SAVE_REG_PAIR fp, lr, #-160!
PROLOG_SAVE_REG_PAIR fp, lr, #-224!
SAVE_ARGUMENT_REGISTERS sp, 16
SAVE_FLOAT_ARGUMENT_REGISTERS sp, 88
SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96

; Refer to ZapImportVirtualThunk::Save
; for details on this.
Expand All @@ -456,8 +456,8 @@ Exit

; pop the stack and restore original register state
RESTORE_ARGUMENT_REGISTERS sp, 16
RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 88
EPILOG_RESTORE_REG_PAIR fp, lr, #160!
RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96
EPILOG_RESTORE_REG_PAIR fp, lr, #224!

PATCH_LABEL VirtualMethodFixupPatchLabel

Expand Down Expand Up @@ -761,7 +761,10 @@ COMToCLRDispatchHelper_StackLoop

COMToCLRDispatchHelper_RegSetup

RESTORE_FLOAT_ARGUMENT_REGISTERS x1, -1 * GenericComCallStub_FrameOffset
; We need an aligned offset for restoring float args, so do the subtraction into
; a scratch register
sub x5, x1, GenericComCallStub_FrameOffset
RESTORE_FLOAT_ARGUMENT_REGISTERS x5, 0

mov lr, x2
mov x12, x3
Expand Down Expand Up @@ -791,9 +794,9 @@ COMToCLRDispatchHelper_RegSetup
NESTED_ENTRY TheUMEntryPrestub,,UMEntryPrestubUnwindFrameChainHandler

; Save arguments and return address
PROLOG_SAVE_REG_PAIR fp, lr, #-160!
PROLOG_SAVE_REG_PAIR fp, lr, #-224!
SAVE_ARGUMENT_REGISTERS sp, 16
SAVE_FLOAT_ARGUMENT_REGISTERS sp, 88
SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96

mov x0, x12
bl TheUMEntryPrestubWorker
Expand All @@ -803,8 +806,8 @@ COMToCLRDispatchHelper_RegSetup

; pop the stack and restore original register state
RESTORE_ARGUMENT_REGISTERS sp, 16
RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 88
EPILOG_RESTORE_REG_PAIR fp, lr, #160!
RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96
EPILOG_RESTORE_REG_PAIR fp, lr, #224!

; and tailcall to the actual method
EPILOG_BRANCH_REG x12
Expand Down
18 changes: 9 additions & 9 deletions src/vm/arm64/asmmacros.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ __PWTB_SAVE_ARGUMENT_REGISTERS_OFFSET SETA 0

MEND

; Reserve 64 bytes of memory before calling SAVE_FLOAT_ARGUMENT_REGISTERS
; Reserve 128 bytes of memory before calling SAVE_FLOAT_ARGUMENT_REGISTERS
MACRO
SAVE_FLOAT_ARGUMENT_REGISTERS $reg, $offset

Expand All @@ -195,10 +195,10 @@ __PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET SETA $offset
__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET SETA 0
ENDIF

stp d0, d1, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET)]
stp d2, d3, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 16)]
stp d4, d5, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 32)]
stp d6, d7, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 48)]
stp q0, q1, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET)]
stp q2, q3, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 32)]
stp q4, q5, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 64)]
stp q6, q7, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 96)]
MEND

MACRO
Expand Down Expand Up @@ -231,10 +231,10 @@ __PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET SETA $offset
__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET SETA 0
ENDIF

ldp d0, d1, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET)]
ldp d2, d3, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 16)]
ldp d4, d5, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 32)]
ldp d6, d7, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 48)]
ldp q0, q1, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET)]
ldp q2, q3, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 32)]
ldp q4, q5, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 64)]
ldp q6, q7, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 96)]
MEND

; ------------------------------------------------------------------
Expand Down
8 changes: 4 additions & 4 deletions src/vm/arm64/calldescrworkerarm64.S
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@ LOCAL_LABEL(donestack):
// given in x9.
ldr x9, [x19,#CallDescrData__pFloatArgumentRegisters]
cbz x9, LOCAL_LABEL(NoFloatingPoint)
ldp d0, d1, [x9]
ldp d2, d3, [x9, #16]
ldp d4, d5, [x9, #32]
ldp d6, d7, [x9, #48]
ldp q0, q1, [x9]
ldp q2, q3, [x9, #32]
ldp q4, q5, [x9, #64]
ldp q6, q7, [x9, #96]
LOCAL_LABEL(NoFloatingPoint):

// Copy [pArgumentRegisters, ..., pArgumentRegisters + 56]
Expand Down
12 changes: 6 additions & 6 deletions src/vm/arm64/cgencpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ static_assert(((STACK_ELEM_SIZE & (STACK_ELEM_SIZE-1)) == 0), "STACK_ELEM_SIZE m
//**********************************************************************

//--------------------------------------------------------------------
// This represents the callee saved (non-volatile) registers saved as
// This represents the callee saved (non-volatile) integer registers saved as
// part of a FramedMethodFrame.
//--------------------------------------------------------------------
typedef DPTR(struct CalleeSavedRegisters) PTR_CalleeSavedRegisters;
Expand All @@ -111,7 +111,7 @@ struct CalleeSavedRegisters {
};

//--------------------------------------------------------------------
// This represents the arguments that are stored in volatile registers.
// This represents the arguments that are stored in volatile integer registers.
// This should not overlap the CalleeSavedRegisters since those are already
// saved separately and it would be wasteful to save the same register twice.
// If we do use a non-volatile register as an argument, then the ArgIterator
Expand All @@ -138,10 +138,10 @@ typedef DPTR(struct FloatArgumentRegisters) PTR_FloatArgumentRegisters;
struct FloatArgumentRegisters {
// armV8 supports 32 floating point registers. Each register is 128 bits long.
// It can be accessed as a 128-bit value (q0-q31), a 64-bit value (d0-d31), a 32-bit value (s0-s31),
// or as 16-bit value or as 8-bit values. C# only has two builtin floating datatypes float(32-bit) and
// double(64-bit). It does not have a quad-precision floating point.So therefore it does not make sense to
// store full 128-bit values in Frame when the upper 64 bit will not contain any values.
double d[8]; // d0-d7
// or as 16-bit value or as 8-bit values.
// Although C# only has two builtin floating datatypes float(32-bit) and double(64-bit),
// HW Intrinsics support using the full 128-bit value for passing Vectors.
NEON128 q[8]; // q0-q7
};


Expand Down
7 changes: 4 additions & 3 deletions src/vm/callingconvention.h
Original file line number Diff line number Diff line change
Expand Up @@ -573,8 +573,8 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE

if (TransitionBlock::IsFloatArgumentRegisterOffset(argOffset))
{
// Dividing by 8 as size of each register in FloatArgumentRegisters is 8 bytes.
pLoc->m_idxFloatReg = (argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters()) / 8;
// Dividing by 16 as size of each register in FloatArgumentRegisters is 16 bytes.
pLoc->m_idxFloatReg = (argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters()) / 16;

if (!m_argTypeHandle.IsNull() && m_argTypeHandle.IsHFA())
{
Expand Down Expand Up @@ -1322,7 +1322,8 @@ int ArgIteratorTemplate<ARGITERATOR_BASE>::GetNextOffset()
{
if (cFPRegs + m_idxFPReg <= 8)
{
int argOfs = TransitionBlock::GetOffsetOfFloatArgumentRegisters() + m_idxFPReg * 8;
// Each floating point register in the argument area is 16 bytes.
int argOfs = TransitionBlock::GetOffsetOfFloatArgumentRegisters() + m_idxFPReg * 16;
m_idxFPReg += cFPRegs;
return argOfs;
}
Expand Down