Skip to content

Commit

Permalink
Preserve Vector Arg registers on Arm64
Browse files Browse the repository at this point in the history
Fix #14371
  • Loading branch information
CarolEidt committed Jan 10, 2019
1 parent 8f91ac8 commit 04e9ac2
Show file tree
Hide file tree
Showing 8 changed files with 70 additions and 66 deletions.
34 changes: 17 additions & 17 deletions src/pal/inc/unixasmmacrosarm64.inc
Original file line number Diff line number Diff line change
Expand Up @@ -137,14 +137,14 @@ C_FUNC(\Name\()_End):
// ArgumentRegisters::x2
// ArgumentRegisters::x1
// ArgumentRegisters::x0
// FloatRegisters::d7
// FloatRegisters::d6
// FloatRegisters::d5
// FloatRegisters::d4
// FloatRegisters::d3
// FloatRegisters::d2
// FloatRegisters::d1
// FloatRegisters::d0
// FloatRegisters::q7
// FloatRegisters::q6
// FloatRegisters::q5
// FloatRegisters::q4
// FloatRegisters::q3
// FloatRegisters::q2
// FloatRegisters::q1
// FloatRegisters::q0
.macro PROLOG_WITH_TRANSITION_BLOCK extraLocals = 0, SaveFPArgs = 1

__PWTB_FloatArgumentRegisters = \extraLocals
Expand Down Expand Up @@ -200,13 +200,13 @@ C_FUNC(\Name\()_End):

.endm

// Reserve 64 bytes of memory before calling SAVE_FLOAT_ARGUMENT_REGISTERS
// Reserve 128 bytes of memory before calling SAVE_FLOAT_ARGUMENT_REGISTERS
.macro SAVE_FLOAT_ARGUMENT_REGISTERS reg, ofs
// Stores floating-point/SIMD registers 0-7 to memory starting at [\reg + \ofs].
//   reg - base address register for the save area
//   ofs - byte offset from \reg at which register 0 is stored
// The d-register form writes 8 bytes per register (64 bytes total, pairs 16 bytes apart);
// the q-register form writes 16 bytes per register (128 bytes total, pairs 32 bytes apart).
// NOTE(review): this listing appears to be a rendered diff with removed and added lines
// interleaved -- the d0-d7 stores look like the pre-change lines and the q0-q7 stores
// their replacement. Confirm against the actual file before relying on both being present.
// NOTE(review): callers updated in the same change pass an offset that is a multiple of 16
// for the q-register form -- presumably required by the stp encoding; confirm.

stp d0, d1, [\reg, #(\ofs)]
stp d2, d3, [\reg, #(\ofs + 16)]
stp d4, d5, [\reg, #(\ofs + 32)]
stp d6, d7, [\reg, #(\ofs + 48)]
stp q0, q1, [\reg, #(\ofs)]
stp q2, q3, [\reg, #(\ofs + 32)]
stp q4, q5, [\reg, #(\ofs + 64)]
stp q6, q7, [\reg, #(\ofs + 96)]

.endm

Expand All @@ -222,10 +222,10 @@ C_FUNC(\Name\()_End):

.macro RESTORE_FLOAT_ARGUMENT_REGISTERS reg, ofs
// Reloads floating-point/SIMD registers 0-7 from memory starting at [\reg + \ofs].
//   reg - base address register of the save area
//   ofs - byte offset from \reg at which register 0 was stored
// The d-register form reads 8 bytes per register (64 bytes total, pairs 16 bytes apart);
// the q-register form reads 16 bytes per register (128 bytes total, pairs 32 bytes apart).
// The offsets used here mirror those of the stores in SAVE_FLOAT_ARGUMENT_REGISTERS.
// NOTE(review): this listing appears to be a rendered diff with removed and added lines
// interleaved -- the d0-d7 loads look like the pre-change lines and the q0-q7 loads
// their replacement. Confirm against the actual file before relying on both being present.

ldp d0, d1, [\reg, #(\ofs)]
ldp d2, d3, [\reg, #(\ofs + 16)]
ldp d4, d5, [\reg, #(\ofs + 32)]
ldp d6, d7, [\reg, #(\ofs + 48)]
ldp q0, q1, [\reg, #(\ofs)]
ldp q2, q3, [\reg, #(\ofs + 32)]
ldp q4, q5, [\reg, #(\ofs + 64)]
ldp q6, q7, [\reg, #(\ofs + 96)]

.endm

Expand Down
8 changes: 4 additions & 4 deletions src/vm/arm64/CallDescrWorkerARM64.asm
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,10 @@ Ldonestack
;; given in x9.
ldr x9, [x19,#CallDescrData__pFloatArgumentRegisters]
cbz x9, LNoFloatingPoint
ldp d0, d1, [x9]
ldp d2, d3, [x9, #16]
ldp d4, d5, [x9, #32]
ldp d6, d7, [x9, #48]
ldp q0, q1, [x9]
ldp q2, q3, [x9, #32]
ldp q4, q5, [x9, #64]
ldp q6, q7, [x9, #96]
LNoFloatingPoint

;; Copy [pArgumentRegisters, ..., pArgumentRegisters + 56]
Expand Down
3 changes: 2 additions & 1 deletion src/vm/arm64/asmconstants.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ ASMCONSTANTS_C_ASSERT(AppDomain__m_dwId == offsetof(AppDomain, m_dwId));
#define SIZEOF__ArgumentRegisters 0x40
ASMCONSTANTS_C_ASSERT(SIZEOF__ArgumentRegisters == sizeof(ArgumentRegisters))

#define SIZEOF__FloatArgumentRegisters 0x40
// There are 8 128-bit registers in FloatArgumentRegisters
#define SIZEOF__FloatArgumentRegisters 0x80
ASMCONSTANTS_C_ASSERT(SIZEOF__FloatArgumentRegisters == sizeof(FloatArgumentRegisters))

#define CallDescrData__pSrc 0x00
Expand Down
24 changes: 12 additions & 12 deletions src/vm/arm64/asmhelpers.S
Original file line number Diff line number Diff line change
Expand Up @@ -121,18 +121,18 @@ LEAF_END HelperMethodFrameRestoreState, _TEXT
// The call in ndirect import precode points to this function.
NESTED_ENTRY NDirectImportThunk, _TEXT, NoHandler

PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -160
PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -224
SAVE_ARGUMENT_REGISTERS sp, 16
SAVE_FLOAT_ARGUMENT_REGISTERS sp, 88
SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96

mov x0, x12
bl NDirectImportWorker
mov x12, x0

// pop the stack and restore original register state
RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 88
RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96
RESTORE_ARGUMENT_REGISTERS sp, 16
EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 160
EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 224

// If we got back from NDirectImportWorker, the MD has been successfully
// linked. Proceed to execute the original DLL call.
Expand Down Expand Up @@ -481,9 +481,9 @@ WRITE_BARRIER_END JIT_WriteBarrier
NESTED_ENTRY VirtualMethodFixupStub, _TEXT, NoHandler

// Save arguments and return address
PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -160
PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -224
SAVE_ARGUMENT_REGISTERS sp, 16
SAVE_FLOAT_ARGUMENT_REGISTERS sp, 88
SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96

// Refer to ZapImportVirtualThunk::Save
// for details on this.
Expand All @@ -500,8 +500,8 @@ NESTED_ENTRY VirtualMethodFixupStub, _TEXT, NoHandler

// pop the stack and restore original register state
RESTORE_ARGUMENT_REGISTERS sp, 16
RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 88
EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 160
RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96
EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 224

PATCH_LABEL VirtualMethodFixupPatchLabel

Expand Down Expand Up @@ -711,9 +711,9 @@ COMToCLRDispatchHelper_RegSetup
NESTED_ENTRY TheUMEntryPrestub, _TEXT, UnhandledExceptionHandlerUnix

// Save arguments and return address
PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -160
PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -224
SAVE_ARGUMENT_REGISTERS sp, 16
SAVE_FLOAT_ARGUMENT_REGISTERS sp, 88
SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96

mov x0, x12
bl C_FUNC(TheUMEntryPrestubWorker)
Expand All @@ -723,8 +723,8 @@ NESTED_ENTRY TheUMEntryPrestub, _TEXT, UnhandledExceptionHandlerUnix

// pop the stack and restore original register state
RESTORE_ARGUMENT_REGISTERS sp, 16
RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 88
EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 160
RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96
EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 224

// and tailcall to the actual method
EPILOG_BRANCH_REG x12
Expand Down
29 changes: 16 additions & 13 deletions src/vm/arm64/asmhelpers.asm
Original file line number Diff line number Diff line change
Expand Up @@ -184,18 +184,18 @@ Done
; The call in ndirect import precode points to this function.
NESTED_ENTRY NDirectImportThunk

PROLOG_SAVE_REG_PAIR fp, lr, #-160!
PROLOG_SAVE_REG_PAIR fp, lr, #-240!
SAVE_ARGUMENT_REGISTERS sp, 16
SAVE_FLOAT_ARGUMENT_REGISTERS sp, 88
SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96

mov x0, x12
bl NDirectImportWorker
mov x12, x0

; pop the stack and restore original register state
RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 88
RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96
RESTORE_ARGUMENT_REGISTERS sp, 16
EPILOG_RESTORE_REG_PAIR fp, lr, #160!
EPILOG_RESTORE_REG_PAIR fp, lr, #240!

; If we got back from NDirectImportWorker, the MD has been successfully
; linked. Proceed to execute the original DLL call.
Expand Down Expand Up @@ -437,9 +437,9 @@ Exit
NESTED_ENTRY VirtualMethodFixupStub

; Save arguments and return address
PROLOG_SAVE_REG_PAIR fp, lr, #-160!
PROLOG_SAVE_REG_PAIR fp, lr, #-240!
SAVE_ARGUMENT_REGISTERS sp, 16
SAVE_FLOAT_ARGUMENT_REGISTERS sp, 88
SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96

; Refer to ZapImportVirtualThunk::Save
; for details on this.
Expand All @@ -456,8 +456,8 @@ Exit

; pop the stack and restore original register state
RESTORE_ARGUMENT_REGISTERS sp, 16
RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 88
EPILOG_RESTORE_REG_PAIR fp, lr, #160!
RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96
EPILOG_RESTORE_REG_PAIR fp, lr, #240!

PATCH_LABEL VirtualMethodFixupPatchLabel

Expand Down Expand Up @@ -761,7 +761,10 @@ COMToCLRDispatchHelper_StackLoop

COMToCLRDispatchHelper_RegSetup

RESTORE_FLOAT_ARGUMENT_REGISTERS x1, -1 * GenericComCallStub_FrameOffset
; We need an aligned offset for restoring float args, so do the subtraction into
; a scratch register
sub x5, x1, GenericComCallStub_FrameOffset
RESTORE_FLOAT_ARGUMENT_REGISTERS x5, 0

mov lr, x2
mov x12, x3
Expand Down Expand Up @@ -791,9 +794,9 @@ COMToCLRDispatchHelper_RegSetup
NESTED_ENTRY TheUMEntryPrestub,,UMEntryPrestubUnwindFrameChainHandler

; Save arguments and return address
PROLOG_SAVE_REG_PAIR fp, lr, #-160!
PROLOG_SAVE_REG_PAIR fp, lr, #-240!
SAVE_ARGUMENT_REGISTERS sp, 16
SAVE_FLOAT_ARGUMENT_REGISTERS sp, 88
SAVE_FLOAT_ARGUMENT_REGISTERS sp, 96

mov x0, x12
bl TheUMEntryPrestubWorker
Expand All @@ -803,8 +806,8 @@ COMToCLRDispatchHelper_RegSetup

; pop the stack and restore original register state
RESTORE_ARGUMENT_REGISTERS sp, 16
RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 88
EPILOG_RESTORE_REG_PAIR fp, lr, #160!
RESTORE_FLOAT_ARGUMENT_REGISTERS sp, 96
EPILOG_RESTORE_REG_PAIR fp, lr, #240!

; and tailcall to the actual method
EPILOG_BRANCH_REG x12
Expand Down
18 changes: 9 additions & 9 deletions src/vm/arm64/asmmacros.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ __PWTB_SAVE_ARGUMENT_REGISTERS_OFFSET SETA 0

MEND

; Reserve 64 bytes of memory before calling SAVE_FLOAT_ARGUMENT_REGISTERS
; Reserve 128 bytes of memory before calling SAVE_FLOAT_ARGUMENT_REGISTERS
MACRO
SAVE_FLOAT_ARGUMENT_REGISTERS $reg, $offset

Expand All @@ -195,10 +195,10 @@ __PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET SETA $offset
__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET SETA 0
ENDIF

stp d0, d1, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET)]
stp d2, d3, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 16)]
stp d4, d5, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 32)]
stp d6, d7, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 48)]
stp q0, q1, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET)]
stp q2, q3, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 32)]
stp q4, q5, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 64)]
stp q6, q7, [$reg, #(__PWTB_SAVE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 96)]
MEND

MACRO
Expand Down Expand Up @@ -231,10 +231,10 @@ __PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET SETA $offset
__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET SETA 0
ENDIF

ldp d0, d1, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET)]
ldp d2, d3, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 16)]
ldp d4, d5, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 32)]
ldp d6, d7, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 48)]
ldp q0, q1, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET)]
ldp q2, q3, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 32)]
ldp q4, q5, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 64)]
ldp q6, q7, [$reg, #(__PWTB_RESTORE_FLOAT_ARGUMENT_REGISTERS_OFFSET + 96)]
MEND

; ------------------------------------------------------------------
Expand Down
8 changes: 4 additions & 4 deletions src/vm/arm64/calldescrworkerarm64.S
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@ LOCAL_LABEL(donestack):
// given in x9.
ldr x9, [x19,#CallDescrData__pFloatArgumentRegisters]
cbz x9, LOCAL_LABEL(NoFloatingPoint)
ldp d0, d1, [x9]
ldp d2, d3, [x9, #16]
ldp d4, d5, [x9, #32]
ldp d6, d7, [x9, #48]
ldp q0, q1, [x9]
ldp q2, q3, [x9, #32]
ldp q4, q5, [x9, #64]
ldp q6, q7, [x9, #96]
LOCAL_LABEL(NoFloatingPoint):

// Copy [pArgumentRegisters, ..., pArgumentRegisters + 56]
Expand Down
12 changes: 6 additions & 6 deletions src/vm/arm64/cgencpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ static_assert(((STACK_ELEM_SIZE & (STACK_ELEM_SIZE-1)) == 0), "STACK_ELEM_SIZE m
//**********************************************************************

//--------------------------------------------------------------------
// This represents the callee saved (non-volatile) registers saved as
// This represents the callee saved (non-volatile) integer registers saved as
// of a FramedMethodFrame.
//--------------------------------------------------------------------
typedef DPTR(struct CalleeSavedRegisters) PTR_CalleeSavedRegisters;
Expand All @@ -111,7 +111,7 @@ struct CalleeSavedRegisters {
};

//--------------------------------------------------------------------
// This represents the arguments that are stored in volatile registers.
// This represents the arguments that are stored in volatile integer registers.
// This should not overlap the CalleeSavedRegisters since those are already
// saved separately and it would be wasteful to save the same register twice.
// If we do use a non-volatile register as an argument, then the ArgIterator
Expand All @@ -138,10 +138,10 @@ typedef DPTR(struct FloatArgumentRegisters) PTR_FloatArgumentRegisters;
struct FloatArgumentRegisters {
// Storage layout for the floating-point/SIMD argument registers 0-7.
// The assembly-side constant SIZEOF__FloatArgumentRegisters is asserted to equal
// sizeof(FloatArgumentRegisters), so the member layout here and that constant must be
// changed together.
// NOTE(review): this listing appears to be a rendered diff with removed and added lines
// interleaved -- `double d[8]` (with its comment block) looks like the pre-change member
// and `NEON128 q[8]` its replacement. Confirm against the actual header.
// armV8 supports 32 floating point registers. Each register is 128bits long.
// It can be accessed as 128-bit value or 64-bit value(d0-d31) or as 32-bit value (s0-s31)
// or as 16-bit value or as 8-bit values. C# only has two builtin floating datatypes float(32-bit) and
// double(64-bit). It does not have a quad-precision floating point.So therefore it does not make sense to
// store full 128-bit values in Frame when the upper 64 bit will not contain any values.
double d[8]; // d0-d7
// or as 16-bit value or as 8-bit values.
// Although C# only has two builtin floating datatypes float(32-bit) and double(64-bit),
// HW Intrinsics support using the full 128-bit value for passing Vectors.
NEON128 q[8]; // q0-q7
};


Expand Down

0 comments on commit 04e9ac2

Please sign in to comment.