diff --git a/src/jit/assertionprop.cpp b/src/jit/assertionprop.cpp index 1b48491e737e..cd1b90ffa2cb 100644 --- a/src/jit/assertionprop.cpp +++ b/src/jit/assertionprop.cpp @@ -75,7 +75,7 @@ void Compiler::optAddCopies() // We only add copies for non temp local variables // that have a single def and that can possibly be enregistered - if (varDsc->lvIsTemp || !varDsc->lvSingleDef || !varTypeCanReg(typ)) + if (varDsc->lvIsTemp || !varDsc->lvSingleDef || !varTypeIsEnregisterable(typ)) { continue; } diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp index 91bb221b1958..efbb59007c9a 100644 --- a/src/jit/codegenarm64.cpp +++ b/src/jit/codegenarm64.cpp @@ -2023,10 +2023,10 @@ void CodeGen::genSimpleReturn(GenTree* treeNode) GenTree* op1 = treeNode->gtGetOp1(); var_types targetType = treeNode->TypeGet(); - assert(!isStructReturn(treeNode)); + assert(targetType != TYP_STRUCT); assert(targetType != TYP_VOID); - regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET; + regNumber retReg = varTypeUsesFloatArgReg(treeNode) ? REG_FLOATRET : REG_INTRET; bool movRequired = (op1->gtRegNum != retReg); diff --git a/src/jit/codegenarmarch.cpp b/src/jit/codegenarmarch.cpp index 55b3f0a7ba95..e50778221bd6 100644 --- a/src/jit/codegenarmarch.cpp +++ b/src/jit/codegenarmarch.cpp @@ -2355,7 +2355,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call) } else { - assert(!varTypeIsStruct(call)); + assert(call->gtType != TYP_STRUCT); if (call->gtType == TYP_REF) { @@ -2509,9 +2509,13 @@ void CodeGen::genCallInstruction(GenTreeCall* call) // TCB in REG_PINVOKE_TCB. fgMorphCall() sets the correct argument registers. 
returnReg = REG_PINVOKE_TCB; } + else if (compiler->opts.compUseSoftFP) + { + returnReg = REG_INTRET; + } else #endif // _TARGET_ARM_ - if (varTypeIsFloating(returnType) && !compiler->opts.compUseSoftFP) + if (varTypeUsesFloatArgReg(returnType)) { returnReg = REG_FLOATRET; } @@ -3501,8 +3505,13 @@ bool CodeGen::isStructReturn(GenTree* treeNode) // For the GT_RET_FILT, the return is always // a bool or a void, for the end of a finally block. noway_assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT); + var_types returnType = treeNode->TypeGet(); - return varTypeIsStruct(treeNode); +#ifdef _TARGET_ARM64_ + return varTypeIsStruct(returnType) && (compiler->info.compRetNativeType == TYP_STRUCT); +#else + return varTypeIsStruct(returnType); +#endif } //------------------------------------------------------------------------ diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp index 0ec2ba74e7e0..c7a6e83d01c9 100644 --- a/src/jit/codegencommon.cpp +++ b/src/jit/codegencommon.cpp @@ -3305,7 +3305,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere { // A struct might be passed partially in XMM register for System V calls. // So a single arg might use both register files. 
- if (isFloatRegType(regType) != doingFloat) + if (emitter::isFloatReg(varDsc->lvArgReg) != doingFloat) { continue; } @@ -10158,7 +10158,11 @@ bool Compiler::IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass) structPassingKind howToReturnStruct; var_types returnType = getReturnTypeForStruct(hClass, &howToReturnStruct); +#ifdef _TARGET_ARM64_ + return (varTypeIsStruct(returnType) && (howToReturnStruct != SPK_PrimitiveType)); +#else return (varTypeIsStruct(returnType)); +#endif } //---------------------------------------------- @@ -10167,11 +10171,7 @@ bool Compiler::IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass) bool Compiler::IsHfa(CORINFO_CLASS_HANDLE hClass) { -#ifdef FEATURE_HFA - return varTypeIsFloating(GetHfaType(hClass)); -#else - return false; -#endif + return varTypeIsValidHfaType(GetHfaType(hClass)); } bool Compiler::IsHfa(GenTree* tree) @@ -10204,7 +10204,19 @@ var_types Compiler::GetHfaType(CORINFO_CLASS_HANDLE hClass) { #ifdef FEATURE_HFA CorInfoType corType = info.compCompHnd->getHFAType(hClass); - if (corType != CORINFO_TYPE_UNDEF) +#ifdef _TARGET_ARM64_ + if (corType == CORINFO_TYPE_VALUECLASS) + { + // This is a vector type. + // HVAs are only supported on ARM64, and only for homogeneous aggregates of 8 or 16 byte vectors. + // For 8-byte vectors corType will be returned as CORINFO_TYPE_DOUBLE. + result = TYP_SIMD16; + // This type may not appear elsewhere, but it will occupy a floating point register. 
+ compFloatingPointUsed = true; + } + else +#endif // _TARGET_ARM64_ + if (corType != CORINFO_TYPE_UNDEF) { result = JITtype2varType(corType); } diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index 97d04e7c16d7..a7c01a78f7e7 100644 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -1133,9 +1133,9 @@ void CodeGen::genStructReturn(GenTree* treeNode) unsigned regCount = retTypeDesc.GetReturnRegCount(); assert(regCount == MAX_RET_REG_COUNT); - if (varTypeIsEnregisterableStruct(op1)) + if (varTypeIsEnregisterable(op1)) { - // Right now the only enregistrable structs supported are SIMD vector types. + // Right now the only enregisterable structs supported are SIMD vector types. assert(varTypeIsSIMD(op1)); assert(op1->isUsedFromReg()); diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp index e005a77c2900..15ed19c62d5b 100644 --- a/src/jit/compiler.cpp +++ b/src/jit/compiler.cpp @@ -573,8 +573,8 @@ bool Compiler::isSingleFloat32Struct(CORINFO_CLASS_HANDLE clsHnd) // of size 'structSize'. // We examine 'clsHnd' to check the GC layout of the struct and // return TYP_REF for structs that simply wrap an object. -// If the struct is a one element HFA, we will return the -// proper floating point type. +// If the struct is a one element HFA/HVA, we will return the +// proper floating point or vector type. // // Arguments: // structSize - the size of the struct type, cannot be zero @@ -592,13 +592,64 @@ bool Compiler::isSingleFloat32Struct(CORINFO_CLASS_HANDLE clsHnd) // same way as any other 8-byte struct // For ARM32 if we have an HFA struct that wraps a 64-bit double // we will return TYP_DOUBLE. +// For vector calling conventions, a vector is considered a "primitive" +// type, as it is passed in a single register. 
// var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS_HANDLE clsHnd, bool isVarArg) { assert(structSize != 0); - var_types useType; + var_types useType = TYP_UNKNOWN; +// Start by determining if we have an HFA/HVA with a single element. +#ifdef FEATURE_HFA +#if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_) + // Arm64 Windows VarArg methods arguments will not classify HFA types, they will need to be treated + // as if they are not HFA types. + if (!isVarArg) +#endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_) + { + switch (structSize) + { + case 4: + case 8: +#ifdef _TARGET_ARM64_ + case 16: +#endif // _TARGET_ARM64_ + { + var_types hfaType = TYP_UNDEF; // Not an HFA unless set below. +#ifdef ARM_SOFTFP + // For ARM_SOFTFP, HFA is unsupported so we need to check in another way. + // This matters only for size-4 struct because bigger structs would be processed with RetBuf. + if (isSingleFloat32Struct(clsHnd)) + { + hfaType = TYP_FLOAT; + } +#else // !ARM_SOFTFP + hfaType = GetHfaType(clsHnd); +#endif // ARM_SOFTFP + // We're only interested in the case where the struct size is equal to the size of the hfaType. + if (varTypeIsValidHfaType(hfaType)) + { + if (genTypeSize(hfaType) == structSize) + { + useType = hfaType; + } + else + { + return TYP_UNKNOWN; + } + } + } + } + if (useType != TYP_UNKNOWN) + { + return useType; + } + } +#endif // FEATURE_HFA + + // Now deal with non-HFA/HVA structs. switch (structSize) { case 1: @@ -618,15 +669,8 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS #ifdef _TARGET_64BIT_ case 4: - if (IsHfa(clsHnd)) - { - // A structSize of 4 with IsHfa, it must be an HFA of one float - useType = TYP_FLOAT; - } - else - { - useType = TYP_INT; - } + // We dealt with the one-float HFA above. All other 4-byte structs are handled as INT.
+ useType = TYP_INT; break; #if !defined(_TARGET_XARCH_) || defined(UNIX_AMD64_ABI) @@ -640,86 +684,13 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS #endif // _TARGET_64BIT_ case TARGET_POINTER_SIZE: -#ifdef ARM_SOFTFP - // For ARM_SOFTFP, HFA is unsupported so we need to check in another way - // This matters only for size-4 struct cause bigger structs would be processed with RetBuf - if (isSingleFloat32Struct(clsHnd)) -#else // !ARM_SOFTFP - if (IsHfa(clsHnd) -#if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_) - // Arm64 Windows VarArg methods arguments will not - // classify HFA types, they will need to be treated - // as if they are not HFA types. - && !isVarArg -#endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_) - ) -#endif // ARM_SOFTFP - { -#ifdef _TARGET_64BIT_ - var_types hfaType = GetHfaType(clsHnd); - - // A structSize of 8 with IsHfa, we have two possiblities: - // An HFA of one double or an HFA of two floats - // - // Check and exclude the case of an HFA of two floats - if (hfaType == TYP_DOUBLE) - { - // We have an HFA of one double - useType = TYP_DOUBLE; - } - else - { - assert(hfaType == TYP_FLOAT); - - // We have an HFA of two floats - // This should be passed or returned in two FP registers - useType = TYP_UNKNOWN; - } -#else // a 32BIT target - // A structSize of 4 with IsHfa, it must be an HFA of one float - useType = TYP_FLOAT; -#endif // _TARGET_64BIT_ - } - else - { - BYTE gcPtr = 0; - // Check if this pointer-sized struct is wrapping a GC object - info.compCompHnd->getClassGClayout(clsHnd, &gcPtr); - useType = getJitGCType(gcPtr); - } - break; - -#ifdef _TARGET_ARM_ - case 8: - if (IsHfa(clsHnd)) - { - var_types hfaType = GetHfaType(clsHnd); - - // A structSize of 8 with IsHfa, we have two possiblities: - // An HFA of one double or an HFA of two floats - // - // Check and exclude the case of an HFA of two floats - if (hfaType == TYP_DOUBLE) - { - // We have an HFA of one double - 
useType = TYP_DOUBLE; - } - else - { - assert(hfaType == TYP_FLOAT); - - // We have an HFA of two floats - // This should be passed or returned in two FP registers - useType = TYP_UNKNOWN; - } - } - else - { - // We don't have an HFA - useType = TYP_UNKNOWN; - } - break; -#endif // _TARGET_ARM_ + { + BYTE gcPtr = 0; + // Check if this pointer-sized struct is wrapping a GC object + info.compCompHnd->getClassGClayout(clsHnd, &gcPtr); + useType = getJitGCType(gcPtr); + } + break; default: useType = TYP_UNKNOWN; @@ -802,11 +773,11 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, else #endif // UNIX_AMD64_ABI - // The largest primitive type is 8 bytes (TYP_DOUBLE) + // The largest arg passed in a single register is MAX_PASS_SINGLEREG_BYTES, // so we can skip calling getPrimitiveTypeForStruct when we // have a struct that is larger than that. // - if (structSize <= sizeof(double)) + if (structSize <= MAX_PASS_SINGLEREG_BYTES) { // We set the "primitive" useType based upon the structSize // and also examine the clsHnd to see if it is an HFA of count one @@ -829,14 +800,21 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, // if (structSize <= MAX_PASS_MULTIREG_BYTES) { - // Structs that are HFA's are passed by value in multiple registers - if (IsHfa(clsHnd) + // Structs that are HFA/HVA's are passed by value in multiple registers. + // Arm64 Windows VarArg methods arguments will not classify HFA/HVA types, they will need to be treated + // as if they are not HFA/HVA types. + var_types hfaType; #if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_) - && !isVarArg // Arm64 Windows VarArg methods arguments will not - // classify HFA types, they will need to be treated - // as if they are not HFA types. 
-#endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_) - ) + if (isVarArg) + { + hfaType = TYP_UNDEF; + } + else +#endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_) + { + hfaType = GetHfaType(clsHnd); + } + if (varTypeIsValidHfaType(hfaType)) { // HFA's of count one should have been handled by getPrimitiveTypeForStruct assert(GetHfaCount(clsHnd) >= 2); @@ -851,7 +829,6 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, { #ifdef UNIX_AMD64_ABI - // The case of (structDesc.eightByteCount == 1) should have already been handled if ((structDesc.eightByteCount > 1) || !structDesc.passedInRegisters) { @@ -1035,10 +1012,10 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, // Check for cases where a small struct is returned in a register // via a primitive type. // - // The largest primitive type is 8 bytes (TYP_DOUBLE) + // The largest "primitive type" is MAX_PASS_SINGLEREG_BYTES // so we can skip calling getPrimitiveTypeForStruct when we // have a struct that is larger than that. - if (canReturnInRegister && (useType == TYP_UNKNOWN) && (structSize <= sizeof(double))) + if (canReturnInRegister && (useType == TYP_UNKNOWN) && (structSize <= MAX_PASS_SINGLEREG_BYTES)) { // We set the "primitive" useType based upon the structSize // and also examine the clsHnd to see if it is an HFA of count one @@ -1070,7 +1047,7 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, // because when HFA are enabled, normally we would use two FP registers to pass or return it // // But if we don't have support for multiple register return types, we have to change this. - // Since we what we have an 8-byte struct (float + float) we change useType to TYP_I_IMPL + // Since what we have is an 8-byte struct (float + float) we change useType to TYP_I_IMPL // so that the struct is returned instead using an 8-byte integer register. 
// if ((FEATURE_MULTIREG_RET == 0) && (useType == TYP_UNKNOWN) && (structSize == (2 * sizeof(float))) && IsHfa(clsHnd)) diff --git a/src/jit/compiler.h b/src/jit/compiler.h index c734b5a96fff..dc1f844a3136 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -134,6 +134,61 @@ const unsigned FLG_CCTOR = (CORINFO_FLG_CONSTRUCTOR | CORINFO_FLG_STATIC); const int BAD_STK_OFFS = 0xBAADF00D; // for LclVarDsc::lvStkOffs #endif +//------------------------------------------------------------------------ +// HFA info shared by LclVarDsc and fgArgTabEntry +//------------------------------------------------------------------------ +#ifdef FEATURE_HFA +enum HfaElemKind : unsigned int +{ + HFA_ELEM_NONE, + HFA_ELEM_FLOAT, + HFA_ELEM_DOUBLE, + HFA_ELEM_SIMD16 +}; +inline bool IsHfa(HfaElemKind kind) +{ + return kind != HFA_ELEM_NONE; +} +inline var_types HfaTypeFromElemKind(HfaElemKind kind) +{ + switch (kind) + { + case HFA_ELEM_FLOAT: + return TYP_FLOAT; + case HFA_ELEM_DOUBLE: + return TYP_DOUBLE; +#ifdef FEATURE_SIMD + case HFA_ELEM_SIMD16: + return TYP_SIMD16; +#endif + case HFA_ELEM_NONE: + return TYP_UNDEF; + default: + assert(!"Invalid HfaElemKind"); + return TYP_UNDEF; + } +} +inline HfaElemKind HfaElemKindFromType(var_types type) +{ + switch (type) + { + case TYP_FLOAT: + return HFA_ELEM_FLOAT; + case TYP_DOUBLE: + return HFA_ELEM_DOUBLE; +#ifdef FEATURE_SIMD + case TYP_SIMD16: + return HFA_ELEM_SIMD16; +#endif + case TYP_UNDEF: + return HFA_ELEM_NONE; + default: + assert(!"Invalid HFA Type"); + return HFA_ELEM_NONE; + } +} +#endif // FEATURE_HFA + // The following holds the Local var info (scope information) typedef const char* VarName; // Actual ASCII string struct VarScopeDsc @@ -595,11 +650,8 @@ class LclVarDsc unsigned char lvIsMultiRegRet : 1; // true if this is a multireg LclVar struct assigned from a multireg call #ifdef FEATURE_HFA - unsigned char _lvIsHfa : 1; // Is this a struct variable who's class handle is an HFA type - unsigned char _lvIsHfaRegArg 
: 1; // Is this a HFA argument variable? // TODO-CLEANUP: Remove this and replace - // with (lvIsRegArg && lvIsHfa()) - unsigned char _lvHfaTypeIsFloat : 1; // Is the HFA type float or double? -#endif // FEATURE_HFA + HfaElemKind _lvHfaElemKind : 2; // What kind of an HFA this is (HFA_ELEM_NONE if it is not an HFA). +#endif // FEATURE_HFA #ifdef DEBUG // TODO-Cleanup: See the note on lvSize() - this flag is only in use by asserts that are checking for struct @@ -666,70 +718,60 @@ class LclVarDsc bool lvIsHfa() const { #ifdef FEATURE_HFA - return _lvIsHfa; + return IsHfa(_lvHfaElemKind); #else return false; #endif } - void lvSetIsHfa() - { -#ifdef FEATURE_HFA - _lvIsHfa = true; -#endif - } - bool lvIsHfaRegArg() const { #ifdef FEATURE_HFA - return _lvIsHfaRegArg; + return lvIsRegArg && lvIsHfa(); #else return false; #endif } - void lvSetIsHfaRegArg(bool value = true) - { -#ifdef FEATURE_HFA - _lvIsHfaRegArg = value; -#endif - } - - bool lvHfaTypeIsFloat() const - { -#ifdef FEATURE_HFA - return _lvHfaTypeIsFloat; -#else - return false; -#endif - } - - void lvSetHfaTypeIsFloat(bool value) - { -#ifdef FEATURE_HFA - _lvHfaTypeIsFloat = value; -#endif - } - - // on Arm64 - Returns 1-4 indicating the number of register slots used by the HFA - // on Arm32 - Returns the total number of single FP register slots used by the HFA, max is 8 + //------------------------------------------------------------------------------ + // lvHfaSlots: Get the number of slots used by an HFA local + // + // Return Value: + // On Arm64 - Returns 1-4 indicating the number of register slots used by the HFA + // On Arm32 - Returns the total number of single FP register slots used by the HFA, max is 8 // unsigned lvHfaSlots() const { assert(lvIsHfa()); assert(varTypeIsStruct(lvType)); + unsigned slots = 0; #ifdef _TARGET_ARM_ - return lvExactSize / sizeof(float); -#else // _TARGET_ARM64_ - if (lvHfaTypeIsFloat()) - { - return lvExactSize / sizeof(float); - } - else + slots = lvExactSize / 
sizeof(float); + assert(slots <= 8); +#elif defined(_TARGET_ARM64_) + switch (_lvHfaElemKind) { - return lvExactSize / sizeof(double); + case HFA_ELEM_NONE: + assert(!"lvHfaSlots called for non-HFA"); + break; + case HFA_ELEM_FLOAT: + assert((lvExactSize % 4) == 0); + slots = lvExactSize >> 2; + break; + case HFA_ELEM_DOUBLE: + assert((lvExactSize % 8) == 0); + slots = lvExactSize >> 3; + break; + case HFA_ELEM_SIMD16: + assert((lvExactSize % 16) == 0); + slots = lvExactSize >> 4; + break; + default: + unreached(); } + assert(slots <= 4); #endif // _TARGET_ARM64_ + return slots; } // lvIsMultiRegArgOrRet() @@ -750,7 +792,7 @@ class LclVarDsc regNumberSmall _lvOtherReg; // Used for "upper half" of long var. #endif // !defined(_TARGET_64BIT_) - regNumberSmall _lvArgReg; // The register in which this argument is passed. + regNumberSmall _lvArgReg; // The (first) register in which this argument is passed. #if FEATURE_MULTIREG_ARGS regNumberSmall _lvOtherArgReg; // Used for the second part of the struct passed in a register. @@ -1030,14 +1072,21 @@ class LclVarDsc { return isFloatRegType(lvType) || lvIsHfaRegArg(); } + var_types GetHfaType() const { - return lvIsHfa() ? (lvHfaTypeIsFloat() ? TYP_FLOAT : TYP_DOUBLE) : TYP_UNDEF; +#ifdef FEATURE_HFA + assert(lvIsHfa()); + return HfaTypeFromElemKind(_lvHfaElemKind); +#endif // FEATURE_HFA + return TYP_UNDEF; } + void SetHfaType(var_types type) { - assert(varTypeIsFloating(type)); - lvSetHfaTypeIsFloat(type == TYP_FLOAT); +#ifdef FEATURE_HFA + _lvHfaElemKind = HfaElemKindFromType(type); +#endif // FEATURE_HFA } var_types lvaArgType(); @@ -1487,8 +1536,7 @@ struct fgArgTabEntry bool _isSplit : 1; // True when this argument is split between the registers and OutArg area #endif // FEATURE_ARG_SPLIT #ifdef FEATURE_HFA - bool _isHfaArg : 1; // True when the argument is an HFA type. - bool _isDoubleHfa : 1; // True when the argument is an HFA, with an element type of DOUBLE. 
+ HfaElemKind _hfaElemKind : 2; // What kind of an HFA this is (HFA_ELEM_NONE if it is not an HFA). #endif bool isLateArg() @@ -1569,7 +1617,7 @@ struct fgArgTabEntry bool getIsHfaArg() { #ifdef FEATURE_HFA - return _isHfaArg; + return IsHfa(_hfaElemKind); #else return false; #endif @@ -1579,23 +1627,22 @@ struct fgArgTabEntry bool getIsHfaRegArg() { #ifdef FEATURE_HFA - return _isHfaArg && isPassedInRegisters(); + return IsHfa(_hfaElemKind) && isPassedInRegisters(); #else return false; #endif } - __declspec(property(get = getHfaType)) var_types hfaType; - var_types getHfaType() + __declspec(property(get = GetHfaType)) var_types hfaType; + var_types GetHfaType() { #ifdef FEATURE_HFA - return _isHfaArg ? (_isDoubleHfa ? TYP_DOUBLE : TYP_FLOAT) : TYP_UNDEF; -#else + return HfaTypeFromElemKind(_hfaElemKind); +#endif // FEATURE_HFA return TYP_UNDEF; -#endif } - void setHfaType(var_types type, unsigned hfaSlots) + void SetHfaType(var_types type, unsigned hfaSlots) { #ifdef FEATURE_HFA if (type != TYP_UNDEF) @@ -1607,29 +1654,33 @@ struct fgArgTabEntry // Note that hfaSlots is the number of registers we will use. For ARM, that is twice // the number of "double registers". unsigned numHfaRegs = hfaSlots; - if (isPassedInRegisters()) - { #ifdef _TARGET_ARM_ - if (type == TYP_DOUBLE) - { - // Must be an even number of registers. - assert((numRegs & 1) == 0); - numHfaRegs = hfaSlots / 2; - } + if (type == TYP_DOUBLE) + { + // Must be an even number of registers. + assert((numRegs & 1) == 0); + numHfaRegs = hfaSlots / 2; + } #endif // _TARGET_ARM_ - if (_isHfaArg) + + if (!isHfaArg) + { + // We haven't previously set this; do so now. + _hfaElemKind = HfaElemKindFromType(type); + if (isPassedInRegisters()) { - // This should already be set correctly. - assert(numRegs == numHfaRegs); - assert(_isDoubleHfa == (type == TYP_DOUBLE)); + numRegs = numHfaRegs; } - else + } + else + { + // We've already set this; ensure that it's consistent. 
+ if (isPassedInRegisters()) { - numRegs = numHfaRegs; + assert(numRegs == numHfaRegs); } + assert(type == HfaTypeFromElemKind(_hfaElemKind)); } - _isDoubleHfa = (type == TYP_DOUBLE); - _isHfaArg = true; } #endif // FEATURE_HFA } @@ -1701,22 +1752,30 @@ struct fgArgTabEntry { unsigned size = getSlotCount(); #ifdef FEATURE_HFA -#ifdef _TARGET_ARM_ - // We counted the number of regs, but if they are DOUBLE hfa regs we have to double the size. - if (isHfaRegArg && (hfaType == TYP_DOUBLE)) + if (isHfaRegArg) { - assert(!isSplit); - size <<= 1; - } +#ifdef _TARGET_ARM_ + // We counted the number of regs, but if they are DOUBLE hfa regs we have to double the size. + if (hfaType == TYP_DOUBLE) + { + assert(!isSplit); + size <<= 1; + } #elif defined(_TARGET_ARM64_) - // We counted the number of regs, but if they are FLOAT hfa regs we have to halve the size. - if (isHfaRegArg && (hfaType == TYP_FLOAT)) - { - // Round up in case of odd HFA count. - size = (size + 1) >> 1; - } + // We counted the number of regs, but if they are FLOAT hfa regs we have to halve the size, + // or if they are SIMD16 vector hfa regs we have to double the size. + if (hfaType == TYP_FLOAT) + { + // Round up in case of odd HFA count. + size = (size + 1) >> 1; + } + else if (hfaType == TYP_SIMD16) + { + size <<= 1; + } #endif // _TARGET_ARM64_ -#endif + } +#endif // FEATURE_HFA return size; } @@ -7614,6 +7673,17 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // Should we support SIMD intrinsics? bool featureSIMD; + // Should we recognize SIMD types? + // We always do this on ARM64 to support HVA types. + bool supportSIMDTypes() + { +#ifdef _TARGET_ARM64_ + return true; +#else + return featureSIMD; +#endif + } + // Have we identified any SIMD types? 
// This is currently used by struct promotion to avoid getting type information for a struct // field to see if it is a SIMD type, if we haven't seen any SIMD types or operations in diff --git a/src/jit/compiler.hpp b/src/jit/compiler.hpp index 901a58ec9e35..0b30114f6768 100644 --- a/src/jit/compiler.hpp +++ b/src/jit/compiler.hpp @@ -2919,7 +2919,7 @@ inline regNumber genMapFloatRegArgNumToRegNum(unsigned argNum) __forceinline regNumber genMapRegArgNumToRegNum(unsigned argNum, var_types type) { - if (varTypeIsFloating(type)) + if (varTypeUsesFloatArgReg(type)) { return genMapFloatRegArgNumToRegNum(argNum); } @@ -2957,7 +2957,7 @@ inline regMaskTP genMapFloatRegArgNumToRegMask(unsigned argNum) __forceinline regMaskTP genMapArgNumToRegMask(unsigned argNum, var_types type) { regMaskTP result; - if (varTypeIsFloating(type)) + if (varTypeUsesFloatArgReg(type)) { result = genMapFloatRegArgNumToRegMask(argNum); #ifdef _TARGET_ARM_ @@ -3076,7 +3076,7 @@ inline unsigned genMapFloatRegNumToRegArgNum(regNumber regNum) inline unsigned genMapRegNumToRegArgNum(regNumber regNum, var_types type) { - if (varTypeIsFloating(type)) + if (varTypeUsesFloatArgReg(type)) { return genMapFloatRegNumToRegArgNum(regNum); } diff --git a/src/jit/flowgraph.cpp b/src/jit/flowgraph.cpp index b24ea0e42256..236302cca5ae 100644 --- a/src/jit/flowgraph.cpp +++ b/src/jit/flowgraph.cpp @@ -23273,7 +23273,7 @@ GenTreeStmt* Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) if (varTypeIsStruct(argType)) { structHnd = gtGetStructHandleIfPresent(argNode); - noway_assert(structHnd != NO_CLASS_HANDLE); + noway_assert((structHnd != NO_CLASS_HANDLE) || (argType != TYP_STRUCT)); } // Unsafe value cls check is not needed for diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index be064c285afa..9e58db0df4fe 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -17682,7 +17682,8 @@ GenTreeSIMD* Compiler::gtNewSIMDNode( assert(op1 != nullptr); SetOpLclRelatedToSIMDIntrinsic(op1); - return new 
(this, GT_SIMD) GenTreeSIMD(type, op1, simdIntrinsicID, baseType, size); + GenTreeSIMD* simdNode = new (this, GT_SIMD) GenTreeSIMD(type, op1, simdIntrinsicID, baseType, size); + return simdNode; } GenTreeSIMD* Compiler::gtNewSIMDNode( @@ -17692,7 +17693,8 @@ GenTreeSIMD* Compiler::gtNewSIMDNode( SetOpLclRelatedToSIMDIntrinsic(op1); SetOpLclRelatedToSIMDIntrinsic(op2); - return new (this, GT_SIMD) GenTreeSIMD(type, op1, op2, simdIntrinsicID, baseType, size); + GenTreeSIMD* simdNode = new (this, GT_SIMD) GenTreeSIMD(type, op1, op2, simdIntrinsicID, baseType, size); + return simdNode; } //------------------------------------------------------------------- @@ -18064,7 +18066,7 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp, CORINFO_CLASS_HA case Compiler::SPK_PrimitiveType: { assert(returnType != TYP_UNKNOWN); - assert(!varTypeIsStruct(returnType)); + assert(returnType != TYP_STRUCT); m_regType[0] = returnType; break; } @@ -18075,7 +18077,7 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp, CORINFO_CLASS_HA var_types hfaType = comp->GetHfaType(retClsHnd); // We should have an hfa struct type - assert(varTypeIsFloating(hfaType)); + assert(varTypeIsValidHfaType(hfaType)); // Note that the retail build issues a warning about a potential divsion by zero without this Max function unsigned elemSize = Max((unsigned)1, EA_SIZE_IN_BYTES(emitActualTypeSize(hfaType))); diff --git a/src/jit/gentree.h b/src/jit/gentree.h index 5d45427a395b..b294748b67c8 100644 --- a/src/jit/gentree.h +++ b/src/jit/gentree.h @@ -3532,6 +3532,9 @@ struct GenTreeCall final : public GenTree return varTypeIsLong(gtType); #elif FEATURE_MULTIREG_RET && defined(_TARGET_ARM_) return varTypeIsLong(gtType) || (varTypeIsStruct(gtType) && !HasRetBufArg()); +#elif defined(FEATURE_HFA) && defined(_TARGET_ARM64_) + // SIMD types are returned in vector regs on ARM64. 
+ return (gtType == TYP_STRUCT) && !HasRetBufArg(); #elif FEATURE_MULTIREG_RET return varTypeIsStruct(gtType) && !HasRetBufArg(); #else diff --git a/src/jit/hwintrinsicArm64.cpp b/src/jit/hwintrinsicArm64.cpp index 98b495623855..dec60383bd2e 100644 --- a/src/jit/hwintrinsicArm64.cpp +++ b/src/jit/hwintrinsicArm64.cpp @@ -214,8 +214,8 @@ GenTree* Compiler::addRangeCheckIfNeeded(GenTree* immOp, unsigned int max, bool { assert(immOp != nullptr); - // Need to range check only if we're must expand and don't have an appropriate constant - if (mustExpand && (!immOp->IsCnsIntOrI() || (immOp->AsIntConCommon()->IconValue() < max))) + // A range check is needed only when we must expand. + if (mustExpand) { GenTree* upperBoundNode = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, max); GenTree* index = nullptr; @@ -463,20 +463,40 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, return gtNewSimdHWIntrinsicNode(simdType, op1, intrinsic, simdBaseType, simdSizeBytes); case HWIntrinsicInfo::SimdExtractOp: - op2 = - addRangeCheckIfNeeded(impPopStack().val, getSIMDVectorLength(simdSizeBytes, simdBaseType), mustExpand); + { + int vectorLength = getSIMDVectorLength(simdSizeBytes, simdBaseType); + op2 = impStackTop().val; + if (!mustExpand && (!op2->IsCnsIntOrI() || op2->AsIntConCommon()->IconValue() >= vectorLength)) + { + // This is either an out-of-range constant or a non-constant. + // We won't expand it; it will be handled recursively, at which point 'mustExpand' + // will be true.
+ return nullptr; + } + op2 = impPopStack().val; + op2 = addRangeCheckIfNeeded(op2, vectorLength, mustExpand); op1 = impSIMDPopStack(simdType); return gtNewScalarHWIntrinsicNode(JITtype2varType(sig->retType), op1, op2, intrinsic); - + } case HWIntrinsicInfo::SimdInsertOp: + { + int vectorLength = getSIMDVectorLength(simdSizeBytes, simdBaseType); + op2 = impStackTop(1).val; + if (!mustExpand && (!op2->IsCnsIntOrI() || op2->AsIntConCommon()->IconValue() >= vectorLength)) + { + // This is either an out-of-range constant or a non-constant. + // We won't expand it; it will be handled recursively, at which point 'mustExpand' + // will be true. + return nullptr; + } op3 = impPopStack().val; - op2 = - addRangeCheckIfNeeded(impPopStack().val, getSIMDVectorLength(simdSizeBytes, simdBaseType), mustExpand); + op2 = impPopStack().val; + op2 = addRangeCheckIfNeeded(op2, vectorLength, mustExpand); op1 = impSIMDPopStack(simdType); return gtNewSimdHWIntrinsicNode(simdType, op1, op2, op3, intrinsic, simdBaseType, simdSizeBytes); - + } case HWIntrinsicInfo::Sha1HashOp: op3 = impSIMDPopStack(simdType); op2 = impPopStack().val; diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp index 8600bf304cc5..88638092fe90 100644 --- a/src/jit/importer.cpp +++ b/src/jit/importer.cpp @@ -1217,7 +1217,7 @@ GenTree* Compiler::impAssignStructPtr(GenTree* destAddr, // If it is a multi-reg struct return, don't change the oper to GT_LCL_FLD. 
// That is, the IR will be of the form lclVar = call for multi-reg return // - GenTree* lcl = destAddr->gtOp.gtOp1; + GenTreeLclVar* lcl = destAddr->gtOp.gtOp1->AsLclVar(); if (src->AsCall()->HasMultiRegRetVal()) { // Mark the struct LclVar as used in a MultiReg return context @@ -1227,7 +1227,7 @@ GenTree* Compiler::impAssignStructPtr(GenTree* destAddr, lcl->gtFlags |= GTF_DONT_CSE; lvaTable[lcl->gtLclVarCommon.gtLclNum].lvIsMultiRegRet = true; } - else // The call result is not a multireg return + else if (lcl->gtType != src->gtType) { // We change this to a GT_LCL_FLD (from a GT_ADDR of a GT_LCL_VAR) lcl->ChangeOper(GT_LCL_FLD); @@ -1532,7 +1532,7 @@ var_types Compiler::impNormStructType(CORINFO_CLASS_HANDLE structHnd, #ifdef FEATURE_SIMD // Check to see if this is a SIMD type. - if (featureSIMD && !mayContainGCPtrs) + if (supportSIMDTypes() && !mayContainGCPtrs) { unsigned originalSize = info.compCompHnd->getClassSize(structHnd); @@ -9057,7 +9057,7 @@ GenTree* Compiler::impFixupStructReturnType(GenTree* op, CORINFO_CLASS_HANDLE re { // It is possible that we now have a lclVar of scalar type. // If so, don't transform it to GT_LCL_FLD. 
- if (varTypeIsStruct(lvaTable[op->AsLclVar()->gtLclNum].lvType)) + if (lvaTable[op->AsLclVar()->gtLclNum].lvType != info.compRetNativeType) { op->ChangeOper(GT_LCL_FLD); } @@ -18983,7 +18983,7 @@ void Compiler::impInlineInitVars(InlineInfo* pInlineInfo) if ((!foundSIMDType || (type == TYP_STRUCT)) && isSIMDorHWSIMDClass(&(lclVarInfo[i + argCnt].lclVerTypeInfo))) { foundSIMDType = true; - if (featureSIMD && type == TYP_STRUCT) + if (supportSIMDTypes() && type == TYP_STRUCT) { var_types structType = impNormStructType(lclVarInfo[i + argCnt].lclVerTypeInfo.GetClassHandle()); lclVarInfo[i + argCnt].lclTypeInfo = structType; diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp index 6f34e24f32f7..d7d0f1113451 100644 --- a/src/jit/lclvars.cpp +++ b/src/jit/lclvars.cpp @@ -124,7 +124,7 @@ void Compiler::lvaInitTypeRef() info.compILargsCount = info.compArgsCount; #ifdef FEATURE_SIMD - if (featureSIMD && (info.compRetNativeType == TYP_STRUCT)) + if (supportSIMDTypes() && (info.compRetNativeType == TYP_STRUCT)) { var_types structType = impNormStructType(info.compMethodInfo->args.retTypeClass); info.compRetType = structType; @@ -149,7 +149,7 @@ void Compiler::lvaInitTypeRef() if ((howToReturnStruct == SPK_PrimitiveType) || (howToReturnStruct == SPK_EnclosingType)) { assert(returnType != TYP_UNKNOWN); - assert(!varTypeIsStruct(returnType)); + assert(returnType != TYP_STRUCT); info.compRetNativeType = returnType; @@ -397,7 +397,7 @@ void Compiler::lvaInitThisPtr(InitVarDscInfo* varDscInfo) { varDsc->lvType = TYP_BYREF; #ifdef FEATURE_SIMD - if (featureSIMD) + if (supportSIMDTypes()) { var_types simdBaseType = TYP_UNKNOWN; var_types type = impNormStructType(info.compClassHnd, nullptr, nullptr, &simdBaseType); @@ -505,7 +505,7 @@ void Compiler::lvaInitRetBuffArg(InitVarDscInfo* varDscInfo) } } #ifdef FEATURE_SIMD - else if (featureSIMD && varTypeIsSIMD(info.compRetType)) + else if (supportSIMDTypes() && varTypeIsSIMD(info.compRetType)) { varDsc->lvSIMDType = true; 
varDsc->lvBaseType = @@ -598,8 +598,9 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo) // If the argType is a struct, then check if it is an HFA if (varTypeIsStruct(argType)) { - hfaType = GetHfaType(typeHnd); // set to float or double if it is an HFA, otherwise TYP_UNDEF - isHfaArg = varTypeIsFloating(hfaType); + // hfaType is set to float, double or SIMD type if it is an HFA, otherwise TYP_UNDEF. + hfaType = GetHfaType(typeHnd); + isHfaArg = varTypeIsValidHfaType(hfaType); } } else if (info.compIsVarArgs) @@ -616,11 +617,12 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo) if (isHfaArg) { - // We have an HFA argument, so from here on out treat the type as a float or double. + // We have an HFA argument, so from here on out treat the type as a float, double or vector. // The orginal struct type is available by using origArgType // We also update the cSlots to be the number of float/double fields in the HFA argType = hfaType; - cSlots = varDsc->lvHfaSlots(); + varDsc->SetHfaType(hfaType); + cSlots = varDsc->lvHfaSlots(); } // The number of slots that must be enregistered if we are to consider this argument enregistered. // This is normally the same as cSlots, since we normally either enregister the entire object, @@ -818,18 +820,31 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo) if (isHfaArg) { // We need to save the fact that this HFA is enregistered - varDsc->lvSetIsHfa(); - varDsc->lvSetIsHfaRegArg(); - varDsc->SetHfaType(hfaType); - varDsc->lvIsMultiRegArg = (varDsc->lvHfaSlots() > 1); + // Note that we can have HVAs of SIMD types even if we are not recognizing intrinsics. + // In that case, we won't have normalized the vector types on the varDsc, so if we have a single vector + // register, we need to set the type now. Otherwise, later we'll assume this is passed by reference. 
+ if (varDsc->lvHfaSlots() != 1) + { + varDsc->lvIsMultiRegArg = true; + } } varDsc->lvIsRegArg = 1; #if FEATURE_MULTIREG_ARGS +#ifdef _TARGET_ARM64_ + if (argType == TYP_STRUCT) + { + varDsc->lvArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum, TYP_I_IMPL); + if (cSlots == 2) + { + varDsc->lvOtherArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum + 1, TYP_I_IMPL); + varDscInfo->hasMultiSlotStruct = true; + } + } +#elif defined(UNIX_AMD64_ABI) if (varTypeIsStruct(argType)) { -#if defined(UNIX_AMD64_ABI) varDsc->lvArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum, firstEightByteType); // If there is a second eightbyte, get a register for it too and map the arg to the reg number. @@ -844,17 +859,13 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo) { varDsc->lvOtherArgReg = genMapRegArgNumToRegNum(secondAllocatedRegArgNum, secondEightByteType); } -#else // ARM32 or ARM64 + } +#else // ARM32 + if (varTypeIsStruct(argType)) + { varDsc->lvArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum, TYP_I_IMPL); -#ifdef _TARGET_ARM64_ - if (cSlots == 2) - { - varDsc->lvOtherArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum + 1, TYP_I_IMPL); - varDscInfo->hasMultiSlotStruct = true; - } -#endif // _TARGET_ARM64_ -#endif // defined(UNIX_AMD64_ABI) } +#endif // ARM32 else #endif // FEATURE_MULTIREG_ARGS { @@ -879,14 +890,13 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo) isFloat = varTypeIsFloating(firstEightByteType); } else -#else +#endif // !UNIX_AMD64_ABI { isFloat = varTypeIsFloating(argType); } -#endif // !UNIX_AMD64_ABI #if defined(UNIX_AMD64_ABI) - if (varTypeIsStruct(argType)) + if (varTypeIsStruct(argType)) { // Print both registers, just to be clear if (firstEightByteType == TYP_UNDEF) @@ -1270,7 +1280,11 @@ void Compiler::lvaInitVarDsc(LclVarDsc* varDsc, varDsc->lvStructGcCount = 1; } - // Set the lvType (before this point it is TYP_UNDEF). +// Set the lvType (before this point it is TYP_UNDEF). 
+ +#ifdef FEATURE_HFA + varDsc->SetHfaType(TYP_UNDEF); +#endif if ((varTypeIsStruct(type))) { lvaSetStruct(varNum, typeHnd, typeHnd != nullptr, !tiVerificationNeeded); @@ -2513,10 +2527,9 @@ void Compiler::lvaSetStruct(unsigned varNum, CORINFO_CLASS_HANDLE typeHnd, bool if (varDsc->lvExactSize <= MAX_PASS_MULTIREG_BYTES) { var_types hfaType = GetHfaType(typeHnd); // set to float or double if it is an HFA, otherwise TYP_UNDEF - if (varTypeIsFloating(hfaType)) + if (varTypeIsValidHfaType(hfaType)) { - varDsc->_lvIsHfa = true; - varDsc->lvSetHfaTypeIsFloat(hfaType == TYP_FLOAT); + varDsc->SetHfaType(hfaType); // hfa variables can never contain GC pointers assert(varDsc->lvStructGcCount == 0); @@ -2588,8 +2601,7 @@ void Compiler::lvaSetStructUsedAsVarArg(unsigned varNum) LclVarDsc* varDsc = &lvaTable[varNum]; // For varargs methods incoming and outgoing arguments should not be treated // as HFA. - varDsc->_lvIsHfa = false; - varDsc->_lvHfaTypeIsFloat = false; + varDsc->SetHfaType(TYP_UNDEF); #endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_) #endif // FEATURE_HFA } @@ -6913,16 +6925,9 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t r } } - if (varDsc->lvIsHfaRegArg()) + if (varDsc->lvIsHfa()) { - if (varDsc->lvHfaTypeIsFloat()) - { - printf(" (enregistered HFA: float) "); - } - else - { - printf(" (enregistered HFA: double)"); - } + printf(" HFA(%s) ", varTypeName(varDsc->GetHfaType())); } if (varDsc->lvDoNotEnregister) diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp index a2f7080a9d40..75c354dc3884 100644 --- a/src/jit/lower.cpp +++ b/src/jit/lower.cpp @@ -5691,10 +5691,11 @@ void Lowering::ContainCheckRet(GenTreeOp* ret) { GenTreeLclVarCommon* lclVarCommon = op1->AsLclVarCommon(); LclVarDsc* varDsc = &(comp->lvaTable[lclVarCommon->gtLclNum]); - assert(varDsc->lvIsMultiRegRet); + // This must be a multi-reg return or an HFA of a single element. 
+ assert(varDsc->lvIsMultiRegRet || (varDsc->lvIsHfa() && varTypeIsValidHfaType(varDsc->lvType))); // Mark var as contained if not enregistrable. - if (!varTypeIsEnregisterableStruct(op1)) + if (!varTypeIsEnregisterable(op1)) { MakeSrcContained(ret, op1); } diff --git a/src/jit/lowerarmarch.cpp b/src/jit/lowerarmarch.cpp index 3e0d636e9921..adfc34ae94d5 100644 --- a/src/jit/lowerarmarch.cpp +++ b/src/jit/lowerarmarch.cpp @@ -892,6 +892,11 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { MakeSrcContained(node, op2); +#if 0 + // This is currently not supported downstream. The following (at least) need to be modified: + // GenTree::isContainableHWIntrinsic() needs to handle this. + // CodeGen::genConsumRegs() + // GenTree* op3 = argList->Rest()->Rest()->Current(); // In the HW intrinsics C# API there is no direct way to specify a vector element to element mov @@ -909,6 +914,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) MakeSrcContained(node, op3); } } +#endif } break; diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp index 8a5323ced2bd..adc418c2a0b2 100644 --- a/src/jit/lsra.cpp +++ b/src/jit/lsra.cpp @@ -1415,7 +1415,7 @@ bool LinearScan::isRegCandidate(LclVarDsc* varDsc) // or enregistered, on x86 -- it is believed that we can enregister pinned (more properly, "pinning") // references when using the general GC encoding. 
unsigned lclNum = (unsigned)(varDsc - compiler->lvaTable); - if (varDsc->lvAddrExposed || !varTypeIsEnregisterableStruct(varDsc)) + if (varDsc->lvAddrExposed || !varTypeIsEnregisterable(varDsc)) { #ifdef DEBUG Compiler::DoNotEnregisterReason dner = Compiler::DNER_AddrExposed; diff --git a/src/jit/lsraarmarch.cpp b/src/jit/lsraarmarch.cpp index 251bf53ab267..9a54c3227978 100644 --- a/src/jit/lsraarmarch.cpp +++ b/src/jit/lsraarmarch.cpp @@ -208,7 +208,7 @@ int LinearScan::BuildCall(GenTreeCall* call) assert(retTypeDesc != nullptr); dstCandidates = retTypeDesc->GetABIReturnRegs(); } - else if (varTypeIsFloating(registerType)) + else if (varTypeUsesFloatArgReg(registerType)) { dstCandidates = RBM_FLOATRET; } diff --git a/src/jit/lsrabuild.cpp b/src/jit/lsrabuild.cpp index da1fa8f7d37b..1b0f61ddcca2 100644 --- a/src/jit/lsrabuild.cpp +++ b/src/jit/lsrabuild.cpp @@ -1841,15 +1841,7 @@ void LinearScan::updateRegStateForArg(LclVarDsc* argDsc) { RegState* intRegState = &compiler->codeGen->intRegState; RegState* floatRegState = &compiler->codeGen->floatRegState; - // In the case of AMD64 we'll still use the floating point registers - // to model the register usage for argument on vararg calls, so - // we will ignore the varargs condition to determine whether we use - // XMM registers or not for setting up the call. 
- bool isFloat = (isFloatRegType(argDsc->lvType) -#ifndef _TARGET_AMD64_ - && !compiler->info.compIsVarArgs -#endif - && !compiler->opts.compUseSoftFP); + bool isFloat = emitter::isFloatReg(argDsc->lvArgReg); if (argDsc->lvIsHfaRegArg()) { @@ -3070,6 +3062,15 @@ int LinearScan::BuildReturn(GenTree* tree) regMaskTP useCandidates = RBM_NONE; #if FEATURE_MULTIREG_RET +#ifdef _TARGET_ARM64_ + if (varTypeIsSIMD(tree)) + { + useCandidates = allSIMDRegs(); + BuildUse(op1, useCandidates); + return 1; + } +#endif // _TARGET_ARM64_ + if (varTypeIsStruct(tree)) { // op1 has to be either an lclvar or a multi-reg returning call @@ -3209,7 +3210,7 @@ int LinearScan::BuildPutArgReg(GenTreeUnOp* node) GenTreeObj* obj = op1->AsObj(); GenTree* addr = obj->Addr(); unsigned size = obj->gtBlkSize; - assert(size <= TARGET_POINTER_SIZE); + assert(size <= MAX_PASS_SINGLEREG_BYTES); if (addr->OperIsLocalAddr()) { // We don't need a source register. diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index 50143d3c7fb2..fbfa17331191 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -828,6 +828,7 @@ void fgArgTabEntry::Dump() { printf("fgArgTabEntry[arg %u", argNum); printf(" %d.%s", node->gtTreeID, GenTree::OpName(node->gtOper)); + printf(" %s", varTypeName(argType)); if (regNum != REG_STK) { printf(", %u reg%s:", numRegs, numRegs == 1 ? 
"" : "s"); @@ -867,7 +868,7 @@ void fgArgTabEntry::Dump() } if (isHfaRegArg) { - printf(", isHfa"); + printf(", isHfa(%s)", varTypeName(GetHfaType())); } if (isBackFilled) { @@ -1140,6 +1141,7 @@ fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum, curArgTabEntry->argNum = argNum; curArgTabEntry->node = node; + curArgTabEntry->argType = node->TypeGet(); curArgTabEntry->parent = parent; curArgTabEntry->slotNum = 0; curArgTabEntry->numRegs = numRegs; @@ -1153,7 +1155,7 @@ fgArgTabEntry* fgArgInfo::AddRegArg(unsigned argNum, curArgTabEntry->needPlace = false; curArgTabEntry->processed = false; #ifdef FEATURE_HFA - curArgTabEntry->_isHfaArg = false; + curArgTabEntry->_hfaElemKind = HFA_ELEM_NONE; #endif curArgTabEntry->isBackFilled = false; curArgTabEntry->isNonStandard = false; @@ -1213,6 +1215,7 @@ fgArgTabEntry* fgArgInfo::AddStkArg(unsigned argNum, curArgTabEntry->setRegNum(0, REG_STK); curArgTabEntry->argNum = argNum; curArgTabEntry->node = node; + curArgTabEntry->argType = node->TypeGet(); curArgTabEntry->parent = parent; curArgTabEntry->slotNum = nextSlotNum; curArgTabEntry->numRegs = 0; @@ -1226,7 +1229,7 @@ fgArgTabEntry* fgArgInfo::AddStkArg(unsigned argNum, curArgTabEntry->needPlace = false; curArgTabEntry->processed = false; #ifdef FEATURE_HFA - curArgTabEntry->_isHfaArg = false; + curArgTabEntry->_hfaElemKind = HFA_ELEM_NONE; #endif curArgTabEntry->isBackFilled = false; curArgTabEntry->isNonStandard = false; @@ -2300,12 +2303,16 @@ void fgArgInfo::EvalArgsToTemps() { setupArg = compiler->fgMorphCopyBlock(setupArg); #if defined(_TARGET_ARMARCH_) || defined(UNIX_AMD64_ABI) - // This scalar LclVar widening step is only performed for ARM and AMD64 unix. - // - CORINFO_CLASS_HANDLE clsHnd = compiler->lvaGetStruct(tmpVarNum); - unsigned structSize = varDsc->lvExactSize; + if (lclVarType == TYP_STRUCT) + { + // This scalar LclVar widening step is only performed for ARM architectures. 
+ // + CORINFO_CLASS_HANDLE clsHnd = compiler->lvaGetStruct(tmpVarNum); + unsigned structSize = varDsc->lvExactSize; - scalarType = compiler->getPrimitiveTypeForStruct(structSize, clsHnd, curArgTabEntry->isVararg); + scalarType = + compiler->getPrimitiveTypeForStruct(structSize, clsHnd, curArgTabEntry->isVararg); + } #endif // _TARGET_ARMARCH_ || defined (UNIX_AMD64_ABI) } @@ -2391,7 +2398,7 @@ void fgArgInfo::EvalArgsToTemps() #else // !defined(_TARGET_AMD64_) || defined(UNIX_AMD64_ABI) - if (varTypeIsStruct(defArg)) + if (defArg->TypeGet() == TYP_STRUCT) { clsHnd = compiler->gtGetStructHandleIfPresent(defArg); noway_assert(clsHnd != NO_CLASS_HANDLE); @@ -3079,7 +3086,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) #ifdef FEATURE_HFA hfaType = GetHfaType(argx); - isHfaArg = varTypeIsFloating(hfaType); + isHfaArg = varTypeIsValidHfaType(hfaType); #if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_) // Make sure for vararg methods isHfaArg is not true. @@ -3628,7 +3635,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) #ifdef FEATURE_HFA if (isHfaArg) { - newArgEntry->setHfaType(hfaType, hfaSlots); + newArgEntry->SetHfaType(hfaType, hfaSlots); } #endif // FEATURE_HFA newArgEntry->SetMultiRegNums(); @@ -3872,7 +3879,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) { if (isPow2(passingSize)) { - canTransform = true; + canTransform = (!argEntry->isHfaArg || (passingSize == genTypeSize(argEntry->GetHfaType()))); } #if defined(_TARGET_ARM64_) || defined(UNIX_AMD64_ABI) @@ -3957,15 +3964,16 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) } else { - // We have a struct argument that's less than pointer size, and it is either a power of 2, + // We have a struct argument that fits into a register, and it is either a power of 2, // or a local. - // Change our GT_OBJ into a GT_IND of the correct type. + // Change our argument, as needed, into a value of the appropriate type. 
CLANG_FORMAT_COMMENT_ANCHOR; #ifdef _TARGET_ARM_ assert((size == 1) || ((structBaseType == TYP_DOUBLE) && (size == 2))); #else - assert(size == 1); + assert((size == 1) || + (varTypeIsSIMD(structBaseType) && size == (genTypeSize(structBaseType) / REGSIZE_BYTES))); #endif assert((structBaseType != TYP_STRUCT) && (genTypeSize(structBaseType) >= originalSize)); @@ -4012,7 +4020,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) // we will use the first and only promoted field argObj->gtLclVarCommon.SetLclNum(varDsc->lvFieldLclStart); - if (varTypeCanReg(fieldVarDsc->TypeGet()) && + if (varTypeIsEnregisterable(fieldVarDsc->TypeGet()) && (genTypeSize(fieldVarDsc->TypeGet()) == originalSize)) { // Just use the existing field's type @@ -4025,7 +4033,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) argObj->ChangeOper(GT_LCL_FLD); argObj->gtType = structBaseType; } - assert(varTypeCanReg(argObj->TypeGet())); + assert(varTypeIsEnregisterable(argObj->TypeGet())); assert(copyBlkClass == NO_CLASS_HANDLE); } else @@ -4043,7 +4051,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) copyBlkClass = objClass; } } - else if (!varTypeIsIntegralOrI(varDsc->TypeGet())) + else if (genActualType(varDsc->TypeGet()) != structBaseType) { // Not a promoted struct, so just swizzle the type by using GT_LCL_FLD argObj->ChangeOper(GT_LCL_FLD); @@ -4055,44 +4063,41 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) // Not a GT_LCL_VAR, so we can just change the type on the node argObj->gtType = structBaseType; } - assert(varTypeCanReg(argObj->TypeGet()) || - ((copyBlkClass != NO_CLASS_HANDLE) && varTypeCanReg(structBaseType))); - - size = 1; + assert(varTypeIsEnregisterable(argObj->TypeGet()) || + ((copyBlkClass != NO_CLASS_HANDLE) && varTypeIsEnregisterable(structBaseType))); } #endif // !_TARGET_X86_ #ifndef UNIX_AMD64_ABI // We still have a struct unless we converted the GT_OBJ into a GT_IND above... 
- if (varTypeIsStruct(structBaseType) && !argEntry->passedByRef) + if (isHfaArg && passUsingFloatRegs) { - if (isHfaArg && passUsingFloatRegs) - { - size = argEntry->numRegs; - } - else - { - // If the valuetype size is not a multiple of TARGET_POINTER_SIZE, - // we must copyblk to a temp before doing the obj to avoid - // the obj reading memory past the end of the valuetype - CLANG_FORMAT_COMMENT_ANCHOR; + size = argEntry->numRegs; + } + else if (structBaseType == TYP_STRUCT) + { + // If the valuetype size is not a multiple of TARGET_POINTER_SIZE, + // we must copyblk to a temp before doing the obj to avoid + // the obj reading memory past the end of the valuetype + CLANG_FORMAT_COMMENT_ANCHOR; - if (roundupSize > originalSize) - { - copyBlkClass = objClass; + if (roundupSize > originalSize) + { + copyBlkClass = objClass; - // There are a few special cases where we can omit using a CopyBlk - // where we normally would need to use one. + // There are a few special cases where we can omit using a CopyBlk + // where we normally would need to use one. - if (argObj->gtObj.gtOp1->IsLocalAddrExpr() != nullptr) // Is the source a LclVar? - { - copyBlkClass = NO_CLASS_HANDLE; - } + if (argObj->OperIs(GT_OBJ) && + argObj->AsObj()->gtGetOp1()->IsLocalAddrExpr() != nullptr) // Is the source a LclVar? 
+ { + copyBlkClass = NO_CLASS_HANDLE; } - - size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items } + + size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items } + #endif // !UNIX_AMD64_ABI } } @@ -4159,7 +4164,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) #if FEATURE_MULTIREG_ARGS if (isStructArg) { - if (size > 1 || isHfaArg) + if (((argEntry->numRegs + argEntry->numSlots) > 1) || (isHfaArg && argx->TypeGet() == TYP_STRUCT)) { hasMultiregStructArgs = true; } @@ -4376,11 +4381,37 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) } unsigned size = (fgEntryPtr->numRegs + fgEntryPtr->numSlots); - if ((size > 1) || fgEntryPtr->isHfaArg) + if ((size > 1) || (fgEntryPtr->isHfaArg && argx->TypeGet() == TYP_STRUCT)) { foundStructArg = true; if (varTypeIsStruct(argx) && !argx->OperIs(GT_FIELD_LIST)) { + if (fgEntryPtr->isHfaArg) + { + var_types hfaType = fgEntryPtr->hfaType; + unsigned structSize; + if (argx->OperIs(GT_OBJ)) + { + structSize = argx->AsObj()->gtBlkSize; + } + else + { + assert(argx->OperIs(GT_LCL_VAR)); + structSize = lvaGetDesc(argx->AsLclVar()->gtLclNum)->lvExactSize; + } + assert(structSize > 0); + if (structSize == genTypeSize(hfaType)) + { + if (argx->OperIs(GT_OBJ)) + { + fgMorphBlkToInd(argx->AsObj(), hfaType); + } + else + { + argx->gtType = hfaType; + } + } + } arg = fgMorphMultiregStructArg(arg, fgEntryPtr); // Did we replace 'argx' with a new tree? 
@@ -4490,14 +4521,19 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry #if FEATURE_MULTIREG_ARGS // Examine 'arg' and setup argValue objClass and structSize // - CORINFO_CLASS_HANDLE objClass = gtGetStructHandleIfPresent(arg); - GenTree* argValue = arg; // normally argValue will be arg, but see right below - unsigned structSize = 0; + CORINFO_CLASS_HANDLE objClass = gtGetStructHandleIfPresent(arg); + noway_assert(objClass != NO_CLASS_HANDLE); + GenTree* argValue = arg; // normally argValue will be arg, but see right below + unsigned structSize = 0; - if (arg->OperGet() == GT_OBJ) + if (arg->TypeGet() != TYP_STRUCT) + { + structSize = genTypeSize(arg->TypeGet()); + assert(structSize == info.compCompHnd->getClassSize(objClass)); + } + else if (arg->OperGet() == GT_OBJ) { GenTreeObj* argObj = arg->AsObj(); - objClass = argObj->gtClass; structSize = argObj->Size(); assert(structSize == info.compCompHnd->getClassSize(objClass)); @@ -4527,7 +4563,6 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry } else { - objClass = gtGetStructHandleIfPresent(arg); structSize = info.compCompHnd->getClassSize(objClass); } noway_assert(objClass != NO_CLASS_HANDLE); @@ -4538,8 +4573,8 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry unsigned elemSize = 0; var_types type[MAX_ARG_REG_COUNT] = {}; // TYP_UNDEF = 0 - hfaType = GetHfaType(objClass); // set to float or double if it is an HFA, otherwise TYP_UNDEF - if (varTypeIsFloating(hfaType) + hfaType = fgEntryPtr->hfaType; + if (varTypeIsValidHfaType(hfaType) #if !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_) && !fgEntryPtr->isVararg #endif // !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_) @@ -4657,8 +4692,13 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry #endif // !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_) ) { - // We have a HFA struct - noway_assert(elemType == (varDsc->lvHfaTypeIsFloat() ? 
TYP_FLOAT : TYP_DOUBLE)); + // We have a HFA struct. + // Note that GetHfaType may not be the same as elemType, since TYP_SIMD8 is handled the same as TYP_DOUBLE. + var_types useElemType = elemType; +#ifdef _TARGET_ARM64_ + useElemType = (elemType == TYP_SIMD8) ? TYP_DOUBLE : useElemType; +#endif // _TARGET_ARM64_ + noway_assert(useElemType == varDsc->GetHfaType()); noway_assert(elemSize == genTypeSize(elemType)); noway_assert(elemCount == (varDsc->lvExactSize / elemSize)); noway_assert(elemSize * elemCount == varDsc->lvExactSize); @@ -5291,7 +5331,7 @@ void Compiler::fgFixupStructReturn(GenTree* callNode) #if FEATURE_MULTIREG_RET // Either we don't have a struct now or if struct, then it is a struct returned in regs or in return buffer. - assert(!varTypeIsStruct(call) || call->HasMultiRegRetVal() || callHasRetBuffArg); + assert((call->gtType != TYP_STRUCT) || call->HasMultiRegRetVal() || callHasRetBuffArg); #else // !FEATURE_MULTIREG_RET // No more struct returns assert(call->TypeGet() != TYP_STRUCT); @@ -7104,7 +7144,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) #elif defined(_TARGET_ARM64_) // ARM64 var_types hfaType = GetHfaType(argx); - bool isHfaArg = varTypeIsFloating(hfaType); + bool isHfaArg = varTypeIsValidHfaType(hfaType); size_t size = 1; if (isHfaArg) @@ -17414,12 +17454,10 @@ void Compiler::fgMorphLocalField(GenTree* tree, GenTree* parent) // The field must be an enregisterable type; otherwise it would not be a promoted field. // The tree type may not match, e.g. for return types that have been morphed, but both // must be enregisterable types. - // TODO-Cleanup: varTypeCanReg should presumably return true for SIMD types, but - // there may be places where that would violate existing assumptions. 
var_types treeType = tree->TypeGet(); var_types fieldType = fldVarDsc->TypeGet(); - assert((varTypeCanReg(treeType) || varTypeIsSIMD(treeType)) && - (varTypeCanReg(fieldType) || varTypeIsSIMD(fieldType))); + assert((varTypeIsEnregisterable(treeType) || varTypeIsSIMD(treeType)) && + (varTypeIsEnregisterable(fieldType) || varTypeIsSIMD(fieldType))); tree->ChangeOper(GT_LCL_VAR); assert(tree->gtLclVarCommon.gtLclNum == fieldLclIndex); @@ -17474,23 +17512,28 @@ void Compiler::fgMarkImplicitByRefArgs() if (varDsc->lvIsParam && varTypeIsStruct(varDsc)) { - size_t size; + size_t size = varDsc->lvExactSize; + assert(size == info.compCompHnd->getClassSize(varDsc->lvVerTypeInfo.GetClassHandle())); - if (varDsc->lvSize() > REGSIZE_BYTES) + bool isPassedByReference; +#if defined(_TARGET_AMD64_) + isPassedByReference = (size > REGSIZE_BYTES || (size & (size - 1)) != 0); +#elif defined(_TARGET_ARM64_) + if (size > TARGET_POINTER_SIZE) { - size = varDsc->lvSize(); + CORINFO_CLASS_HANDLE clsHnd = varDsc->lvVerTypeInfo.GetClassHandleForValueClass(); + structPassingKind howToPassStruct; + var_types type = + getArgTypeForStruct(clsHnd, &howToPassStruct, this->info.compIsVarArgs, varDsc->lvExactSize); + isPassedByReference = (howToPassStruct == SPK_ByReference); } else { - CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle(); - size = info.compCompHnd->getClassSize(typeHnd); + isPassedByReference = false; } - -#if defined(_TARGET_AMD64_) - if (size > REGSIZE_BYTES || (size & (size - 1)) != 0) -#elif defined(_TARGET_ARM64_) - if ((size > TARGET_POINTER_SIZE) && !lvaIsMultiregStruct(varDsc, this->info.compIsVarArgs)) #endif + + if (isPassedByReference) { // Previously nobody was ever setting lvIsParam and lvIsTemp on the same local // So I am now using it to indicate that this is one of the weird implicit @@ -17636,8 +17679,7 @@ void Compiler::fgRetypeImplicitByRefArgs() // the parameter which is really a pointer to the struct. 
fieldVarDsc->lvIsRegArg = false; fieldVarDsc->lvIsMultiRegArg = false; - fieldVarDsc->lvSetIsHfaRegArg(false); - fieldVarDsc->lvArgReg = REG_NA; + fieldVarDsc->lvArgReg = REG_NA; #if FEATURE_MULTIREG_ARGS fieldVarDsc->lvOtherArgReg = REG_NA; #endif diff --git a/src/jit/register_arg_convention.h b/src/jit/register_arg_convention.h index 28f29b7c13c3..ad20b4a0f543 100644 --- a/src/jit/register_arg_convention.h +++ b/src/jit/register_arg_convention.h @@ -58,7 +58,7 @@ struct InitVarDscInfo // return ref to current register arg for this type unsigned& regArgNum(var_types type) { - return varTypeIsFloating(type) ? floatRegArgNum : intRegArgNum; + return varTypeUsesFloatArgReg(type) ? floatRegArgNum : intRegArgNum; } // Allocate a set of contiguous argument registers. "type" is either an integer @@ -110,7 +110,7 @@ struct InitVarDscInfo // return max register arg for this type unsigned maxRegArgNum(var_types type) { - return varTypeIsFloating(type) ? maxFloatRegArgNum : maxIntRegArgNum; + return varTypeUsesFloatArgReg(type) ? 
maxFloatRegArgNum : maxIntRegArgNum; } bool enoughAvailRegs(var_types type, unsigned numRegs = 1); diff --git a/src/jit/simd.cpp b/src/jit/simd.cpp index b4cecb3e2046..81494b02361c 100644 --- a/src/jit/simd.cpp +++ b/src/jit/simd.cpp @@ -121,7 +121,7 @@ int Compiler::getSIMDTypeAlignment(var_types simdType) // var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, unsigned* sizeBytes /*= nullptr */) { - assert(featureSIMD); + assert(supportSIMDTypes()); if (m_simdHandleCache == nullptr) { diff --git a/src/jit/target.h b/src/jit/target.h index 97df447f4d15..a225d1a4bb1c 100644 --- a/src/jit/target.h +++ b/src/jit/target.h @@ -246,6 +246,7 @@ typedef unsigned char regNumberSmall; #define FEATURE_MULTIREG_ARGS_OR_RET 1 // Support for passing and/or returning single values in more than one register #define FEATURE_MULTIREG_ARGS 0 // Support for passing a single argument in more than one register #define FEATURE_MULTIREG_RET 1 // Support for returning a single value in more than one register + #define MAX_PASS_SINGLEREG_BYTES 8 // Maximum size of a struct passed in a single register (double). #define MAX_PASS_MULTIREG_BYTES 0 // No multireg arguments (note this seems wrong as MAX_ARG_REG_COUNT is 2) #define MAX_RET_MULTIREG_BYTES 8 // Maximum size of a struct that could be returned in more than one register @@ -540,6 +541,7 @@ typedef unsigned char regNumberSmall; #define FEATURE_FASTTAILCALL 1 // Tail calls made as epilog+jmp #define FEATURE_TAILCALL_OPT 1 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls. #define FEATURE_SET_FLAGS 0 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set + #define MAX_PASS_SINGLEREG_BYTES 8 // Maximum size of a struct passed in a single register (double). 
#ifdef UNIX_AMD64_ABI #define FEATURE_MULTIREG_ARGS_OR_RET 1 // Support for passing and/or returning single values in more than one register #define FEATURE_MULTIREG_ARGS 1 // Support for passing a single argument in more than one register @@ -924,6 +926,7 @@ typedef unsigned char regNumberSmall; #define FEATURE_MULTIREG_ARGS 1 // Support for passing a single argument in more than one register (including passing HFAs) #define FEATURE_MULTIREG_RET 1 // Support for returning a single value in more than one register (including HFA returns) #define FEATURE_STRUCT_CLASSIFIER 0 // Uses a classifier function to determine is structs are passed/returned in more than one register + #define MAX_PASS_SINGLEREG_BYTES 8 // Maximum size of a struct passed in a single register (double). #define MAX_PASS_MULTIREG_BYTES 32 // Maximum size of a struct that could be passed in more than one register (Max is an HFA of 4 doubles) #define MAX_RET_MULTIREG_BYTES 32 // Maximum size of a struct that could be returned in more than one register (Max is an HFA of 4 doubles) #define MAX_ARG_REG_COUNT 4 // Maximum registers used to pass a single argument in multiple registers. (max is 4 floats or doubles using an HFA) @@ -1231,9 +1234,10 @@ typedef unsigned char regNumberSmall; #define FEATURE_MULTIREG_ARGS 1 // Support for passing a single argument in more than one register #define FEATURE_MULTIREG_RET 1 // Support for returning a single value in more than one register #define FEATURE_STRUCT_CLASSIFIER 0 // Uses a classifier function to determine is structs are passed/returned in more than one register - #define MAX_PASS_MULTIREG_BYTES 32 // Maximum size of a struct that could be passed in more than one register (max is 4 doubles using an HFA) - #define MAX_RET_MULTIREG_BYTES 32 // Maximum size of a struct that could be returned in more than one register (Max is an HFA of 4 doubles) - #define MAX_ARG_REG_COUNT 4 // Maximum registers used to pass a single argument in multiple registers. 
(max is 4 floats or doubles using an HFA) + #define MAX_PASS_SINGLEREG_BYTES 16 // Maximum size of a struct passed in a single register (16-byte vector). + #define MAX_PASS_MULTIREG_BYTES 64 // Maximum size of a struct that could be passed in more than one register (max is 4 16-byte vectors using an HVA) + #define MAX_RET_MULTIREG_BYTES 64 // Maximum size of a struct that could be returned in more than one register (Max is an HVA of 4 16-byte vectors) + #define MAX_ARG_REG_COUNT 4 // Maximum registers used to pass a single argument in multiple registers. (max is 4 128-bit vectors using an HVA) #define MAX_RET_REG_COUNT 4 // Maximum registers used to return a value. #define NOGC_WRITE_BARRIERS 1 // We have specialized WriteBarrier JIT Helpers that DO-NOT trash the RBM_CALLEE_TRASH registers @@ -1955,10 +1959,10 @@ inline regNumber regNextOfType(regNumber reg, var_types type) * Type checks */ -inline bool isFloatRegType(int /* s/b "var_types" */ type) +inline bool isFloatRegType(var_types type) { #if CPU_HAS_FP_SUPPORT - return type == TYP_DOUBLE || type == TYP_FLOAT; + return varTypeUsesFloatReg(type); #else return false; #endif diff --git a/src/jit/vartype.h b/src/jit/vartype.h index 04793ea86830..83824ac13576 100644 --- a/src/jit/vartype.h +++ b/src/jit/vartype.h @@ -174,9 +174,9 @@ inline bool varTypeIsI(T vt) } template -inline bool varTypeCanReg(T vt) +inline bool varTypeIsEnregisterable(T vt) { - return ((varTypeClassification[TypeGet(vt)] & (VTF_INT | VTF_I | VTF_FLT)) != 0); + return (TypeGet(vt) != TYP_STRUCT); } template @@ -271,9 +271,56 @@ inline bool varTypeIsStruct(T vt) } template -inline bool varTypeIsEnregisterableStruct(T vt) +inline bool varTypeUsesFloatReg(T vt) { - return (TypeGet(vt) != TYP_STRUCT); + // Note that not all targets support SIMD, but if they don't, varTypeIsSIMD will + // always return false. 
+ return varTypeIsFloating(vt) || varTypeIsSIMD(vt); +} + +template +inline bool varTypeUsesFloatArgReg(T vt) +{ +#ifdef _TARGET_ARM64_ + // Arm64 passes SIMD types in floating point registers. + return varTypeUsesFloatReg(vt); +#else + // Other targets pass them as regular structs - by reference or by value. + return varTypeIsFloating(vt); +#endif +} + +//------------------------------------------------------------------------ +// varTypeIsValidHfaType: Determine if the type is a valid HFA type +// +// Arguments: +// vt - the type of interest +// +// Return Value: +// Returns true iff the type is a valid HFA type. +// +// Notes: +// This should only be called with the return value from GetHfaType(). +// The only valid values are TYP_UNDEF, for which this returns false, +// TYP_FLOAT, TYP_DOUBLE, or (ARM64-only) TYP_SIMD*. +// +template +inline bool varTypeIsValidHfaType(T vt) +{ +#ifdef FEATURE_HFA + bool isValid = (TypeGet(vt) != TYP_UNDEF); + if (isValid) + { +#ifdef _TARGET_ARM64_ + assert(varTypeUsesFloatReg(vt)); +#else // !_TARGET_ARM64_ + assert(varTypeIsFloating(vt)); +#endif // !_TARGET_ARM64_ + } + return isValid; +#else // !FEATURE_HFA + return false; +#endif // !FEATURE_HFA } /*****************************************************************************/ diff --git a/src/vm/argdestination.h b/src/vm/argdestination.h index 386ba57c821f..8ddd7b210412 100644 --- a/src/vm/argdestination.h +++ b/src/vm/argdestination.h @@ -60,22 +60,24 @@ class ArgDestination // fieldBytes - size of the structure void CopyHFAStructToRegister(void *src, int fieldBytes) { - // We are either copying either a float or double HFA and need to + // We are copying a float, double or vector HFA/HVA and need to // enregister each field. 
int floatRegCount = m_argLocDescForStructInRegs->m_cFloatReg; - bool typeFloat = m_argLocDescForStructInRegs->m_isSinglePrecision; + int hfaFieldSize = m_argLocDescForStructInRegs->m_hfaFieldSize; UINT64* dest = (UINT64*) this->GetDestinationAddress(); for (int i = 0; i < floatRegCount; ++i) { // Copy 4 or 8 bytes from src. - UINT64 val = typeFloat ? *((UINT32*)src + i) : *((UINT64*)src + i); + UINT64 val = (hfaFieldSize == 4) ? *((UINT32*)src) : *((UINT64*)src); // Always store 8 bytes *(dest++) = val; - // For now, always zero the next 8 bytes. - // (When HVAs are supported we will get the next 8 bytes from src.) - *(dest++) = 0; + // Either zero the next 8 bytes or get the next 8 bytes from src for 16-byte vector. + *(dest++) = (hfaFieldSize == 16) ? *((UINT64*)src + 1) : 0; + + // Increment src by the appropriate amount. + src = (void*)((char*)src + hfaFieldSize); } } diff --git a/src/vm/arm64/CallDescrWorkerARM64.asm b/src/vm/arm64/CallDescrWorkerARM64.asm index fe277ceb6282..9f2ec2461159 100644 --- a/src/vm/arm64/CallDescrWorkerARM64.asm +++ b/src/vm/arm64/CallDescrWorkerARM64.asm @@ -93,7 +93,7 @@ LNoFloatingPoint bne LNoDoubleReturn LFloatReturn - str d0, [x19, #(CallDescrData__returnValue + 0)] + str q0, [x19, #(CallDescrData__returnValue + 0)] b LReturnDone LNoDoubleReturn @@ -117,6 +117,16 @@ LNoFloatHFAReturn LNoDoubleHFAReturn + ;;VectorHFAReturn return case + cmp w3, #64 + bne LNoVectorHFAReturn + + stp q0, q1, [x19, #(CallDescrData__returnValue + 0)] + stp q2, q3, [x19, #(CallDescrData__returnValue + 0x20)] + b LReturnDone + +LNoVectorHFAReturn + EMIT_BREAKPOINT ; Unreachable LIntReturn diff --git a/src/vm/arm64/asmconstants.h b/src/vm/arm64/asmconstants.h index caffa809eb50..8c99ed841967 100644 --- a/src/vm/arm64/asmconstants.h +++ b/src/vm/arm64/asmconstants.h @@ -61,7 +61,7 @@ ASMCONSTANTS_C_ASSERT(SIZEOF__FloatArgumentRegisters == sizeof(FloatArgumentRegi #define CallDescrData__fpReturnSize 0x20 #define CallDescrData__pTarget 0x28 #define 
CallDescrData__pRetBuffArg 0x30 -#define CallDescrData__returnValue 0x38 +#define CallDescrData__returnValue 0x40 ASMCONSTANTS_C_ASSERT(CallDescrData__pSrc == offsetof(CallDescrData, pSrc)) ASMCONSTANTS_C_ASSERT(CallDescrData__numStackSlots == offsetof(CallDescrData, numStackSlots)) diff --git a/src/vm/arm64/asmhelpers.asm b/src/vm/arm64/asmhelpers.asm index 5883597aaa37..71e53d3d09e8 100644 --- a/src/vm/arm64/asmhelpers.asm +++ b/src/vm/arm64/asmhelpers.asm @@ -705,8 +705,9 @@ NoFloatingPointRetVal ; x0 = fpRetSize - ; return value is stored before float argument registers - add x1, sp, #(__PWTB_FloatArgumentRegisters - 0x20) + ; The return value is stored before float argument registers + ; The maximum size of a return value is 0x40 (HVA of 4x16) + add x1, sp, #(__PWTB_FloatArgumentRegisters - 0x40) bl setStubReturnValue EPILOG_WITH_TRANSITION_BLOCK_RETURN diff --git a/src/vm/arm64/calldescrworkerarm64.S b/src/vm/arm64/calldescrworkerarm64.S index f987d402ddee..8e8084ba3496 100644 --- a/src/vm/arm64/calldescrworkerarm64.S +++ b/src/vm/arm64/calldescrworkerarm64.S @@ -85,7 +85,7 @@ LOCAL_LABEL(NoFloatingPoint): bne LOCAL_LABEL(NoDoubleReturn) LOCAL_LABEL(FloatReturn): - str d0, [x19, #(CallDescrData__returnValue + 0)] + str q0, [x19, #(CallDescrData__returnValue + 0)] b LOCAL_LABEL(ReturnDone) LOCAL_LABEL(NoDoubleReturn): @@ -97,6 +97,7 @@ LOCAL_LABEL(NoDoubleReturn): stp s0, s1, [x19, #(CallDescrData__returnValue + 0)] stp s2, s3, [x19, #(CallDescrData__returnValue + 0x08)] b LOCAL_LABEL(ReturnDone) + LOCAL_LABEL(NoFloatHFAReturn): //DoubleHFAReturn return case @@ -109,6 +110,16 @@ LOCAL_LABEL(NoFloatHFAReturn): LOCAL_LABEL(NoDoubleHFAReturn): + //VectorHFAReturn return case + cmp w3, #64 + bne LOCAL_LABEL(LNoVectorHFAReturn) + + stp q0, q1, [x19, #(CallDescrData__returnValue + 0)] + stp q2, q3, [x19, #(CallDescrData__returnValue + 0x20)] + b LOCAL_LABEL(ReturnDone) + +LOCAL_LABEL(LNoVectorHFAReturn): + EMIT_BREAKPOINT // Unreachable LOCAL_LABEL(IntReturn): diff 
--git a/src/vm/arm64/cgencpu.h b/src/vm/arm64/cgencpu.h index fd1fbafe96da..a2cac4eb7c20 100644 --- a/src/vm/arm64/cgencpu.h +++ b/src/vm/arm64/cgencpu.h @@ -51,7 +51,7 @@ extern PCODE GetPreStubEntryPoint(); #define CACHE_LINE_SIZE 64 #define LOG2SLOT LOG2_PTRSIZE -#define ENREGISTERED_RETURNTYPE_MAXSIZE 32 // bytes (four FP registers: d0,d1,d2 and d3) +#define ENREGISTERED_RETURNTYPE_MAXSIZE 64 // bytes (four vector registers: q0,q1,q2 and q3) #define ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE 16 // bytes (two int registers: x0 and x1) #define ENREGISTERED_PARAMTYPE_MAXSIZE 16 // bytes (max value type size that can be passed by value) diff --git a/src/vm/callhelpers.h b/src/vm/callhelpers.h index db9cfad6cb1e..f0d718c6ff5d 100644 --- a/src/vm/callhelpers.h +++ b/src/vm/callhelpers.h @@ -39,8 +39,13 @@ struct CallDescrData // Return value // #ifdef ENREGISTERED_RETURNTYPE_MAXSIZE +#ifdef _TARGET_ARM64_ + // Use NEON128 to ensure proper alignment for vectors. + DECLSPEC_ALIGN(16) NEON128 returnValue[ENREGISTERED_RETURNTYPE_MAXSIZE / sizeof(NEON128)]; +#else // Use UINT64 to ensure proper alignment UINT64 returnValue[ENREGISTERED_RETURNTYPE_MAXSIZE / sizeof(UINT64)]; +#endif #else UINT64 returnValue; #endif diff --git a/src/vm/callingconvention.h b/src/vm/callingconvention.h index 7368fecac816..cbc6aad5c4f6 100644 --- a/src/vm/callingconvention.h +++ b/src/vm/callingconvention.h @@ -49,9 +49,25 @@ struct ArgLocDesc #endif // UNIX_AMD64_ABI +#ifdef FEATURE_HFA + static unsigned getHFAFieldSize(CorElementType hfaType) + { + switch (hfaType) + { + case ELEMENT_TYPE_R4: return 4; + case ELEMENT_TYPE_R8: return 8; + // We overload VALUETYPE for 16-byte vectors. + case ELEMENT_TYPE_VALUETYPE: return 16; + default: _ASSERTE(!"Invalid HFA Type"); return 0; + } + } +#endif #if defined(_TARGET_ARM64_) - bool m_isSinglePrecision; // For determining if HFA is single or double - // precision + unsigned m_hfaFieldSize; // Size of HFA field in bytes. 
+ void setHFAFieldSize(CorElementType hfaType) + { + m_hfaFieldSize = getHFAFieldSize(hfaType); + } #endif // defined(_TARGET_ARM64_) #if defined(_TARGET_ARM_) @@ -76,7 +92,7 @@ struct ArgLocDesc m_fRequires64BitAlignment = FALSE; #endif #if defined(_TARGET_ARM64_) - m_isSinglePrecision = FALSE; + m_hfaFieldSize = 0; #endif // defined(_TARGET_ARM64_) #if defined(UNIX_AMD64_ABI) m_eeClass = NULL; @@ -589,10 +605,9 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE if (!m_argTypeHandle.IsNull() && m_argTypeHandle.IsHFA()) { CorElementType type = m_argTypeHandle.GetHFAType(); - bool isFloatType = (type == ELEMENT_TYPE_R4); + pLoc->setHFAFieldSize(type); + pLoc->m_cFloatReg = GetArgSize()/pLoc->m_hfaFieldSize; - pLoc->m_cFloatReg = isFloatType ? GetArgSize()/sizeof(float): GetArgSize()/sizeof(double); - pLoc->m_isSinglePrecision = isFloatType; } else { @@ -1297,16 +1312,14 @@ int ArgIteratorTemplate::GetNextOffset() if (thValueType.IsHFA()) { CorElementType type = thValueType.GetHFAType(); - bool isFloatType = (type == ELEMENT_TYPE_R4); - - cFPRegs = (type == ELEMENT_TYPE_R4)? (argSize/sizeof(float)): (argSize/sizeof(double)); m_argLocDescForStructInRegs.Init(); - m_argLocDescForStructInRegs.m_cFloatReg = cFPRegs; m_argLocDescForStructInRegs.m_idxFloatReg = m_idxFPReg; - m_argLocDescForStructInRegs.m_isSinglePrecision = isFloatType; - + m_argLocDescForStructInRegs.setHFAFieldSize(type); + cFPRegs = argSize/m_argLocDescForStructInRegs.m_hfaFieldSize; + m_argLocDescForStructInRegs.m_cFloatReg = cFPRegs; + m_hasArgLocDescForStructInRegs = true; } else @@ -1474,10 +1487,8 @@ void ArgIteratorTemplate::ComputeReturnFlags() { CorElementType hfaType = thValueType.GetHFAType(); - flags |= (hfaType == ELEMENT_TYPE_R4) ? 
- ((4 * sizeof(float)) << RETURN_FP_SIZE_SHIFT) : - ((4 * sizeof(double)) << RETURN_FP_SIZE_SHIFT); - + int hfaFieldSize = ArgLocDesc::getHFAFieldSize(hfaType); + flags |= ((4 * hfaFieldSize) << RETURN_FP_SIZE_SHIFT); break; } #endif diff --git a/src/vm/class.cpp b/src/vm/class.cpp index af1073fb5109..14eb0595b450 100644 --- a/src/vm/class.cpp +++ b/src/vm/class.cpp @@ -1172,6 +1172,58 @@ bool MethodTable::IsHFA() } #endif // !FEATURE_HFA +//******************************************************************************* +int MethodTable::GetVectorSize() +{ + // This is supported for finding HVA types for Arm64. In order to support the altjit, + // we support this on 64-bit platforms (i.e. Arm64 and X64). +#ifdef _TARGET_64BIT_ + if (IsIntrinsicType()) + { + LPCUTF8 namespaceName; + LPCUTF8 className = GetFullyQualifiedNameInfo(&namespaceName); + int vectorSize = 0; + + if (strcmp(className, "Vector`1") == 0) + { + vectorSize = GetNumInstanceFieldBytes(); + _ASSERTE(strcmp(namespaceName, "System.Numerics") == 0); + return vectorSize; + } + if (strcmp(className, "Vector128`1") == 0) + { + vectorSize = 16; + } + else if (strcmp(className, "Vector256`1") == 0) + { + vectorSize = 32; + } + else if (strcmp(className, "Vector64`1") == 0) + { + vectorSize = 8; + } + if (vectorSize != 0) + { + // We need to verify that T (the element or "base" type) is a primitive type. + TypeHandle typeArg = GetInstantiation()[0]; + CorElementType corType = typeArg.GetSignatureCorElementType(); + bool isSupportedElementType = (corType >= ELEMENT_TYPE_I1 && corType <= ELEMENT_TYPE_R8); + // These element types are not supported for Vector64. 
+ if ((vectorSize == 8) && (corType == ELEMENT_TYPE_I8 || corType == ELEMENT_TYPE_U8 || corType == ELEMENT_TYPE_R8)) + { + isSupportedElementType = false; + } + if (isSupportedElementType) + { + _ASSERTE(strcmp(namespaceName, "System.Runtime.Intrinsics") == 0); + return vectorSize; + } + } + } +#endif // _TARGET_64BIT_ + return 0; +} + //******************************************************************************* CorElementType MethodTable::GetHFAType() { @@ -1191,17 +1243,28 @@ CorElementType MethodTable::GetHFAType() _ASSERTE(pMT->IsValueType()); _ASSERTE(pMT->GetNumInstanceFields() > 0); + int vectorSize = pMT->GetVectorSize(); + if (vectorSize != 0) + { + return (vectorSize == 8) ? ELEMENT_TYPE_R8 : ELEMENT_TYPE_VALUETYPE; + } + PTR_FieldDesc pFirstField = pMT->GetApproxFieldDescListRaw(); CorElementType fieldType = pFirstField->GetFieldType(); - + // All HFA fields have to be of the same type, so we can just return the type of the first field switch (fieldType) { case ELEMENT_TYPE_VALUETYPE: pMT = pFirstField->LookupApproxFieldTypeHandle().GetMethodTable(); + vectorSize = pMT->GetVectorSize(); + if (vectorSize != 0) + { + return (vectorSize == 8) ? ELEMENT_TYPE_R8 : ELEMENT_TYPE_VALUETYPE; + } break; - + case ELEMENT_TYPE_R4: case ELEMENT_TYPE_R8: return fieldType; @@ -1212,7 +1275,7 @@ CorElementType MethodTable::GetHFAType() _ASSERTE(false); return ELEMENT_TYPE_END; } - } + } } bool MethodTable::IsNativeHFA() @@ -1231,6 +1294,7 @@ CorElementType MethodTable::GetNativeHFAType() // // When FEATURE_HFA is defined, we cache the value; otherwise we recompute it with each // call. The latter is only for the armaltjit and the arm64altjit. 
+// bool #if defined(FEATURE_HFA) EEClass::CheckForHFA(MethodTable ** pByValueClassCache) @@ -1243,25 +1307,18 @@ EEClass::CheckForHFA() // This method should be called for valuetypes only _ASSERTE(GetMethodTable()->IsValueType()); - // The SIMD Intrinsic types are meant to be handled specially and should not be treated as HFA - if (GetMethodTable()->IsIntrinsicType()) - { - LPCUTF8 namespaceName; - LPCUTF8 className = GetMethodTable()->GetFullyQualifiedNameInfo(&namespaceName); - if ((strcmp(className, "Vector256`1") == 0) || (strcmp(className, "Vector128`1") == 0) || - (strcmp(className, "Vector64`1") == 0)) - { - assert(strcmp(namespaceName, "System.Runtime.Intrinsics") == 0); - return false; - } - - if ((strcmp(className, "Vector`1") == 0) && (strcmp(namespaceName, "System.Numerics") == 0)) - { - return false; - } + // The opaque Vector types appear to have multiple fields, but need to be treated + // as an opaque type of a single vector. + if (GetMethodTable()->GetVectorSize() != 0) + { +#if defined(FEATURE_HFA) + GetMethodTable()->SetIsHFA(); +#endif + return true; } + int elemSize = 0; CorElementType hfaType = ELEMENT_TYPE_END; FieldDesc *pFieldDescList = GetFieldDescList(); @@ -1278,11 +1335,41 @@ EEClass::CheckForHFA() switch (fieldType) { case ELEMENT_TYPE_VALUETYPE: + { +#ifdef _TARGET_ARM64_ + // hfa/hva types are unique by size, except for Vector64 which we can conveniently + // treat as if it were a double for ABI purposes. However, it only qualifies as + // an HVA if all fields are the same type. This will ensure that we only + // consider it an HVA if all the fields are ELEMENT_TYPE_VALUETYPE (which have been + // determined above to be vectors) of the same size. 
+ MethodTable* pMT; +#if defined(FEATURE_HFA) + pMT = pByValueClassCache[i]; +#else + pMT = pFD->LookupApproxFieldTypeHandle().AsMethodTable(); +#endif + int thisElemSize = pMT->GetVectorSize(); + if (thisElemSize != 0) + { + if (elemSize == 0) + { + elemSize = thisElemSize; + } + else if ((thisElemSize != elemSize) || (hfaType != ELEMENT_TYPE_VALUETYPE)) + { + return false; + } + } + else +#endif // _TARGET_ARM64_ + { #if defined(FEATURE_HFA) - fieldType = pByValueClassCache[i]->GetHFAType(); + fieldType = pByValueClassCache[i]->GetHFAType(); #else - fieldType = pFD->LookupApproxFieldTypeHandle().AsMethodTable()->GetHFAType(); + fieldType = pFD->LookupApproxFieldTypeHandle().AsMethodTable()->GetHFAType(); #endif + } + } break; case ELEMENT_TYPE_R4: @@ -1326,14 +1413,31 @@ EEClass::CheckForHFA() } } - if (hfaType == ELEMENT_TYPE_END) + switch (hfaType) + { + case ELEMENT_TYPE_R4: + elemSize = 4; + break; + case ELEMENT_TYPE_R8: + elemSize = 8; + break; +#ifdef _TARGET_ARM64_ + case ELEMENT_TYPE_VALUETYPE: + // Should already have set elemSize, but be conservative + if (elemSize == 0) + { + return false; + } + break; +#endif + default: + // ELEMENT_TYPE_END return false; + } if (!hasZeroOffsetField) // If the struct doesn't have a zero-offset field, it's not an HFA. return false; - int elemSize = (hfaType == ELEMENT_TYPE_R8) ? sizeof(double) : sizeof(float); - // Note that we check the total size, but do not perform any checks on number of fields: // - Type of fields can be HFA valuetype itself // - Managed C++ HFA valuetypes have just one of type float to signal that @@ -1348,7 +1452,7 @@ EEClass::CheckForHFA() if (totalSize / elemSize > 4) return false; - // All the above tests passed. It's HFA! + // All the above tests passed. It's HFA(/HVA)! 
#if defined(FEATURE_HFA) GetMethodTable()->SetIsHFA(); #endif @@ -1421,7 +1525,16 @@ CorElementType EEClassLayoutInfo::GetNativeHFATypeRaw() if (hfaType == ELEMENT_TYPE_END) return ELEMENT_TYPE_END; - int elemSize = (hfaType == ELEMENT_TYPE_R8) ? sizeof(double) : sizeof(float); + int elemSize = 1; + switch (hfaType) + { + case ELEMENT_TYPE_R4: elemSize = sizeof(float); break; + case ELEMENT_TYPE_R8: elemSize = sizeof(double); break; +#ifdef _TARGET_ARM64_ + case ELEMENT_TYPE_VALUETYPE: elemSize = 16; break; +#endif + default: _ASSERTE(!"Invalid HFA Type"); + } // Note that we check the total size, but do not perform any checks on number of fields: // - Type of fields can be HFA valuetype itself diff --git a/src/vm/class.h b/src/vm/class.h index 2853aee330e2..a1e7aebc1e21 100644 --- a/src/vm/class.h +++ b/src/vm/class.h @@ -414,8 +414,11 @@ class EEClassLayoutInfo #endif // UNIX_AMD64_ABI #ifdef FEATURE_HFA // HFA type of the unmanaged layout + // Note that these are not flags, they are discrete values. e_R4_HFA = 0x10, e_R8_HFA = 0x20, + e_16_HFA = 0x30, + e_HFATypeFlags = 0x30, #endif }; @@ -526,15 +529,19 @@ class EEClassLayoutInfo bool IsNativeHFA() { LIMITED_METHOD_CONTRACT; - return (m_bFlags & (e_R4_HFA | e_R8_HFA)) != 0; + return (m_bFlags & e_HFATypeFlags) != 0; } CorElementType GetNativeHFAType() { LIMITED_METHOD_CONTRACT; - if (IsNativeHFA()) - return (m_bFlags & e_R4_HFA) ? ELEMENT_TYPE_R4 : ELEMENT_TYPE_R8; - return ELEMENT_TYPE_END; + switch (m_bFlags & e_HFATypeFlags) + { + case e_R4_HFA: return ELEMENT_TYPE_R4; + case e_R8_HFA: return ELEMENT_TYPE_R8; + case e_16_HFA: return ELEMENT_TYPE_VALUETYPE; + default: return ELEMENT_TYPE_END; + } } #else // !FEATURE_HFA bool IsNativeHFA() @@ -580,7 +587,15 @@ class EEClassLayoutInfo void SetNativeHFAType(CorElementType hfaType) { LIMITED_METHOD_CONTRACT; - m_bFlags |= (hfaType == ELEMENT_TYPE_R4) ? e_R4_HFA : e_R8_HFA; + // We should call this at most once. 
+ _ASSERTE((m_bFlags & e_HFATypeFlags) == 0); + switch (hfaType) + { + case ELEMENT_TYPE_R4: m_bFlags |= e_R4_HFA; break; + case ELEMENT_TYPE_R8: m_bFlags |= e_R8_HFA; break; + case ELEMENT_TYPE_VALUETYPE: m_bFlags |= e_16_HFA; break; + default: _ASSERTE(!"Invalid HFA Type"); + } } #endif #ifdef UNIX_AMD64_ABI diff --git a/src/vm/methodtable.h b/src/vm/methodtable.h index 74febebc39bc..154efa2ee4aa 100644 --- a/src/vm/methodtable.h +++ b/src/vm/methodtable.h @@ -1929,6 +1929,9 @@ class MethodTable bool IsHFA(); #endif // FEATURE_HFA + // Returns the size in bytes of this type if it is a HW vector type; 0 otherwise. + int GetVectorSize(); + // Get the HFA type. This is supported both with FEATURE_HFA, in which case it // depends on the cached bit on the class, or without, in which case it is recomputed // for each invocation. diff --git a/tests/src/JIT/HardwareIntrinsics/Arm64/Simd.cs b/tests/src/JIT/HardwareIntrinsics/Arm64/Simd.cs index 97c0a42bd199..ad7602246b9d 100644 --- a/tests/src/JIT/HardwareIntrinsics/Arm64/Simd.cs +++ b/tests/src/JIT/HardwareIntrinsics/Arm64/Simd.cs @@ -1355,7 +1355,6 @@ static void TestExtract() testExtractOp>(name, (x) => Simd.Extract(x, 1), (x) => x[ 1]); testExtractOp>(name, (x) => Simd.Extract(x, 0), (x) => x[ 0]); testExtractOp>(name, (x) => Simd.Extract(x, 1), (x) => x[ 1]); -#if Broken // Test non-constant call testExtractOp>(name, (x) => simdExtract(x, 0), (x) => x[ 0]); @@ -1472,7 +1471,6 @@ static void TestExtract() testThrowsArgumentOutOfRangeException>(name, (x, y) => Simd.Extract(x, 4)); testThrowsArgumentOutOfRangeException>(name, (x, y) => Simd.Extract(x, 2)); testThrowsArgumentOutOfRangeException>(name, (x, y) => Simd.Extract(x, 2)); -#endif testThrowsTypeNotSupported>(name, (x, y) => { return Simd.Extract(x, 1) > 1 ? x : y; }); testThrowsTypeNotSupported>(name, (x, y) => { return Simd.Extract(x, 1) > 1 ? 
x : y; }); @@ -1528,7 +1526,6 @@ static void TestInsert() testPermuteOp>(name, (x, y) => Simd.Insert(x, 1, (ushort)2), (i, x, y) => (ushort)(i != 1 ? x[i] : 2)); testPermuteOp>(name, (x, y) => Simd.Insert(x, 1, (int )2), (i, x, y) => (int )(i != 1 ? x[i] : 2)); testPermuteOp>(name, (x, y) => Simd.Insert(x, 1, (uint )2), (i, x, y) => (uint )(i != 1 ? x[i] : 2)); -#if Broken testPermuteOp>(name, (x, y) => Simd.Insert(x, 3, Simd.Extract(y, 1)), (i, x, y) => (float )(i != 3 ? x[i] : y[1])); testPermuteOp>(name, (x, y) => Simd.Insert(x, 0, Simd.Extract(y, 1)), (i, x, y) => (double)(i != 0 ? x[i] : y[1])); @@ -1565,7 +1562,6 @@ static void TestInsert() testThrowsArgumentOutOfRangeException, Vector64< ushort>>(name, (x, y) => Simd.Insert(x, 4, (ushort)1)); testThrowsArgumentOutOfRangeException, Vector64< int >>(name, (x, y) => Simd.Insert(x, 2, (int )1)); testThrowsArgumentOutOfRangeException, Vector64< uint >>(name, (x, y) => Simd.Insert(x, 2, (uint )1)); -#endif testThrowsTypeNotSupported>(name, (x, y) => Simd.Insert(x, 1, true)); testThrowsTypeNotSupported>(name, (x, y) => Simd.Insert(x, 1, ( long )5));