Skip to content

Commit

Permalink
ARM64-SVE: Add TrigonometricMultiplyAddCoefficient (#104697)
Browse files Browse the repository at this point in the history
  • Loading branch information
amanasifkhalid committed Jul 15, 2024
1 parent ad25cd0 commit 5354e0f
Show file tree
Hide file tree
Showing 11 changed files with 155 additions and 4 deletions.
5 changes: 5 additions & 0 deletions src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,11 @@ void HWIntrinsicInfo::lookupImmBounds(
immUpperBound = (int)SVE_PRFOP_CONST15;
break;

case NI_Sve_TrigonometricMultiplyAddCoefficient:
immLowerBound = 0;
immUpperBound = 7;
break;

default:
unreached();
}
Expand Down
20 changes: 20 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2330,6 +2330,26 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
break;
}

case NI_Sve_TrigonometricMultiplyAddCoefficient:
{
assert(isRMW);

if (targetReg != op1Reg)
{
assert(targetReg != op2Reg);

GetEmitter()->emitInsSve_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg);
}

HWIntrinsicImmOpHelper helper(this, intrin.op3, node);

for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
{
GetEmitter()->emitInsSve_R_R_I(ins, emitSize, targetReg, op2Reg, helper.ImmValue(), opt);
}
break;
}

default:
unreached();
}
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ HARDWARE_INTRINSIC(Sve, TestFirstTrue,
HARDWARE_INTRINSIC(Sve, TestLastTrue, -1, 2, true, {INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, TransposeEven, -1, 2, true, {INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, TransposeOdd, -1, 2, true, {INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, TrigonometricMultiplyAddCoefficient, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ftmad, INS_sve_ftmad}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, TrigonometricSelectCoefficient, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ftssel, INS_sve_ftssel}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Sve, TrigonometricStartingValue, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ftsmul, INS_sve_ftsmul}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Sve, UnzipEven, -1, 2, true, {INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/lowerarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3434,6 +3434,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
case NI_Sve_PrefetchInt32:
case NI_Sve_PrefetchInt64:
case NI_Sve_ExtractVector:
case NI_Sve_TrigonometricMultiplyAddCoefficient:
assert(hasImmediateOperand);
assert(varTypeIsIntegral(intrin.op3));
if (intrin.op3->IsCnsIntOrI())
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1450,6 +1450,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
case NI_Sve_PrefetchInt32:
case NI_Sve_PrefetchInt64:
case NI_Sve_ExtractVector:
case NI_Sve_TrigonometricMultiplyAddCoefficient:
needBranchTargetReg = !intrin.op3->isContainedIntOrIImmed();
break;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8631,6 +8631,21 @@ internal Arm64() { }
public static unsafe Vector<ulong> TransposeOdd(Vector<ulong> left, Vector<ulong> right) { throw new PlatformNotSupportedException(); }


/// Trigonometric multiply-add coefficient

/// <summary>
/// svfloat64_t svtmad[_f64](svfloat64_t op1, svfloat64_t op2, uint64_t imm3)
/// FTMAD Ztied1.D, Ztied1.D, Zop2.D, #imm3
/// </summary>
public static unsafe Vector<double> TrigonometricMultiplyAddCoefficient(Vector<double> left, Vector<double> right, [ConstantExpected(Min = 0, Max = (byte)(7))] byte control) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svfloat32_t svtmad[_f32](svfloat32_t op1, svfloat32_t op2, uint64_t imm3)
/// FTMAD Ztied1.S, Ztied1.S, Zop2.S, #imm3
/// </summary>
public static unsafe Vector<float> TrigonometricMultiplyAddCoefficient(Vector<float> left, Vector<float> right, [ConstantExpected(Min = 0, Max = (byte)(7))] byte control) { throw new PlatformNotSupportedException(); }


/// Trigonometric select coefficient

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8676,6 +8676,21 @@ internal Arm64() { }
public static unsafe Vector<ulong> TransposeOdd(Vector<ulong> left, Vector<ulong> right) => TransposeOdd(left, right);


/// Trigonometric multiply-add coefficient

/// <summary>
/// svfloat64_t svtmad[_f64](svfloat64_t op1, svfloat64_t op2, uint64_t imm3)
/// FTMAD Ztied1.D, Ztied1.D, Zop2.D, #imm3
/// </summary>
public static unsafe Vector<double> TrigonometricMultiplyAddCoefficient(Vector<double> left, Vector<double> right, [ConstantExpected(Min = 0, Max = (byte)(7))] byte control) => TrigonometricMultiplyAddCoefficient(left, right, control);

/// <summary>
/// svfloat32_t svtmad[_f32](svfloat32_t op1, svfloat32_t op2, uint64_t imm3)
/// FTMAD Ztied1.S, Ztied1.S, Zop2.S, #imm3
/// </summary>
public static unsafe Vector<float> TrigonometricMultiplyAddCoefficient(Vector<float> left, Vector<float> right, [ConstantExpected(Min = 0, Max = (byte)(7))] byte control) => TrigonometricMultiplyAddCoefficient(left, right, control);


/// Trigonometric select coefficient

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5603,6 +5603,8 @@ internal Arm64() { }
public static System.Numerics.Vector<uint> TransposeOdd(System.Numerics.Vector<uint> left, System.Numerics.Vector<uint> right) { throw null; }
public static System.Numerics.Vector<ulong> TransposeOdd(System.Numerics.Vector<ulong> left, System.Numerics.Vector<ulong> right) { throw null; }

public static System.Numerics.Vector<double> TrigonometricMultiplyAddCoefficient(System.Numerics.Vector<double> left, System.Numerics.Vector<double> right, [ConstantExpected(Min = 0, Max = (byte)(7))] byte control) { throw null; }
public static System.Numerics.Vector<float> TrigonometricMultiplyAddCoefficient(System.Numerics.Vector<float> left, System.Numerics.Vector<float> right, [ConstantExpected(Min = 0, Max = (byte)(7))] byte control) { throw null; }
public static System.Numerics.Vector<double> TrigonometricSelectCoefficient(System.Numerics.Vector<double> value, System.Numerics.Vector<ulong> selector) { throw null; }
public static System.Numerics.Vector<float> TrigonometricSelectCoefficient(System.Numerics.Vector<float> value, System.Numerics.Vector<uint> selector) { throw null; }
public static System.Numerics.Vector<double> TrigonometricStartingValue(System.Numerics.Vector<double> value, System.Numerics.Vector<ulong> sign) { throw null; }
Expand Down
Loading

0 comments on commit 5354e0f

Please sign in to comment.