dotnet · amanasifkhalid · Jul 15, 2024 · Jul 10, 2024 · Jul 11, 2024 · Jul 12, 2024
diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp
@@ -504,6 +504,11 @@ void HWIntrinsicInfo::lookupImmBounds(
                 immUpperBound = (int)SVE_PRFOP_CONST15;
                 break;
 
+            case NI_Sve_TrigonometricMultiplyAddCoefficient:
+                immLowerBound = 0; // FTMAD writes its immediate as #imm3; the managed API declares it as
+                immUpperBound = 7; // [ConstantExpected(Min = 0, Max = 7)], so the bounds here are 0 and 7.
+                break;
+
             default:
                 unreached();
         }

diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp
@@ -2297,6 +2297,26 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                 break;
             }
 
+            case NI_Sve_TrigonometricMultiplyAddCoefficient:
+            {
+                assert(isRMW);
+                // The instruction below is emitted with only targetReg and op2Reg, so targetReg must already hold op1.
+                if (targetReg != op1Reg)
+                {
+                    assert(targetReg != op2Reg);
+                    // Seed targetReg from op1Reg with MOVPRFX; the assert above rules out clobbering op2 by doing so.
+                    GetEmitter()->emitInsSve_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg);
+                }
+                // op3 carries the immediate operand; the helper supplies each immediate value to emit for.
+                HWIntrinsicImmOpHelper helper(this, intrin.op3, node);
+                // NOTE(review): presumably one iteration when op3 is a contained constant, otherwise one case per legal immediate - confirm.
+                for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
+                {
+                    GetEmitter()->emitInsSve_R_R_I(ins, emitSize, targetReg, op2Reg, helper.ImmValue(), opt);
+                }
+                break;
+            }
+
             default:
                 unreached();
         }

diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h
@@ -243,6 +243,7 @@ HARDWARE_INTRINSIC(Sve,           TestFirstTrue,
 HARDWARE_INTRINSIC(Sve,           TestLastTrue,                                                     -1,      2,      true,  {INS_sve_ptest,      INS_sve_ptest,      INS_sve_ptest,      INS_sve_ptest,      INS_sve_ptest,      INS_sve_ptest,      INS_sve_ptest,      INS_sve_ptest,      INS_invalid,        INS_invalid},     HW_Category_SIMD,                  HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(Sve,           TransposeEven,                                                    -1,      2,      true,  {INS_sve_trn1,       INS_sve_trn1,       INS_sve_trn1,       INS_sve_trn1,       INS_sve_trn1,       INS_sve_trn1,       INS_sve_trn1,       INS_sve_trn1,       INS_sve_trn1,       INS_sve_trn1},    HW_Category_SIMD,                  HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(Sve,           TransposeOdd,                                                     -1,      2,      true,  {INS_sve_trn2,       INS_sve_trn2,       INS_sve_trn2,       INS_sve_trn2,       INS_sve_trn2,       INS_sve_trn2,       INS_sve_trn2,       INS_sve_trn2,       INS_sve_trn2,       INS_sve_trn2},    HW_Category_SIMD,                  HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(Sve,           TrigonometricMultiplyAddCoefficient,                              -1,      3,      true,  {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_sve_ftmad,      INS_sve_ftmad},   HW_Category_SIMD,                  HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(Sve,           UnzipEven,                                                        -1,      2,      true,  {INS_sve_uzp1,       INS_sve_uzp1,       INS_sve_uzp1,       INS_sve_uzp1,       INS_sve_uzp1,       INS_sve_uzp1,       INS_sve_uzp1,       INS_sve_uzp1,       INS_sve_uzp1,       INS_sve_uzp1},    HW_Category_SIMD,                  HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(Sve,           UnzipOdd,                                                         -1,      2,      true,  {INS_sve_uzp2,       INS_sve_uzp2,       INS_sve_uzp2,       INS_sve_uzp2,       INS_sve_uzp2,       INS_sve_uzp2,       INS_sve_uzp2,       INS_sve_uzp2,       INS_sve_uzp2,       INS_sve_uzp2},    HW_Category_SIMD,                  HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(Sve,           VectorTableLookup,                                                -1,      2,      true,  {INS_sve_tbl,        INS_sve_tbl,        INS_sve_tbl,        INS_sve_tbl,        INS_sve_tbl,        INS_sve_tbl,        INS_sve_tbl,        INS_sve_tbl,        INS_sve_tbl,        INS_sve_tbl},     HW_Category_SIMD,                  HW_Flag_Scalable)

diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp
@@ -3351,6 +3351,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
             case NI_Sve_PrefetchInt32:
             case NI_Sve_PrefetchInt64:
             case NI_Sve_ExtractVector:
+            case NI_Sve_TrigonometricMultiplyAddCoefficient:
                 assert(hasImmediateOperand);
                 assert(varTypeIsIntegral(intrin.op3));
                 if (intrin.op3->IsCnsIntOrI())

diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp
@@ -1450,6 +1450,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
                     case NI_Sve_PrefetchInt32:
                     case NI_Sve_PrefetchInt64:
                     case NI_Sve_ExtractVector:
+                    case NI_Sve_TrigonometricMultiplyAddCoefficient:
                         needBranchTargetReg = !intrin.op3->isContainedIntOrIImmed();
                         break;
 

diff --git a/...ries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs b/...ries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs
@@ -8488,6 +8488,21 @@ internal Arm64() { }
         public static unsafe Vector<ulong> TransposeOdd(Vector<ulong> left, Vector<ulong> right) { throw new PlatformNotSupportedException(); }
 
 
+        ///  TrigonometricMultiplyAddCoefficient : Trigonometric multiply-add coefficient
+
+        /// <summary>
+        /// svfloat64_t svtmad[_f64](svfloat64_t op1, svfloat64_t op2, uint64_t imm3)
+        ///   FTMAD Ztied1.D, Ztied1.D, Zop2.D, #imm3
+        /// </summary>
+        public static unsafe Vector<double> TrigonometricMultiplyAddCoefficient(Vector<double> left, Vector<double> right, [ConstantExpected(Min = 0, Max = (byte)(7))] byte control) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// svfloat32_t svtmad[_f32](svfloat32_t op1, svfloat32_t op2, uint64_t imm3)
+        ///   FTMAD Ztied1.S, Ztied1.S, Zop2.S, #imm3
+        /// </summary>
+        public static unsafe Vector<float> TrigonometricMultiplyAddCoefficient(Vector<float> left, Vector<float> right, [ConstantExpected(Min = 0, Max = (byte)(7))] byte control) { throw new PlatformNotSupportedException(); }
+
+
         ///  UnzipEven : Concatenate even elements from two inputs
 
         /// <summary>

diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs
@@ -8533,6 +8533,21 @@ internal Arm64() { }
         public static unsafe Vector<ulong> TransposeOdd(Vector<ulong> left, Vector<ulong> right) => TransposeOdd(left, right);
 
 
+        ///  TrigonometricMultiplyAddCoefficient : Trigonometric multiply-add coefficient
+
+        /// <summary>
+        /// svfloat64_t svtmad[_f64](svfloat64_t op1, svfloat64_t op2, uint64_t imm3)
+        ///   FTMAD Ztied1.D, Ztied1.D, Zop2.D, #imm3
+        /// </summary>
+        public static unsafe Vector<double> TrigonometricMultiplyAddCoefficient(Vector<double> left, Vector<double> right, [ConstantExpected(Min = 0, Max = (byte)(7))] byte control) => TrigonometricMultiplyAddCoefficient(left, right, control);
+
+        /// <summary>
+        /// svfloat32_t svtmad[_f32](svfloat32_t op1, svfloat32_t op2, uint64_t imm3)
+        ///   FTMAD Ztied1.S, Ztied1.S, Zop2.S, #imm3
+        /// </summary>
+        public static unsafe Vector<float> TrigonometricMultiplyAddCoefficient(Vector<float> left, Vector<float> right, [ConstantExpected(Min = 0, Max = (byte)(7))] byte control) => TrigonometricMultiplyAddCoefficient(left, right, control);
+
+
         ///  UnzipEven : Concatenate even elements from two inputs
 
         /// <summary>

diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs
@@ -5458,6 +5458,9 @@ internal Arm64() { }
         public static System.Numerics.Vector<uint> TransposeOdd(System.Numerics.Vector<uint> left, System.Numerics.Vector<uint> right) { throw null; }
         public static System.Numerics.Vector<ulong> TransposeOdd(System.Numerics.Vector<ulong> left, System.Numerics.Vector<ulong> right) { throw null; }
 
+        public static System.Numerics.Vector<double> TrigonometricMultiplyAddCoefficient(System.Numerics.Vector<double> left, System.Numerics.Vector<double> right, [ConstantExpected(Min = 0, Max = (byte)(7))] byte control) { throw null; }
+        public static System.Numerics.Vector<float> TrigonometricMultiplyAddCoefficient(System.Numerics.Vector<float> left, System.Numerics.Vector<float> right, [ConstantExpected(Min = 0, Max = (byte)(7))] byte control) { throw null; }
+
         public static System.Numerics.Vector<sbyte> UnzipEven(System.Numerics.Vector<sbyte> left, System.Numerics.Vector<sbyte> right) { throw null; }
         public static System.Numerics.Vector<short> UnzipEven(System.Numerics.Vector<short> left, System.Numerics.Vector<short> right) { throw null; }
         public static System.Numerics.Vector<int> UnzipEven(System.Numerics.Vector<int> left, System.Numerics.Vector<int> right) { throw null; }

diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs
@@ -4084,6 +4084,9 @@
     ("SveSimpleVecOpTest.template",                  new Dictionary<string, string> { ["TestName"] = "Sve_Sqrt_float",                                                                             ["Isa"] = "Sve",           ["LoadIsa"] = "Sve",     ["Method"] = "Sqrt",                                                                ["RetVectorType"] = "Vector",    ["RetBaseType"] = "Single",  ["Op1VectorType"] = "Vector",    ["Op1BaseType"] = "Single",                                                                                                                                 ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()",                                                                                                                                                  ["ValidateIterResult"] = "result[i] != Helpers.Sqrt(firstOp[i])", ["GetIterResult"] = "Helpers.Sqrt(leftOp[i])"}),
     ("SveSimpleVecOpTest.template",                  new Dictionary<string, string> { ["TestName"] = "Sve_Sqrt_double",                                                                            ["Isa"] = "Sve",           ["LoadIsa"] = "Sve",     ["Method"] = "Sqrt",                                                                ["RetVectorType"] = "Vector",    ["RetBaseType"] = "Double",  ["Op1VectorType"] = "Vector",    ["Op1BaseType"] = "Double",                                                                                                                                 ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()",                                                                                                                                                  ["ValidateIterResult"] = "result[i] != Helpers.Sqrt(firstOp[i])", ["GetIterResult"] = "Helpers.Sqrt(leftOp[i])"}),
 
+    ("SveVecImmBinOpTest.template",       new Dictionary<string, string> {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_float",                                                          ["Isa"] = "Sve",           ["LoadIsa"] = "Sve",     ["Method"] = "TrigonometricMultiplyAddCoefficient",                                       ["RetVectorType"] = "Vector",    ["RetBaseType"] = "Single",  ["Op1VectorType"] = "Vector",    ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector",    ["Op2BaseType"] = "Single",                                                              ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()",        ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "(byte)(TestLibrary.Generator.GetByte() % 8)",                               ["ValidateIterResult"] = "Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i]", ["GetIterResult"] = "Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm)"}),
+    ("SveVecImmBinOpTest.template",       new Dictionary<string, string> {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_double",                                                         ["Isa"] = "Sve",           ["LoadIsa"] = "Sve",     ["Method"] = "TrigonometricMultiplyAddCoefficient",                                       ["RetVectorType"] = "Vector",    ["RetBaseType"] = "Double",  ["Op1VectorType"] = "Vector",    ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector",    ["Op2BaseType"] = "Double",                                                              ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()",        ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "(byte)(TestLibrary.Generator.GetByte() % 8)",                               ["ValidateIterResult"] = "Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i]", ["GetIterResult"] = "Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm)"}),
+
     ("SveVecTernOpMaskedTest.template",   new Dictionary<string, string> { ["TestName"] = "Sve_Splice_float",                                                                                      ["Isa"] = "Sve",           ["LoadIsa"] = "Sve",     ["Method"] = "Splice",                                                                    ["RetVectorType"] = "Vector",    ["RetBaseType"] = "Single",  ["Op1VectorType"] = "Vector",    ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector",    ["Op2BaseType"] = "Single",                                                              ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()",        ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                                                        ["ValidateIterResult"] = "result[i] != Helpers.Splice(first, second, maskArray, i)",  ["GetIterResult"] = "Helpers.Splice(left, right, mask, i)", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}),
     ("SveVecTernOpMaskedTest.template",   new Dictionary<string, string> { ["TestName"] = "Sve_Splice_double",                                                                                     ["Isa"] = "Sve",           ["LoadIsa"] = "Sve",     ["Method"] = "Splice",                                                                    ["RetVectorType"] = "Vector",    ["RetBaseType"] = "Double",  ["Op1VectorType"] = "Vector",    ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector",    ["Op2BaseType"] = "Double",                                                              ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()",        ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                                                        ["ValidateIterResult"] = "result[i] != Helpers.Splice(first, second, maskArray, i)",  ["GetIterResult"] = "Helpers.Splice(left, right, mask, i)", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}),
     ("SveVecTernOpMaskedTest.template",   new Dictionary<string, string> { ["TestName"] = "Sve_Splice_sbyte",                                                                                      ["Isa"] = "Sve",           ["LoadIsa"] = "Sve",     ["Method"] = "Splice",                                                                    ["RetVectorType"] = "Vector",    ["RetBaseType"] = "SByte",   ["Op1VectorType"] = "Vector",    ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector",    ["Op2BaseType"] = "SByte",                                                               ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()",         ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()",                                                                                         ["ValidateIterResult"] = "result[i] != Helpers.Splice(first, second, maskArray, i)",  ["GetIterResult"] = "Helpers.Splice(left, right, mask, i)", ["ConvertFunc"] = ""}),

diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs
@@ -5218,6 +5218,33 @@ public static float MultiplyExtended(float op1, float op2)
             }
         }
 
+        /// <summary>
+        /// Scalar reference for SVE FTMAD (single precision): returns the fused op1 * |op2| + coefficient.
+        /// The coefficient index is imm, plus 8 when the sign bit of op2 is set.
+        /// </summary>
+        public static float TrigonometricMultiplyAddCoefficient(float op1, float op2, byte imm)
+        {
+            // Test the sign bit, not `op2 < 0`: -0.0f (and sign-set NaNs) must select entries 8-15.
+            int index = float.IsNegative(op2) ? (imm + 8) : imm;
+            // Coefficients are stored as IEEE-754 single-precision bit patterns.
+            uint coeff = index switch
+            {
+                 0 => 0x3f800000,
+                 1 => 0xbe2aaaab,
+                 2 => 0x3c088886,
+                 3 => 0xb95008b9,
+                 4 => 0x36369d6d,
+                 8 => 0x3f800000,
+                 9 => 0xbf000000,
+                10 => 0x3d2aaaa6,
+                11 => 0xbab60705,
+                12 => 0x37cd37cc,
+                 _ => 0x00000000 // entries 5-7 and 13-15 are zero; any index above 15 also yields zero
+            };
+
+            return MathF.FusedMultiplyAdd(op1, Math.Abs(op2), BitConverter.UInt32BitsToSingle(coeff));
+        }
+
         public static float FPReciprocalStepFused(float op1, float op2) => FusedMultiplySubtract(2, op1, op2);
 
         public static float FPReciprocalSqrtStepFused(float op1, float op2) => FusedMultiplySubtract(3, op1, op2) / 2;
@@ -5262,6 +5289,33 @@ public static double MultiplyExtended(double op1, double op2)
             }
         }
 
+        /// <summary>Scalar reference for SVE FTMAD (double precision): fused op1 * |op2| + coefficient, indexed by imm, plus 8 when the sign bit of op2 is set.</summary>
+        public static double TrigonometricMultiplyAddCoefficient(double op1, double op2, byte imm)
+        {
+            // Test the sign bit, not `op2 < 0`: -0.0 (and sign-set NaNs) must select entries 8-15.
+            int index = double.IsNegative(op2) ? (imm + 8) : imm;
+            ulong coeff = index switch
+            {
+                 0 => 0x3ff0000000000000,
+                 1 => 0xbfc5555555555543,
+                 2 => 0x3f8111111110f30c,
+                 3 => 0xbf2a01a019b92fc6,
+                 4 => 0x3ec71de351f3d22b,
+                 5 => 0xbe5ae5e2b60f7b91,
+                 6 => 0x3de5d8408868552f,
+                 8 => 0x3ff0000000000000,
+                 9 => 0xbfe0000000000000,
+                10 => 0x3fa5555555555536,
+                11 => 0xbf56c16c16c13a0b,
+                12 => 0x3efa01a019b1e8d8,
+                13 => 0xbe927e4f7282f468,
+                14 => 0x3e21ee96d2641b13,
+                15 => 0xbda8f76380fbb401,
+                 _ => 0x0000000000000000 // entry 7 is zero; any index above 15 also yields zero
+            };
+            return Math.FusedMultiplyAdd(op1, Math.Abs(op2), BitConverter.UInt64BitsToDouble(coeff));
+        }
+
         public static double FPReciprocalStepFused(double op1, double op2) => FusedMultiplySubtract(2, op1, op2);
 
         public static double FPReciprocalSqrtStepFused(double op1, double op2) => FusedMultiplySubtract(3, op1, op2) / 2;