From 7bbe2fdb24c86a2b21aa6a44dc3175cb1c4d10e7 Mon Sep 17 00:00:00 2001 From: Steven Johnson Date: Tue, 30 Mar 2021 16:00:23 -0700 Subject: [PATCH] Add wasm support for int32->f64 and f32->f64 simd ops (#5863) * Add wasm support for int32->f64 and f32->f64 simd ops At top-of-tree LLVM, the wasm backend never seems to emit the vector version of these ops; pattern-match to target them specifically. --- src/CodeGen_WebAssembly.cpp | 7 +++++++ src/runtime/wasm_math.ll | 33 ++++++++++++++++++++++++++++++ test/correctness/simd_op_check.cpp | 8 +++----- 3 files changed, 43 insertions(+), 5 deletions(-) diff --git a/src/CodeGen_WebAssembly.cpp b/src/CodeGen_WebAssembly.cpp index 61eb4aa28888..e13431830f6c 100644 --- a/src/CodeGen_WebAssembly.cpp +++ b/src/CodeGen_WebAssembly.cpp @@ -94,6 +94,10 @@ const WasmIntrinsic intrinsic_defs[] = { {"llvm.wasm.extadd.pairwise.unsigned.v8i16", Int(16, 8), "pairwise_widening_add", {UInt(8, 16)}, Target::WasmSimd128}, {"llvm.wasm.extadd.pairwise.unsigned.v4i32", Int(32, 4), "pairwise_widening_add", {UInt(16, 8)}, Target::WasmSimd128}, + {"i32_to_double_s", Float(64, 4), "int_to_double", {Int(32, 4)}, Target::WasmSimd128}, + {"i32_to_double_u", Float(64, 4), "int_to_double", {UInt(32, 4)}, Target::WasmSimd128}, + {"float_to_double", Float(64, 4), "float_to_double", {Float(32, 4)}, Target::WasmSimd128}, + // Basically like ARM's SQRDMULH {"llvm.wasm.q15mulr.sat.signed", Int(16, 8), "q15mulr_sat_s", {Int(16, 8), Int(16, 8)}, Target::WasmSimd128}, @@ -147,6 +151,9 @@ void CodeGen_WebAssembly::visit(const Cast *op) { {"saturating_narrow", u8_sat(wild_i16x_), Target::WasmSimd128}, {"saturating_narrow", i16_sat(wild_i32x_), Target::WasmSimd128}, {"saturating_narrow", u16_sat(wild_i32x_), Target::WasmSimd128}, + {"int_to_double", f64(wild_i32x_), Target::WasmSimd128}, + {"int_to_double", f64(wild_u32x_), Target::WasmSimd128}, + {"float_to_double", f64(wild_f32x_), Target::WasmSimd128}, }; // clang-format on diff --git a/src/runtime/wasm_math.ll 
b/src/runtime/wasm_math.ll
index 6ea43e90fed2..1748a841da7c 100644
--- a/src/runtime/wasm_math.ll
+++ b/src/runtime/wasm_math.ll
@@ -138,3 +138,36 @@ define weak_odr <8 x i16> @saturating_narrow_i32x8_to_u16x8(<8 x i32> %x) nounwi
   %3 = tail call <8 x i16> @llvm.wasm.narrow.unsigned.v8i16.v4i32(<4 x i32> %1, <4 x i32> %2)
   ret <8 x i16> %3
 }
+
+; Integer to double-precision floating point
+
+declare <2 x double> @llvm.wasm.convert.low.signed(<4 x i32>)
+declare <2 x double> @llvm.wasm.convert.low.unsigned(<4 x i32>)
+
+define weak_odr <4 x double> @i32_to_double_s(<4 x i32> %x) nounwind alwaysinline {
+  %1 = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %2 = tail call <2 x double> @llvm.wasm.convert.low.signed(<4 x i32> %x)
+  %3 = tail call <2 x double> @llvm.wasm.convert.low.signed(<4 x i32> %1)
+  %4 = shufflevector <2 x double> %2, <2 x double> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x double> %4
+}
+
+define weak_odr <4 x double> @i32_to_double_u(<4 x i32> %x) nounwind alwaysinline {
+  %1 = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %2 = tail call <2 x double> @llvm.wasm.convert.low.unsigned(<4 x i32> %x)
+  %3 = tail call <2 x double> @llvm.wasm.convert.low.unsigned(<4 x i32> %1)
+  %4 = shufflevector <2 x double> %2, <2 x double> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x double> %4
+}
+
+; single to double-precision floating point
+
+declare <2 x double> @llvm.wasm.promote.low(<4 x float>)
+
+define weak_odr <4 x double> @float_to_double(<4 x float> %x) nounwind alwaysinline {
+  %1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %2 = tail call <2 x double> @llvm.wasm.promote.low(<4 x float> %x)
+  %3 = tail call <2 x double> @llvm.wasm.promote.low(<4 x float> %1)
+  %4 = shufflevector <2 x double> %2, <2 x double> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x double> %4
+}
diff --git a/test/correctness/simd_op_check.cpp b/test/correctness/simd_op_check.cpp
index 2e910f66fb8f..db4f24f407d3 100644
--- a/test/correctness/simd_op_check.cpp
+++ b/test/correctness/simd_op_check.cpp
@@ 
-2137,9 +2137,8 @@ class SimdOpCheck : public SimdOpCheckTest {
                 check("f32x4.convert_i32x4_u", 8 * w, cast<float>(u32_1));
 
                 // Integer to double-precision floating point
-                // TODO(https://github.com/halide/Halide/issues/5130): NOT BEING GENERATED AT TRUNK
-                // check("f64x2.convert_low_i32x4_s", 4 * w, cast<double>(i32_1));
-                // check("f64x2.convert_low_i32x4_u", 4 * w, cast<double>(u32_1));
+                check("f64x2.convert_low_i32x4_s", 2 * w, cast<double>(i32_1));
+                check("f64x2.convert_low_i32x4_u", 2 * w, cast<double>(u32_1));
 
                 // Single-precision floating point to integer with saturation
                 check("i32x4.trunc_sat_f32x4_s", 4 * w, cast<int32_t>(f32_1));
@@ -2155,8 +2154,7 @@ class SimdOpCheck : public SimdOpCheckTest {
                 // check("f32x4.demote_f64x2_zero", 4 * w, ???);
 
                 // Single-precision floating point to double-precision
-                // TODO(https://github.com/halide/Halide/issues/5130): NOT BEING GENERATED AT TRUNK
-                // check("f64x2.promote_low_f32x4", 4 * w, ???);
+                check("f64x2.promote_low_f32x4", 2 * w, cast<double>(f32_1));
 
                 // Integer to integer narrowing
                 check("i8x16.narrow_i16x8_s", 16 * w, i8_sat(i16_1));