Skip to content

Commit

Permalink
Add wasm support for int32->f64 and f32->f64 simd ops (#5863)
Browse files Browse the repository at this point in the history
* Add wasm support for int32->f64 and f32->f64 simd ops

At top-of-tree LLVM, the wasm backend never seems to emit the vector version of these ops; pattern-match to target them specifically.
  • Loading branch information
steven-johnson committed Mar 30, 2021
1 parent e7eec5c commit 7bbe2fd
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 5 deletions.
7 changes: 7 additions & 0 deletions src/CodeGen_WebAssembly.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ const WasmIntrinsic intrinsic_defs[] = {
{"llvm.wasm.extadd.pairwise.unsigned.v8i16", Int(16, 8), "pairwise_widening_add", {UInt(8, 16)}, Target::WasmSimd128},
{"llvm.wasm.extadd.pairwise.unsigned.v4i32", Int(32, 4), "pairwise_widening_add", {UInt(16, 8)}, Target::WasmSimd128},

{"i32_to_double_s", Float(64, 4), "int_to_double", {Int(32, 4)}, Target::WasmSimd128},
{"i32_to_double_u", Float(64, 4), "int_to_double", {UInt(32, 4)}, Target::WasmSimd128},
{"float_to_double", Float(64, 4), "float_to_double", {Float(32, 4)}, Target::WasmSimd128},

// Basically like ARM's SQRDMULH
{"llvm.wasm.q15mulr.sat.signed", Int(16, 8), "q15mulr_sat_s", {Int(16, 8), Int(16, 8)}, Target::WasmSimd128},

Expand Down Expand Up @@ -147,6 +151,9 @@ void CodeGen_WebAssembly::visit(const Cast *op) {
{"saturating_narrow", u8_sat(wild_i16x_), Target::WasmSimd128},
{"saturating_narrow", i16_sat(wild_i32x_), Target::WasmSimd128},
{"saturating_narrow", u16_sat(wild_i32x_), Target::WasmSimd128},
{"int_to_double", f64(wild_i32x_), Target::WasmSimd128},
{"int_to_double", f64(wild_u32x_), Target::WasmSimd128},
{"float_to_double", f64(wild_f32x_), Target::WasmSimd128},
};
// clang-format on

Expand Down
33 changes: 33 additions & 0 deletions src/runtime/wasm_math.ll
Original file line number Diff line number Diff line change
Expand Up @@ -138,3 +138,36 @@ define weak_odr <8 x i16> @saturating_narrow_i32x8_to_u16x8(<8 x i32> %x) nounwi
%3 = tail call <8 x i16> @llvm.wasm.narrow.unsigned.v8i16.v4i32(<4 x i32> %1, <4 x i32> %2)
ret <8 x i16> %3
}

; Integer to double-precision floating point

declare <2 x double> @llvm.wasm.convert.low.signed(<4 x i32>)
declare <2 x double> @llvm.wasm.convert.low.unsigned(<4 x i32>)

; Widen a <4 x i32> vector to <4 x double> using the signed wasm conversion
; intrinsic. Each intrinsic call yields only a <2 x double> (presumably
; from the two low input lanes, as the intrinsic's name suggests -- confirm
; against the LLVM wasm intrinsic definitions), so it is called twice: once
; on %x directly and once on a copy of %x whose lanes 2 and 3 have been
; shuffled down into lanes 0 and 1. The two halves are then concatenated in
; lane order.
define weak_odr <4 x double> @i32_to_double_s(<4 x i32> %x) nounwind alwaysinline {
  ; Convert using %x unmodified (supplies result lanes 0 and 1).
  %lo_f64 = tail call <2 x double> @llvm.wasm.convert.low.signed(<4 x i32> %x)
  ; Move input lanes 2,3 into positions 0,1; the upper two lanes are don't-care.
  %hi_lanes = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %hi_f64 = tail call <2 x double> @llvm.wasm.convert.low.signed(<4 x i32> %hi_lanes)
  ; Concatenate the two <2 x double> halves into one <4 x double>.
  %widened = shufflevector <2 x double> %lo_f64, <2 x double> %hi_f64, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %widened
}

; Widen a <4 x i32> vector to <4 x double> using the unsigned wasm conversion
; intrinsic. Each intrinsic call yields only a <2 x double> (presumably
; from the two low input lanes, as the intrinsic's name suggests -- confirm
; against the LLVM wasm intrinsic definitions), so it is called twice: once
; on %x directly and once on a copy of %x whose lanes 2 and 3 have been
; shuffled down into lanes 0 and 1. The two halves are then concatenated in
; lane order.
define weak_odr <4 x double> @i32_to_double_u(<4 x i32> %x) nounwind alwaysinline {
  ; Convert using %x unmodified (supplies result lanes 0 and 1).
  %lo_f64 = tail call <2 x double> @llvm.wasm.convert.low.unsigned(<4 x i32> %x)
  ; Move input lanes 2,3 into positions 0,1; the upper two lanes are don't-care.
  %hi_lanes = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %hi_f64 = tail call <2 x double> @llvm.wasm.convert.low.unsigned(<4 x i32> %hi_lanes)
  ; Concatenate the two <2 x double> halves into one <4 x double>.
  %widened = shufflevector <2 x double> %lo_f64, <2 x double> %hi_f64, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %widened
}

; Single-precision to double-precision floating point

declare <2 x double> @llvm.wasm.promote.low(<4 x float>)

; Widen a <4 x float> vector to <4 x double> using the wasm promote
; intrinsic. Each intrinsic call yields only a <2 x double> (presumably
; from the two low input lanes, as the intrinsic's name suggests -- confirm
; against the LLVM wasm intrinsic definitions), so it is called twice: once
; on %x directly and once on a copy of %x whose lanes 2 and 3 have been
; shuffled down into lanes 0 and 1. The two halves are then concatenated in
; lane order.
define weak_odr <4 x double> @float_to_double(<4 x float> %x) nounwind alwaysinline {
  ; Promote using %x unmodified (supplies result lanes 0 and 1).
  %lo_f64 = tail call <2 x double> @llvm.wasm.promote.low(<4 x float> %x)
  ; Move input lanes 2,3 into positions 0,1; the upper two lanes are don't-care.
  %hi_lanes = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %hi_f64 = tail call <2 x double> @llvm.wasm.promote.low(<4 x float> %hi_lanes)
  ; Concatenate the two <2 x double> halves into one <4 x double>.
  %widened = shufflevector <2 x double> %lo_f64, <2 x double> %hi_f64, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %widened
}
8 changes: 3 additions & 5 deletions test/correctness/simd_op_check.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2137,9 +2137,8 @@ class SimdOpCheck : public SimdOpCheckTest {
check("f32x4.convert_i32x4_u", 8 * w, cast<float>(u32_1));

// Integer to double-precision floating point
// TODO(https://github.com/halide/Halide/issues/5130): NOT BEING GENERATED AT TRUNK
// check("f64x2.convert_low_i32x4_s", 4 * w, cast<double>(i32_1));
// check("f64x2.convert_low_i32x4_u", 4 * w, cast<double>(u32_1));
check("f64x2.convert_low_i32x4_s", 2 * w, cast<double>(i32_1));
check("f64x2.convert_low_i32x4_u", 2 * w, cast<double>(u32_1));

// Single-precision floating point to integer with saturation
check("i32x4.trunc_sat_f32x4_s", 4 * w, cast<int32_t>(f32_1));
Expand All @@ -2155,8 +2154,7 @@ class SimdOpCheck : public SimdOpCheckTest {
// check("f32x4.demote_f64x2_zero", 4 * w, ???);

// Single-precision floating point to double-precision
// TODO(https://github.com/halide/Halide/issues/5130): NOT BEING GENERATED AT TRUNK
// check("f64x2.promote_low_f32x4", 4 * w, ???);
check("f64x2.promote_low_f32x4", 2 * w, cast<double>(f32_1));

// Integer to integer narrowing
check("i8x16.narrow_i16x8_s", 16 * w, i8_sat(i16_1));
Expand Down

0 comments on commit 7bbe2fd

Please sign in to comment.