Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

add intrinsics for portable packed simd vector reductions #48983

Merged
merged 10 commits into from
Mar 17, 2018
40 changes: 40 additions & 0 deletions src/librustc_llvm/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1201,6 +1201,46 @@ extern "C" {
Name: *const c_char)
-> ValueRef;

// Foreign declarations of the vector-reduction instruction builders.
// Every function takes the instruction builder `B` and the source vector
// `Src`, and returns the `ValueRef` of the built reduction.

// Floating-point add/mul reductions; these additionally take an
// accumulator value `Acc`.
pub fn LLVMRustBuildVectorReduceFAdd(B: BuilderRef,
Acc: ValueRef,
Src: ValueRef)
-> ValueRef;
pub fn LLVMRustBuildVectorReduceFMul(B: BuilderRef,
Acc: ValueRef,
Src: ValueRef)
-> ValueRef;
// Add/mul reductions without an accumulator argument.
pub fn LLVMRustBuildVectorReduceAdd(B: BuilderRef,
Src: ValueRef)
-> ValueRef;
pub fn LLVMRustBuildVectorReduceMul(B: BuilderRef,
Src: ValueRef)
-> ValueRef;
// Bitwise and/or/xor reductions.
pub fn LLVMRustBuildVectorReduceAnd(B: BuilderRef,
Src: ValueRef)
-> ValueRef;
pub fn LLVMRustBuildVectorReduceOr(B: BuilderRef,
Src: ValueRef)
-> ValueRef;
pub fn LLVMRustBuildVectorReduceXor(B: BuilderRef,
Src: ValueRef)
-> ValueRef;
// Min/max reductions taking a signedness flag `IsSigned`.
pub fn LLVMRustBuildVectorReduceMin(B: BuilderRef,
Src: ValueRef,
IsSigned: bool)
-> ValueRef;
pub fn LLVMRustBuildVectorReduceMax(B: BuilderRef,
Src: ValueRef,
IsSigned: bool)
-> ValueRef;
// Floating-point min/max reductions taking a NaN-related flag `IsNaN`.
// NOTE(review): the polarity of `IsNaN` is not visible from this
// declaration (LLVM's `CreateFPMinReduce`/`CreateFPMaxReduce` take a
// `NoNaN` flag) -- confirm against the C++ definition.
pub fn LLVMRustBuildVectorReduceFMin(B: BuilderRef,
Src: ValueRef,
IsNaN: bool)
-> ValueRef;
pub fn LLVMRustBuildVectorReduceFMax(B: BuilderRef,
Src: ValueRef,
IsNaN: bool)
-> ValueRef;

// Foreign declarations of the null / not-null test builders. Each takes the
// instruction builder `B`, the value `Val` to test and a C string `Name`,
// and returns the `ValueRef` of the built instruction.
pub fn LLVMBuildIsNull(B: BuilderRef, Val: ValueRef, Name: *const c_char) -> ValueRef;
pub fn LLVMBuildIsNotNull(B: BuilderRef, Val: ValueRef, Name: *const c_char) -> ValueRef;
pub fn LLVMBuildPtrDiff(B: BuilderRef,
Expand Down
93 changes: 93 additions & 0 deletions src/librustc_trans/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -955,6 +955,99 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
}
}

/// Builds a floating-point add reduction of the vector `src` with the
/// accumulator `acc`, marks the resulting instruction as using unsafe
/// algebra, and returns it.
pub fn vector_reduce_fadd_fast(&self, acc: ValueRef, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.fadd_fast");
    // FIXME: add a non-fast math version once
    // https://bugs.llvm.org/show_bug.cgi?id=36732
    // is fixed.
    let reduction = unsafe { llvm::LLVMRustBuildVectorReduceFAdd(self.llbuilder, acc, src) };
    unsafe { llvm::LLVMRustSetHasUnsafeAlgebra(reduction); }
    reduction
}
/// Builds a floating-point multiply reduction of the vector `src` with the
/// accumulator `acc`, marks the resulting instruction as using unsafe
/// algebra, and returns it.
pub fn vector_reduce_fmul_fast(&self, acc: ValueRef, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.fmul_fast");
    // FIXME: add a non-fast math version once
    // https://bugs.llvm.org/show_bug.cgi?id=36732
    // is fixed.
    let reduction = unsafe { llvm::LLVMRustBuildVectorReduceFMul(self.llbuilder, acc, src) };
    unsafe { llvm::LLVMRustSetHasUnsafeAlgebra(reduction); }
    reduction
}
/// Builds an add reduction of the vector `src` and returns the built value.
pub fn vector_reduce_add(&self, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.add");
    unsafe { llvm::LLVMRustBuildVectorReduceAdd(self.llbuilder, src) }
}
/// Builds a multiply reduction of the vector `src` and returns the built value.
pub fn vector_reduce_mul(&self, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.mul");
    unsafe { llvm::LLVMRustBuildVectorReduceMul(self.llbuilder, src) }
}
/// Builds a bitwise-and reduction of the vector `src` and returns the built value.
pub fn vector_reduce_and(&self, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.and");
    unsafe { llvm::LLVMRustBuildVectorReduceAnd(self.llbuilder, src) }
}
/// Builds a bitwise-or reduction of the vector `src` and returns the built value.
pub fn vector_reduce_or(&self, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.or");
    unsafe { llvm::LLVMRustBuildVectorReduceOr(self.llbuilder, src) }
}
/// Builds a bitwise-xor reduction of the vector `src` and returns the built value.
pub fn vector_reduce_xor(&self, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.xor");
    unsafe { llvm::LLVMRustBuildVectorReduceXor(self.llbuilder, src) }
}
/// Builds a floating-point minimum reduction of the vector `src` and returns
/// the built value.
///
/// NOTE(review): the trailing `true` binds to the FFI parameter declared as
/// `IsNaN`. Confirm its polarity against the C++ side; if it is forwarded as
/// LLVM's `NoNaN` flag, this non-fast variant should pass `false`.
pub fn vector_reduce_fmin(&self, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.fmin");
    unsafe { llvm::LLVMRustBuildVectorReduceFMin(self.llbuilder, src, /* IsNaN: */ true) }
}
/// Builds a floating-point maximum reduction of the vector `src` and returns
/// the built value.
///
/// NOTE(review): the trailing `true` binds to the FFI parameter declared as
/// `IsNaN`. Confirm its polarity against the C++ side; if it is forwarded as
/// LLVM's `NoNaN` flag, this non-fast variant should pass `false`.
pub fn vector_reduce_fmax(&self, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.fmax");
    unsafe { llvm::LLVMRustBuildVectorReduceFMax(self.llbuilder, src, /* IsNaN: */ true) }
}
/// Builds a floating-point minimum reduction of the vector `src` (passing
/// `false` for the FFI `IsNaN` flag), marks the resulting instruction as
/// using unsafe algebra, and returns it.
pub fn vector_reduce_fmin_fast(&self, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.fmin_fast");
    let reduction = unsafe { llvm::LLVMRustBuildVectorReduceFMin(self.llbuilder, src, false) };
    unsafe { llvm::LLVMRustSetHasUnsafeAlgebra(reduction); }
    reduction
}
/// Builds a floating-point maximum reduction of the vector `src` (passing
/// `false` for the FFI `IsNaN` flag), marks the resulting instruction as
/// using unsafe algebra, and returns it.
pub fn vector_reduce_fmax_fast(&self, src: ValueRef) -> ValueRef {
    self.count_insn("vector.reduce.fmax_fast");
    let reduction = unsafe { llvm::LLVMRustBuildVectorReduceFMax(self.llbuilder, src, false) };
    unsafe { llvm::LLVMRustSetHasUnsafeAlgebra(reduction); }
    reduction
}
/// Builds a minimum reduction of the vector `src` and returns the built
/// value; `is_signed` is forwarded as the FFI `IsSigned` flag.
pub fn vector_reduce_min(&self, src: ValueRef, is_signed: bool) -> ValueRef {
    self.count_insn("vector.reduce.min");
    unsafe { llvm::LLVMRustBuildVectorReduceMin(self.llbuilder, src, is_signed) }
}
/// Builds a maximum reduction of the vector `src` and returns the built
/// value; `is_signed` is forwarded as the FFI `IsSigned` flag.
pub fn vector_reduce_max(&self, src: ValueRef, is_signed: bool) -> ValueRef {
    self.count_insn("vector.reduce.max");
    unsafe { llvm::LLVMRustBuildVectorReduceMax(self.llbuilder, src, is_signed) }
}

pub fn extract_value(&self, agg_val: ValueRef, idx: u64) -> ValueRef {
self.count_insn("extractvalue");
assert_eq!(idx as c_uint as u64, idx);
Expand Down
143 changes: 140 additions & 3 deletions src/librustc_trans/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1018,14 +1018,22 @@ fn generic_simd_intrinsic<'a, 'tcx>(
name, $($fmt)*));
}
}
macro_rules! require {
($cond: expr, $($fmt: tt)*) => {
if !$cond {
// Reports the formatted diagnostic through `emit_error!` and then leaves the
// enclosing function with `Err(())`.
macro_rules! return_error {
    ($($args: tt)*) => {{
        emit_error!($($args)*);
        return Err(());
    }};
}

// Guard clause: when `$check` evaluates to false, report the formatted error
// and return early via `return_error!`.
macro_rules! require {
    ($check: expr, $($args: tt)*) => {
        if !$check {
            return_error!($($args)*);
        }
    };
}
macro_rules! require_simd {
($ty: expr, $position: expr) => {
require!($ty.is_simd(), "expected SIMD {} type, found non-SIMD `{}`", $position, $ty)
Expand Down Expand Up @@ -1142,6 +1150,135 @@ fn generic_simd_intrinsic<'a, 'tcx>(
return Ok(bx.extract_element(args[0].immediate(), args[1].immediate()))
}

// Lowers the arithmetic (add/mul) reduction intrinsic called `$name`.
//
// * `$integer_reduce`: `Builder` method used for integer element types.
// * `$float_reduce`: `Builder` method used for float element types; it takes
//   an accumulator followed by the vector.
// * `$ordered`: when true, `args[1]` is the accumulator supplied by the caller.
//
// The return type must equal the element type of the input vector; any other
// element type than int/uint/float is reported as unsupported.
macro_rules! arith_red {
    ($name:tt : $integer_reduce:ident, $float_reduce:ident, $ordered:expr) => {
        if name == $name {
            require!(ret_ty == in_elem,
                     "expected return type `{}` (element of input `{}`), found `{}`",
                     in_elem, in_ty, ret_ty);
            return match in_elem.sty {
                ty::TyInt(_) | ty::TyUint(_) => {
                    // Build the reduction exactly once and reuse it on both
                    // paths (previously the unordered path emitted a second,
                    // redundant reduction and discarded this one).
                    let r = bx.$integer_reduce(args[0].immediate());
                    if $ordered {
                        // if overflow occurs, the result is the
                        // mathematical result modulo 2^n:
                        if name.contains("mul") {
                            Ok(bx.mul(args[1].immediate(), r))
                        } else {
                            Ok(bx.add(args[1].immediate(), r))
                        }
                    } else {
                        Ok(r)
                    }
                },
                ty::TyFloat(f) => {
                    // ordered arithmetic reductions take an accumulator
                    let acc = if $ordered {
                        args[1].immediate()
                    } else {
                        // unordered arithmetic reductions do not:
                        match f.bit_width() {
                            32 => C_undef(Type::f32(bx.cx)),
                            64 => C_undef(Type::f64(bx.cx)),
                            v => {
                                return_error!(r#"
unsupported {} from `{}` with element `{}` of size `{}` to `{}`"#,
                                    $name, in_ty, in_elem, v, ret_ty
                                )
                            }
                        }

                    };
                    Ok(bx.$float_reduce(acc, args[0].immediate()))
                }
                _ => {
                    return_error!(
                        "unsupported {} from `{}` with element `{}` to `{}`",
                        $name, in_ty, in_elem, ret_ty
                    )
                },
            }
        }
    }
}

// Arithmetic reductions. The last argument selects the ordered form (which
// takes an accumulator); both forms currently lower floats through the
// `_fast` builder methods.
arith_red!("simd_reduce_add_ordered": vector_reduce_add, vector_reduce_fadd_fast, true);
arith_red!("simd_reduce_mul_ordered": vector_reduce_mul, vector_reduce_fmul_fast, true);
arith_red!("simd_reduce_add_unordered": vector_reduce_add, vector_reduce_fadd_fast, false);
arith_red!("simd_reduce_mul_unordered": vector_reduce_mul, vector_reduce_fmul_fast, false);

// Lowers the min/max reduction intrinsic called `$name`.
//
// * `$int_red`: `Builder` method for integer elements; it receives `true` for
//   signed and `false` for unsigned element types.
// * `$float_red`: `Builder` method for float elements.
//
// The return type must equal the element type of the input vector.
macro_rules! minmax_red {
    ($name:tt: $int_red:ident, $float_red:ident) => {
        if name == $name {
            require!(ret_ty == in_elem,
                     "expected return type `{}` (element of input `{}`), found `{}`",
                     in_elem, in_ty, ret_ty);
            return match in_elem.sty {
                ty::TyInt(_) => Ok(bx.$int_red(args[0].immediate(), true)),
                ty::TyUint(_) => Ok(bx.$int_red(args[0].immediate(), false)),
                ty::TyFloat(_) => Ok(bx.$float_red(args[0].immediate())),
                _ => {
                    return_error!("unsupported {} from `{}` with element `{}` to `{}`",
                                  $name, in_ty, in_elem, ret_ty)
                },
            }
        }
    }
}

// Min/max reductions; floats go through the plain fmin/fmax builder methods.
minmax_red!("simd_reduce_min": vector_reduce_min, vector_reduce_fmin);
minmax_red!("simd_reduce_max": vector_reduce_max, vector_reduce_fmax);

// The `_nanless` variants use the `_fast` float builder methods instead.
minmax_red!("simd_reduce_min_nanless": vector_reduce_min, vector_reduce_fmin_fast);
minmax_red!("simd_reduce_max_nanless": vector_reduce_max, vector_reduce_fmax_fast);

// Lowers the bitwise reduction intrinsic called `$name` using the `Builder`
// method `$red`.
//
// * `$boolean == false`: the vector is reduced as-is and the return type must
//   equal the element type of the input vector.
// * `$boolean == true`: the vector is first truncated to a vector of `i1`,
//   reduced, and the result zero-extended to `bool`.
//
// Only integer element types are supported; anything else is reported as
// unsupported.
macro_rules! bitwise_red {
    ($name:tt : $red:ident, $boolean:expr) => {
        if name == $name {
            let input = if !$boolean {
                require!(ret_ty == in_elem,
                         "expected return type `{}` (element of input `{}`), found `{}`",
                         in_elem, in_ty, ret_ty);
                args[0].immediate()
            } else {
                // Validate the element type *before* emitting any IR: a
                // `trunc` to `<n x i1>` is only meaningful for integer
                // vectors, so bail out with the usual error first.
                match in_elem.sty {
                    ty::TyInt(_) | ty::TyUint(_) => {},
                    _ => {
                        return_error!("unsupported {} from `{}` with element `{}` to `{}`",
                                      $name, in_ty, in_elem, ret_ty)
                    }
                }

                // boolean reductions operate on vectors of i1s:
                let i1 = Type::i1(bx.cx);
                let i1xn = Type::vector(&i1, in_len as u64);
                bx.trunc(args[0].immediate(), i1xn)
            };
            return match in_elem.sty {
                ty::TyInt(_) | ty::TyUint(_) => {
                    let r = bx.$red(input);
                    Ok(
                        if !$boolean {
                            r
                        } else {
                            bx.zext(r, Type::bool(bx.cx))
                        }
                    )
                },
                _ => {
                    return_error!("unsupported {} from `{}` with element `{}` to `{}`",
                                  $name, in_ty, in_elem, ret_ty)
                },
            }
        }
    }
}

// Bitwise reductions. The last argument selects the boolean form:
// `simd_reduce_all` / `simd_reduce_any` reuse the and / or reductions on the
// input truncated to a vector of `i1`.
bitwise_red!("simd_reduce_and": vector_reduce_and, false);
bitwise_red!("simd_reduce_or": vector_reduce_or, false);
bitwise_red!("simd_reduce_xor": vector_reduce_xor, false);
bitwise_red!("simd_reduce_all": vector_reduce_and, true);
bitwise_red!("simd_reduce_any": vector_reduce_or, true);

if name == "simd_cast" {
require_simd!(ret_ty, "return");
let out_len = ret_ty.simd_size(tcx);
Expand Down
8 changes: 8 additions & 0 deletions src/librustc_typeck/check/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,14 @@ pub fn check_platform_intrinsic_type<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
"simd_insert" => (2, vec![param(0), tcx.types.u32, param(1)], param(0)),
"simd_extract" => (2, vec![param(0), tcx.types.u32], param(1)),
"simd_cast" => (2, vec![param(0)], param(1)),
"simd_reduce_all" | "simd_reduce_any" => (1, vec![param(0)], tcx.types.bool),
"simd_reduce_add_ordered" | "simd_reduce_mul_ordered"
=> (2, vec![param(0), param(1)], param(1)),
"simd_reduce_add_unordered" | "simd_reduce_mul_unordered" |
"simd_reduce_and" | "simd_reduce_or" | "simd_reduce_xor" |
"simd_reduce_min" | "simd_reduce_max" |
"simd_reduce_min_nanless" | "simd_reduce_max_nanless"
=> (2, vec![param(0)], param(1)),
name if name.starts_with("simd_shuffle") => {
match name["simd_shuffle".len()..].parse() {
Ok(n) => {
Expand Down
Loading