Skip to content

Commit

Permalink
add neon instruction vmaxnm_f* vpmaxnm_f* vminnm_f* vpminnm_f* (#1105)
Browse files Browse the repository at this point in the history
  • Loading branch information
surechen committed Apr 6, 2021
1 parent 15babf5 commit daae8f8
Show file tree
Hide file tree
Showing 4 changed files with 416 additions and 1 deletion.
220 changes: 220 additions & 0 deletions crates/core_arch/src/aarch64/neon/generated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2538,6 +2538,71 @@ pub unsafe fn vmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
vmaxq_f64_(a, b)
}

/// Floating-point Maximum Number (vector)
///
/// Hands `a` and `b` to the LLVM intrinsic `llvm.aarch64.neon.fmaxnm.v1f64`
/// and returns whatever it produces. Test builds attach an
/// `assert_instr(fmaxnm)` check to this function.
///
/// NOTE(review): the "Number" variants are expected to prefer a numeric
/// operand over a quiet NaN (per the Arm `FMAXNM` description) — the tests in
/// this file only use ordinary values, so confirm against the Arm reference.
///
/// # Safety
///
/// The caller must ensure the `neon` target feature is available on the CPU
/// executing this function.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmaxnm))]
pub unsafe fn vmaxnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
    // The declaration binds an LLVM intrinsic by link name rather than a real
    // C symbol; presumably the vector types are what trip `improper_ctypes`.
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v1f64")]
        fn fmaxnm_v1f64(a: float64x1_t, b: float64x1_t) -> float64x1_t;
    }
    fmaxnm_v1f64(a, b)
}

/// Floating-point Maximum Number (vector)
///
/// Hands `a` and `b` to the LLVM intrinsic `llvm.aarch64.neon.fmaxnm.v2f64`
/// and returns whatever it produces. Test builds attach an
/// `assert_instr(fmaxnm)` check to this function.
///
/// NOTE(review): the "Number" variants are expected to prefer a numeric
/// operand over a quiet NaN (per the Arm `FMAXNM` description) — the tests in
/// this file only use ordinary values, so confirm against the Arm reference.
///
/// # Safety
///
/// The caller must ensure the `neon` target feature is available on the CPU
/// executing this function.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmaxnm))]
pub unsafe fn vmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
    // The declaration binds an LLVM intrinsic by link name rather than a real
    // C symbol; presumably the vector types are what trip `improper_ctypes`.
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v2f64")]
        fn fmaxnm_v2f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
    }
    fmaxnm_v2f64(a, b)
}

/// Floating-point Maximum Number Pairwise (vector).
///
/// Hands `a` and `b` to the LLVM intrinsic `llvm.aarch64.neon.fmaxnmp.v2f32`
/// and returns whatever it produces. Going by the unit test for this
/// intrinsic, result lane 0 is the maximum of the two lanes of `a` and result
/// lane 1 is the maximum of the two lanes of `b`.
///
/// # Safety
///
/// The caller must ensure the `neon` target feature is available on the CPU
/// executing this function.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmaxnmp))]
pub unsafe fn vpmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
    // The declaration binds an LLVM intrinsic by link name rather than a real
    // C symbol; presumably the vector types are what trip `improper_ctypes`.
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmp.v2f32")]
        fn fmaxnmp_v2f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
    }
    fmaxnmp_v2f32(a, b)
}

/// Floating-point Maximum Number Pairwise (vector).
///
/// Hands `a` and `b` to the LLVM intrinsic `llvm.aarch64.neon.fmaxnmp.v2f64`
/// and returns whatever it produces. Going by the unit test for this
/// intrinsic, result lane 0 is the maximum of the two lanes of `a` and result
/// lane 1 is the maximum of the two lanes of `b`.
///
/// # Safety
///
/// The caller must ensure the `neon` target feature is available on the CPU
/// executing this function.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmaxnmp))]
pub unsafe fn vpmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
    // The declaration binds an LLVM intrinsic by link name rather than a real
    // C symbol; presumably the vector types are what trip `improper_ctypes`.
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmp.v2f64")]
        fn fmaxnmp_v2f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
    }
    fmaxnmp_v2f64(a, b)
}

/// Floating-point Maximum Number Pairwise (vector).
///
/// Hands `a` and `b` to the LLVM intrinsic `llvm.aarch64.neon.fmaxnmp.v4f32`
/// and returns whatever it produces. Going by the unit test for this
/// intrinsic, the two low result lanes are the maxima of the adjacent lane
/// pairs of `a`, and the two high result lanes are the maxima of the adjacent
/// lane pairs of `b`.
///
/// # Safety
///
/// The caller must ensure the `neon` target feature is available on the CPU
/// executing this function.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmaxnmp))]
pub unsafe fn vpmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
    // The declaration binds an LLVM intrinsic by link name rather than a real
    // C symbol; presumably the vector types are what trip `improper_ctypes`.
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnmp.v4f32")]
        fn fmaxnmp_v4f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
    }
    fmaxnmp_v4f32(a, b)
}

/// Minimum (vector)
#[inline]
#[target_feature(enable = "neon")]
Expand All @@ -2564,6 +2629,71 @@ pub unsafe fn vminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
vminq_f64_(a, b)
}

/// Floating-point Minimum Number (vector)
///
/// Hands `a` and `b` to the LLVM intrinsic `llvm.aarch64.neon.fminnm.v1f64`
/// and returns whatever it produces. Test builds attach an
/// `assert_instr(fminnm)` check to this function.
///
/// NOTE(review): the "Number" variants are expected to prefer a numeric
/// operand over a quiet NaN (per the Arm `FMINNM` description) — the tests in
/// this file only use ordinary values, so confirm against the Arm reference.
///
/// # Safety
///
/// The caller must ensure the `neon` target feature is available on the CPU
/// executing this function.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fminnm))]
pub unsafe fn vminnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
    // The declaration binds an LLVM intrinsic by link name rather than a real
    // C symbol; presumably the vector types are what trip `improper_ctypes`.
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v1f64")]
        fn fminnm_v1f64(a: float64x1_t, b: float64x1_t) -> float64x1_t;
    }
    fminnm_v1f64(a, b)
}

/// Floating-point Minimum Number (vector)
///
/// Hands `a` and `b` to the LLVM intrinsic `llvm.aarch64.neon.fminnm.v2f64`
/// and returns whatever it produces. Test builds attach an
/// `assert_instr(fminnm)` check to this function.
///
/// NOTE(review): the "Number" variants are expected to prefer a numeric
/// operand over a quiet NaN (per the Arm `FMINNM` description) — the tests in
/// this file only use ordinary values, so confirm against the Arm reference.
///
/// # Safety
///
/// The caller must ensure the `neon` target feature is available on the CPU
/// executing this function.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fminnm))]
pub unsafe fn vminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
    // The declaration binds an LLVM intrinsic by link name rather than a real
    // C symbol; presumably the vector types are what trip `improper_ctypes`.
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v2f64")]
        fn fminnm_v2f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
    }
    fminnm_v2f64(a, b)
}

/// Floating-point Minimum Number Pairwise (vector).
///
/// Hands `a` and `b` to the LLVM intrinsic `llvm.aarch64.neon.fminnmp.v2f32`
/// and returns whatever it produces. Going by the unit test for this
/// intrinsic, result lane 0 is the minimum of the two lanes of `a` and result
/// lane 1 is the minimum of the two lanes of `b`.
///
/// # Safety
///
/// The caller must ensure the `neon` target feature is available on the CPU
/// executing this function.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fminnmp))]
pub unsafe fn vpminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
    // The declaration binds an LLVM intrinsic by link name rather than a real
    // C symbol; presumably the vector types are what trip `improper_ctypes`.
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmp.v2f32")]
        fn fminnmp_v2f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
    }
    fminnmp_v2f32(a, b)
}

/// Floating-point Minimum Number Pairwise (vector).
///
/// Hands `a` and `b` to the LLVM intrinsic `llvm.aarch64.neon.fminnmp.v2f64`
/// and returns whatever it produces. Going by the unit test for this
/// intrinsic, result lane 0 is the minimum of the two lanes of `a` and result
/// lane 1 is the minimum of the two lanes of `b`.
///
/// # Safety
///
/// The caller must ensure the `neon` target feature is available on the CPU
/// executing this function.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fminnmp))]
pub unsafe fn vpminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
    // The declaration binds an LLVM intrinsic by link name rather than a real
    // C symbol; presumably the vector types are what trip `improper_ctypes`.
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmp.v2f64")]
        fn fminnmp_v2f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
    }
    fminnmp_v2f64(a, b)
}

/// Floating-point Minimum Number Pairwise (vector).
///
/// Hands `a` and `b` to the LLVM intrinsic `llvm.aarch64.neon.fminnmp.v4f32`
/// and returns whatever it produces. Going by the unit test for this
/// intrinsic, the two low result lanes are the minima of the adjacent lane
/// pairs of `a`, and the two high result lanes are the minima of the adjacent
/// lane pairs of `b`.
///
/// # Safety
///
/// The caller must ensure the `neon` target feature is available on the CPU
/// executing this function.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fminnmp))]
pub unsafe fn vpminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
    // The declaration binds an LLVM intrinsic by link name rather than a real
    // C symbol; presumably the vector types are what trip `improper_ctypes`.
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnmp.v4f32")]
        fn fminnmp_v4f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
    }
    fminnmp_v4f32(a, b)
}

/// Calculates the square root of each lane.
#[inline]
#[target_feature(enable = "neon")]
Expand Down Expand Up @@ -6347,6 +6477,51 @@ mod test {
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmaxnm_f64() {
    // Single-lane case: the larger of 1.0 and 8.0 must come back.
    let lhs: f64 = 1.0;
    let rhs: f64 = 8.0;
    let expected: f64 = 8.0;
    let actual: f64 = transmute(vmaxnm_f64(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmaxnmq_f64() {
    // Every lane of `rhs` is larger, so the result must equal `rhs`.
    let lhs: f64x2 = f64x2::new(1.0, 2.0);
    let rhs: f64x2 = f64x2::new(8.0, 16.0);
    let expected: f64x2 = f64x2::new(8.0, 16.0);
    let actual: f64x2 = transmute(vmaxnmq_f64(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vpmaxnm_f32() {
    // Pairwise: lane 0 = max(1.0, 2.0) from `lhs`, lane 1 = max(6.0, -3.0) from `rhs`.
    let lhs: f32x2 = f32x2::new(1.0, 2.0);
    let rhs: f32x2 = f32x2::new(6.0, -3.0);
    let expected: f32x2 = f32x2::new(2.0, 6.0);
    let actual: f32x2 = transmute(vpmaxnm_f32(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vpmaxnmq_f64() {
    // Pairwise: lane 0 = max(1.0, 2.0) from `lhs`, lane 1 = max(6.0, -3.0) from `rhs`.
    let lhs: f64x2 = f64x2::new(1.0, 2.0);
    let rhs: f64x2 = f64x2::new(6.0, -3.0);
    let expected: f64x2 = f64x2::new(2.0, 6.0);
    let actual: f64x2 = transmute(vpmaxnmq_f64(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vpmaxnmq_f32() {
    // Pairwise: low lanes are max(1, 2) and max(3, -4) from `lhs`,
    // high lanes are max(8, 16) and max(-1, 6) from `rhs`.
    let lhs: f32x4 = f32x4::new(1.0, 2.0, 3.0, -4.0);
    let rhs: f32x4 = f32x4::new(8.0, 16.0, -1.0, 6.0);
    let expected: f32x4 = f32x4::new(2.0, 3.0, 16.0, 6.0);
    let actual: f32x4 = transmute(vpmaxnmq_f32(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmin_f64() {
let a: f64 = 1.0;
Expand All @@ -6365,6 +6540,51 @@ mod test {
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vminnm_f64() {
    // Single-lane case: the smaller of 1.0 and 8.0 must come back.
    let lhs: f64 = 1.0;
    let rhs: f64 = 8.0;
    let expected: f64 = 1.0;
    let actual: f64 = transmute(vminnm_f64(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vminnmq_f64() {
    // Every lane of `lhs` is smaller, so the result must equal `lhs`.
    let lhs: f64x2 = f64x2::new(1.0, 2.0);
    let rhs: f64x2 = f64x2::new(8.0, 16.0);
    let expected: f64x2 = f64x2::new(1.0, 2.0);
    let actual: f64x2 = transmute(vminnmq_f64(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vpminnm_f32() {
    // Pairwise: lane 0 = min(1.0, 2.0) from `lhs`, lane 1 = min(6.0, -3.0) from `rhs`.
    let lhs: f32x2 = f32x2::new(1.0, 2.0);
    let rhs: f32x2 = f32x2::new(6.0, -3.0);
    let expected: f32x2 = f32x2::new(1.0, -3.0);
    let actual: f32x2 = transmute(vpminnm_f32(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vpminnmq_f64() {
    // Pairwise: lane 0 = min(1.0, 2.0) from `lhs`, lane 1 = min(6.0, -3.0) from `rhs`.
    let lhs: f64x2 = f64x2::new(1.0, 2.0);
    let rhs: f64x2 = f64x2::new(6.0, -3.0);
    let expected: f64x2 = f64x2::new(1.0, -3.0);
    let actual: f64x2 = transmute(vpminnmq_f64(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vpminnmq_f32() {
    // Pairwise: low lanes are min(1, 2) and min(3, -4) from `lhs`,
    // high lanes are min(8, 16) and min(-1, 6) from `rhs`.
    let lhs: f32x4 = f32x4::new(1.0, 2.0, 3.0, -4.0);
    let rhs: f32x4 = f32x4::new(8.0, 16.0, -1.0, 6.0);
    let expected: f32x4 = f32x4::new(1.0, -4.0, 8.0, -1.0);
    let actual: f32x4 = transmute(vpminnmq_f32(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vsqrt_f32() {
let a: f32x2 = f32x2::new(4.0, 9.0);
Expand Down
100 changes: 100 additions & 0 deletions crates/core_arch/src/arm/neon/generated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4906,6 +4906,38 @@ pub unsafe fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
vmaxq_f32_(a, b)
}

/// Floating-point Maximum Number (vector)
///
/// Hands `a` and `b` to an LLVM intrinsic and returns whatever it produces:
/// `llvm.arm.neon.vmaxnm.v2f32` on `arm`, `llvm.aarch64.neon.fmaxnm.v2f32` on
/// `aarch64`. Test builds attach an `assert_instr` check for `vmaxnm` (arm)
/// or `fmaxnm` (aarch64).
///
/// NOTE(review): the "Number" variants are expected to prefer a numeric
/// operand over a quiet NaN (per the Arm instruction descriptions) — the
/// tests in this file only use ordinary values, so confirm against the Arm
/// reference.
///
/// # Safety
///
/// The caller must ensure the `neon` target feature is available on the CPU
/// executing this function; on `arm` targets `fp-armv8` and `v8` are enabled
/// as well and must also be available.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmaxnm))]
pub unsafe fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
    // The declaration binds an LLVM intrinsic by link name rather than a real
    // C symbol; presumably the vector types are what trip `improper_ctypes`.
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v2f32")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v2f32")]
        fn maxnm_v2f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
    }
    maxnm_v2f32(a, b)
}

/// Floating-point Maximum Number (vector)
///
/// Hands `a` and `b` to an LLVM intrinsic and returns whatever it produces:
/// `llvm.arm.neon.vmaxnm.v4f32` on `arm`, `llvm.aarch64.neon.fmaxnm.v4f32` on
/// `aarch64`. Test builds attach an `assert_instr` check for `vmaxnm` (arm)
/// or `fmaxnm` (aarch64).
///
/// NOTE(review): the "Number" variants are expected to prefer a numeric
/// operand over a quiet NaN (per the Arm instruction descriptions) — the
/// tests in this file only use ordinary values, so confirm against the Arm
/// reference.
///
/// # Safety
///
/// The caller must ensure the `neon` target feature is available on the CPU
/// executing this function; on `arm` targets `fp-armv8` and `v8` are enabled
/// as well and must also be available.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmaxnm))]
pub unsafe fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
    // The declaration binds an LLVM intrinsic by link name rather than a real
    // C symbol; presumably the vector types are what trip `improper_ctypes`.
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f32")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v4f32")]
        fn maxnm_v4f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
    }
    maxnm_v4f32(a, b)
}

/// Minimum (vector)
#[inline]
#[target_feature(enable = "neon")]
Expand Down Expand Up @@ -5130,6 +5162,38 @@ pub unsafe fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
vminq_f32_(a, b)
}

/// Floating-point Minimum Number (vector)
///
/// Hands `a` and `b` to an LLVM intrinsic and returns whatever it produces:
/// `llvm.arm.neon.vminnm.v2f32` on `arm`, `llvm.aarch64.neon.fminnm.v2f32` on
/// `aarch64`. Test builds attach an `assert_instr` check for `vminnm` (arm)
/// or `fminnm` (aarch64).
///
/// NOTE(review): the "Number" variants are expected to prefer a numeric
/// operand over a quiet NaN (per the Arm instruction descriptions) — the
/// tests in this file only use ordinary values, so confirm against the Arm
/// reference.
///
/// # Safety
///
/// The caller must ensure the `neon` target feature is available on the CPU
/// executing this function; on `arm` targets `fp-armv8` and `v8` are enabled
/// as well and must also be available.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fminnm))]
pub unsafe fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
    // The declaration binds an LLVM intrinsic by link name rather than a real
    // C symbol; presumably the vector types are what trip `improper_ctypes`.
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v2f32")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v2f32")]
        fn minnm_v2f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
    }
    minnm_v2f32(a, b)
}

/// Floating-point Minimum Number (vector)
///
/// Hands `a` and `b` to an LLVM intrinsic and returns whatever it produces:
/// `llvm.arm.neon.vminnm.v4f32` on `arm`, `llvm.aarch64.neon.fminnm.v4f32` on
/// `aarch64`. Test builds attach an `assert_instr` check for `vminnm` (arm)
/// or `fminnm` (aarch64).
///
/// NOTE(review): the "Number" variants are expected to prefer a numeric
/// operand over a quiet NaN (per the Arm instruction descriptions) — the
/// tests in this file only use ordinary values, so confirm against the Arm
/// reference.
///
/// # Safety
///
/// The caller must ensure the `neon` target feature is available on the CPU
/// executing this function; on `arm` targets `fp-armv8` and `v8` are enabled
/// as well and must also be available.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fminnm))]
pub unsafe fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
    // The declaration binds an LLVM intrinsic by link name rather than a real
    // C symbol; presumably the vector types are what trip `improper_ctypes`.
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f32")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v4f32")]
        fn minnm_v4f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
    }
    minnm_v4f32(a, b)
}

/// Reciprocal square-root estimate.
#[inline]
#[target_feature(enable = "neon")]
Expand Down Expand Up @@ -11292,6 +11356,24 @@ mod test {
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmaxnm_f32() {
    // Every lane of `rhs` is larger, so the result must equal `rhs`.
    let lhs: f32x2 = f32x2::new(1.0, 2.0);
    let rhs: f32x2 = f32x2::new(8.0, 16.0);
    let expected: f32x2 = f32x2::new(8.0, 16.0);
    let actual: f32x2 = transmute(vmaxnm_f32(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmaxnmq_f32() {
    // Lane-wise maximum with a mix of winners: `rhs` wins lanes 0, 1 and 3,
    // `lhs` wins lane 2.
    let lhs: f32x4 = f32x4::new(1.0, 2.0, 3.0, -4.0);
    let rhs: f32x4 = f32x4::new(8.0, 16.0, -1.0, 6.0);
    let expected: f32x4 = f32x4::new(8.0, 16.0, 3.0, 6.0);
    let actual: f32x4 = transmute(vmaxnmq_f32(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vmin_s8() {
let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
Expand Down Expand Up @@ -11418,6 +11500,24 @@ mod test {
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vminnm_f32() {
    // Every lane of `lhs` is smaller, so the result must equal `lhs`.
    let lhs: f32x2 = f32x2::new(1.0, 2.0);
    let rhs: f32x2 = f32x2::new(8.0, 16.0);
    let expected: f32x2 = f32x2::new(1.0, 2.0);
    let actual: f32x2 = transmute(vminnm_f32(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vminnmq_f32() {
    // Lane-wise minimum with a mix of winners: `lhs` wins lanes 0, 1 and 3,
    // `rhs` wins lane 2.
    let lhs: f32x4 = f32x4::new(1.0, 2.0, 3.0, -4.0);
    let rhs: f32x4 = f32x4::new(8.0, 16.0, -1.0, 6.0);
    let expected: f32x4 = f32x4::new(1.0, 2.0, -1.0, -4.0);
    let actual: f32x4 = transmute(vminnmq_f32(transmute(lhs), transmute(rhs)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vrsqrte_f32() {
let a: f32x2 = f32x2::new(1.0, 2.0);
Expand Down
Loading

0 comments on commit daae8f8

Please sign in to comment.