diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs index ef45e527a3..be2a70ca2a 100644 --- a/crates/core_arch/src/aarch64/neon/generated.rs +++ b/crates/core_arch/src/aarch64/neon/generated.rs @@ -171,6 +171,24 @@ pub unsafe fn vceqzq_s64(a: int64x2_t) -> uint64x2_t { simd_eq(a, transmute(b)) } +/// Signed compare bitwise equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmeq))] +pub unsafe fn vceqz_p8(a: poly8x8_t) -> uint8x8_t { + let b: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_eq(a, transmute(b)) +} + +/// Signed compare bitwise equal to zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmeq))] +pub unsafe fn vceqzq_p8(a: poly8x16_t) -> uint8x16_t { + let b: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + simd_eq(a, transmute(b)) +} + /// Signed compare bitwise equal to zero #[inline] #[target_feature(enable = "neon")] @@ -1278,6 +1296,22 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vceqz_p8() { + let a: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let e: u8x8 = u8x8::new(0, 0xFF, 0, 0, 0, 0, 0, 0); + let r: u8x8 = transmute(vceqz_p8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqzq_p8() { + let a: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); + let e: u8x16 = u8x16::new(0, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let r: u8x16 = transmute(vceqzq_p8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vceqz_p64() { let a: i64x1 = i64x1::new(-9223372036854775808); diff --git a/crates/core_arch/src/arm/neon/generated.rs b/crates/core_arch/src/arm/neon/generated.rs index c033fab20d..cee70f3d69 100644 --- a/crates/core_arch/src/arm/neon/generated.rs +++ b/crates/core_arch/src/arm/neon/generated.rs @@ -833,6 +833,26 @@ pub unsafe fn vceqq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { simd_eq(a, b) } +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmeq))] +pub unsafe fn vceq_p8(a: poly8x8_t, b: poly8x8_t) -> uint8x8_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmeq))] +pub unsafe fn vceqq_p8(a: poly8x16_t, b: poly8x16_t) -> uint8x16_t { + simd_eq(a, b) +} + /// Floating-point compare equal #[inline] #[target_feature(enable = "neon")] @@ -925,6 +945,30 @@ pub unsafe fn vtstq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { simd_ne(c, transmute(d)) } +/// Signed compare bitwise Test bits nonzero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmtst))] +pub unsafe fn vtst_p8(a: poly8x8_t, b: poly8x8_t) -> uint8x8_t { + let c: poly8x8_t = simd_and(a, b); + let d: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) +} + +/// Signed compare bitwise Test bits nonzero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmtst))] +pub unsafe fn vtstq_p8(a: poly8x16_t, b: poly8x16_t) -> uint8x16_t { + let c: poly8x16_t = simd_and(a, b); + let d: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) +} + /// Unsigned compare bitwise Test bits nonzero #[inline] #[target_feature(enable = "neon")] @@ -4147,6 +4191,36 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vceq_p8() { + let a: i8x8 = i8x8::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i8x8 = i8x8::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vceq_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i8x8 = i8x8::new(-128, -128, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i8x8 = i8x8::new(-128, 0x7F, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); + let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); + let r: u8x8 = transmute(vceq_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_p8() { + let a: i8x16 = i8x16::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x7F); + let b: i8x16 = i8x16::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x7F); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vceqq_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i8x16 = i8x16::new(-128, -128, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, 0x7F); + let b: i8x16 = i8x16::new(-128, 0x7F, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, -128); + let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); + let r: u8x16 = transmute(vceqq_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vceq_f32() { let a: f32x2 = f32x2::new(1.2, 3.4); @@ -4219,6 +4293,24 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vtst_p8() { + let a: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let b: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vtst_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtstq_p8() { + let a: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); + let b: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); + let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vtstq_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vtst_u8() { let a: u8x8 = u8x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); diff --git a/crates/stdarch-gen/neon.spec b/crates/stdarch-gen/neon.spec index ebad10847f..e250fb8d38 100644 --- a/crates/stdarch-gen/neon.spec +++ b/crates/stdarch-gen/neon.spec @@ -158,7 +158,7 @@ aarch64 = cmeq generate uint64x*_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t, poly64x1_t:uint64x1_t, poly64x2_t:uint64x2_t arm = vceq. -generate uint*_t, int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t +generate uint*_t, int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, poly8x8_t:uint8x8_t, poly8x16_t:uint8x16_t /// Floating-point compare equal name = vceq @@ -182,7 +182,7 @@ fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 validate FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE aarch64 = cmeq -generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t, poly64x1_t:uint64x1_t, poly64x2_t:uint64x2_t +generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t, poly8x8_t:uint8x8_t, poly8x16_t:uint8x16_t, poly64x1_t:uint64x1_t, poly64x2_t:uint64x2_t /// Unsigned compare bitwise equal to zero name = vceqz @@ -218,7 +218,7 @@ aarch64 = cmtst generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t, poly64x1_t:uint64x1_t, poly64x2_t:uint64x2_t arm = vtst -generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t +generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, poly8x8_t:uint8x8_t, poly8x16_t:uint8x16_t /// Unsigned compare bitwise Test bits nonzero name = vtst diff --git a/crates/stdarch-gen/src/main.rs b/crates/stdarch-gen/src/main.rs index 419fc70840..ae1631701b 100644 --- a/crates/stdarch-gen/src/main.rs +++ b/crates/stdarch-gen/src/main.rs @@ -72,6 +72,8 @@ fn type_len(t: &str) -> usize { "float32x4_t" => 4, "float64x1_t" => 1, "float64x2_t" => 2, + "poly8x8_t" => 8, + "poly8x16_t" => 16, "poly64x1_t" => 1, "poly64x2_t" => 2, _ => panic!("unknown type: {}", t), @@ -102,6 +104,8 @@ fn type_to_suffix(t: &str) -> &str { "float32x4_t" => "q_f32", "float64x1_t" => "_f64", "float64x2_t" => "q_f64", + "poly8x8_t" => "_p8", + "poly8x16_t" => "q_p8", "poly64x1_t" => "_p64", "poly64x2_t" => "q_p64", _ => panic!("unknown type: {}", t), @@ -132,6 +136,8 @@ fn type_to_global_type(t: &str) -> &str { "float32x4_t" => "f32x4", "float64x1_t" => "f64", "float64x2_t" => "f64x2", + "poly8x8_t" => "i8x8", + "poly8x16_t" => "i8x16", "poly64x1_t" => "i64x1", "poly64x2_t" => "i64x2", _ => panic!("unknown type: {}", t), @@ -650,6 +656,8 @@ fn expand_intrinsic(intr: &str, t: &str) -> String { "float32x4_t" => "f32", "float64x1_t" => "f64", "float64x2_t" => "f64", + "poly8x8_t" => "i8", + "poly8x16_t" => "i8", /* "poly64x1_t" => "i64x1", "poly64x2_t" => "i64x2",