diff --git a/crates/core_arch/avx512f.md b/crates/core_arch/avx512f.md
index 567fd0e7ce..13fe1146d9 100644
--- a/crates/core_arch/avx512f.md
+++ b/crates/core_arch/avx512f.md
@@ -1,17 +1,602 @@
 ["AVX512F"]
 
+ * [x] [`_mm512_abs_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_epi32&expand=5236)
+ * [x] [`_mm512_abs_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_epi64&expand=5236)
+ * [x] [`_mm512_abs_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_pd&expand=5236)
+ * [x] [`_mm512_abs_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_ps&expand=5236)
+ * [x] [`_mm512_add_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_epi32&expand=5236)
+ * [x] [`_mm512_add_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_epi64&expand=5236)
+ * [x] [`_mm512_add_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_pd&expand=5236)
+ * [x] [`_mm512_add_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_ps&expand=5236)
+ * [ ] [`_mm512_add_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_round_pd&expand=5236)
+ * [ ] [`_mm512_add_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_add_round_ps&expand=5236)
+ * [ ] [`_mm512_alignr_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_alignr_epi32&expand=5236)
+ * [ ] [`_mm512_alignr_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_alignr_epi64&expand=5236)
 * [x] [`_mm512_and_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_and_epi32&expand=5236)
 * [x] [`_mm512_and_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_and_epi64&expand=5236)
 * [x] [`_mm512_and_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_and_si512&expand=5236)
+ * [ ] [`_mm512_andnot_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_andnot_epi32&expand=5236)
+ * [ ] [`_mm512_andnot_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_andnot_epi64&expand=5236)
+ * [ ] [`_mm512_andnot_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_andnot_si512&expand=5236)
+ * [ ] [`_mm512_broadcast_f32x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcast_f32x4&expand=5236)
+ * [ ] [`_mm512_broadcast_f64x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcast_f64x4&expand=5236)
+ * [ ] [`_mm512_broadcast_i32x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcast_i32x4&expand=5236)
+ * [ ] [`_mm512_broadcast_i64x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcast_i64x4&expand=5236)
+ * [ ] [`_mm512_broadcastd_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcastd_epi32&expand=5236)
+ * [ ] [`_mm512_broadcastq_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcastq_epi64&expand=5236)
+ * [ ] [`_mm512_broadcastsd_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcastsd_pd&expand=5236)
+ * [ ] [`_mm512_broadcastss_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcastss_ps&expand=5236)
+ * [ ] [`_mm512_castpd128_pd512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castpd128_pd512&expand=5236)
+ * [ ] [`_mm512_castpd256_pd512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castpd256_pd512&expand=5236)
+ * [ ] [`_mm512_castpd512_pd128`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castpd512_pd128&expand=5236)
+ * [ ] [`_mm512_castpd512_pd256`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castpd512_pd256&expand=5236)
+ * [ ] [`_mm512_castpd_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castpd_ps&expand=5236)
+ * [ ] [`_mm512_castpd_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castpd_si512&expand=5236)
+ * [ ] [`_mm512_castps128_ps512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castps128_ps512&expand=5236)
+ * [ ] [`_mm512_castps256_ps512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castps256_ps512&expand=5236)
+ * [ ] [`_mm512_castps512_ps128`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castps512_ps128&expand=5236)
+ * [ ] [`_mm512_castps512_ps256`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castps512_ps256&expand=5236)
+ * [ ] [`_mm512_castps_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castps_pd&expand=5236)
+ * [ ] [`_mm512_castps_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castps_si512&expand=5236)
+ * [ ] [`_mm512_castsi128_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castsi128_si512&expand=5236)
+ * [ ] [`_mm512_castsi256_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castsi256_si512&expand=5236)
+ * [ ] [`_mm512_castsi512_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castsi512_pd&expand=5236)
+ * [ ] [`_mm512_castsi512_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castsi512_ps&expand=5236)
+ * [ ] [`_mm512_castsi512_si128`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castsi512_si128&expand=5236)
+ * [ ] [`_mm512_castsi512_si256`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castsi512_si256&expand=5236)
+ * [x] [`_mm512_cmp_epi32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epi32_mask&expand=5236)
+ * [x] [`_mm512_cmp_epi64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epi64_mask&expand=5236)
+ * [x] [`_mm512_cmp_epu32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epu32_mask&expand=5236)
+ * [x] [`_mm512_cmp_epu64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_epu64_mask&expand=5236)
+ * [x] [`_mm512_cmp_pd_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_pd_mask&expand=5236)
+ * [x] [`_mm512_cmp_ps_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_ps_mask&expand=5236)
+ * [x] [`_mm512_cmp_round_pd_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_round_pd_mask&expand=5236)
+ * [x] [`_mm512_cmp_round_ps_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmp_round_ps_mask&expand=5236)
+ * [x] [`_mm512_cmpeq_epi32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epi32_mask&expand=5236)
+ * [x] [`_mm512_cmpeq_epi64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epi64_mask&expand=5236)
+ * [x] [`_mm512_cmpeq_epu32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epu32_mask&expand=5236)
+ * [x] [`_mm512_cmpeq_epu64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_epu64_mask&expand=5236)
+ * [x] [`_mm512_cmpeq_pd_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_pd_mask&expand=5236)
+ * [x] [`_mm512_cmpeq_ps_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpeq_ps_mask&expand=5236)
+ * [x] [`_mm512_cmpge_epi32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpge_epi32_mask&expand=5236)
+ * [x] [`_mm512_cmpge_epi64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpge_epi64_mask&expand=5236)
+ * [x] [`_mm512_cmpge_epu32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpge_epu32_mask&expand=5236)
+ * [x] [`_mm512_cmpge_epu64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpge_epu64_mask&expand=5236)
+ * [x] [`_mm512_cmpgt_epi32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epi32_mask&expand=5236)
+ * [x] [`_mm512_cmpgt_epi64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epi64_mask&expand=5236)
+ * [x] [`_mm512_cmpgt_epu32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epu32_mask&expand=5236)
+ * [x] [`_mm512_cmpgt_epu64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpgt_epu64_mask&expand=5236)
+ * [x] [`_mm512_cmple_epi32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_epi32_mask&expand=5236)
+ * [x] [`_mm512_cmple_epi64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_epi64_mask&expand=5236)
+ * [x] [`_mm512_cmple_epu32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_epu32_mask&expand=5236)
+ * [x] [`_mm512_cmple_epu64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_epu64_mask&expand=5236)
+ * [x] [`_mm512_cmple_pd_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_pd_mask&expand=5236)
+ * [x] [`_mm512_cmple_ps_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmple_ps_mask&expand=5236)
+ * [x] [`_mm512_cmplt_epi32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epi32_mask&expand=5236)
+ * [x] [`_mm512_cmplt_epi64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epi64_mask&expand=5236)
+ * [x] [`_mm512_cmplt_epu32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epu32_mask&expand=5236)
+ * [x] [`_mm512_cmplt_epu64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_epu64_mask&expand=5236)
+ * [x] [`_mm512_cmplt_pd_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_pd_mask&expand=5236)
+ * [x] [`_mm512_cmplt_ps_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmplt_ps_mask&expand=5236)
+ * [x] [`_mm512_cmpneq_epi32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_epi32_mask&expand=5236)
+ * [x] [`_mm512_cmpneq_epi64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_epi64_mask&expand=5236)
+ * [x] [`_mm512_cmpneq_epu32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_epu32_mask&expand=5236)
+ * [x] [`_mm512_cmpneq_epu64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_epu64_mask&expand=5236)
+ * [x] [`_mm512_cmpneq_pd_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_pd_mask&expand=5236)
+ * [x] [`_mm512_cmpneq_ps_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpneq_ps_mask&expand=5236)
+ * [x] [`_mm512_cmpnle_pd_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpnle_pd_mask&expand=5236)
+ * [x] [`_mm512_cmpnle_ps_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpnle_ps_mask&expand=5236)
+ * [x] [`_mm512_cmpnlt_pd_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpnlt_pd_mask&expand=5236)
+ * [x] [`_mm512_cmpnlt_ps_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpnlt_ps_mask&expand=5236)
+ * [x] [`_mm512_cmpord_pd_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpord_pd_mask&expand=5236)
+ * [x] [`_mm512_cmpord_ps_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpord_ps_mask&expand=5236)
+ * [x] [`_mm512_cmpunord_pd_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpunord_pd_mask&expand=5236)
+ * [x] [`_mm512_cmpunord_ps_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cmpunord_ps_mask&expand=5236)
+ * [ ] [`_mm512_cvt_roundepi32_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepi32_ps&expand=5236)
+ * [ ] [`_mm512_cvt_roundepu32_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundepu32_ps&expand=5236)
+ * [ ] [`_mm512_cvt_roundpd_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundpd_epi32&expand=5236)
+ * [ ] [`_mm512_cvt_roundpd_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundpd_epu32&expand=5236)
+ * [ ] [`_mm512_cvt_roundpd_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundpd_ps&expand=5236)
+ * [ ] [`_mm512_cvt_roundph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundph_ps&expand=5236)
+ * [ ] [`_mm512_cvt_roundps_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundps_epi32&expand=5236)
+ * [ ] [`_mm512_cvt_roundps_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundps_epu32&expand=5236)
+ * [ ] [`_mm512_cvt_roundps_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundps_pd&expand=5236)
+ * [ ] [`_mm512_cvt_roundps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvt_roundps_ph&expand=5236)
+ * [ ] [`_mm512_cvtepi16_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi16_epi32&expand=5236)
+ * [ ] [`_mm512_cvtepi16_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi16_epi64&expand=5236)
+ * [ ] [`_mm512_cvtepi32_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi32_epi16&expand=5236)
+ * [ ] [`_mm512_cvtepi32_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi32_epi64&expand=5236)
+ * [ ] [`_mm512_cvtepi32_epi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi32_epi8&expand=5236)
+ * [ ] [`_mm512_cvtepi32_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi32_pd&expand=5236)
+ * [ ] [`_mm512_cvtepi32_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi32_ps&expand=5236)
+ * [ ] [`_mm512_cvtepi32lo_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi32lo_pd&expand=5236)
+ * [ ] [`_mm512_cvtepi64_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi64_epi16&expand=5236)
+ * [ ] [`_mm512_cvtepi64_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi64_epi32&expand=5236)
+ * [ ] [`_mm512_cvtepi64_epi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi64_epi8&expand=5236)
+ * [ ] [`_mm512_cvtepi8_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi8_epi32&expand=5236)
+ * [ ] [`_mm512_cvtepi8_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi8_epi64&expand=5236)
+ * [ ] [`_mm512_cvtepu16_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu16_epi32&expand=5236)
+ * [ ] [`_mm512_cvtepu16_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu16_epi64&expand=5236)
+ * [ ] [`_mm512_cvtepu32_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu32_epi64&expand=5236)
+ * [ ] [`_mm512_cvtepu32_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu32_pd&expand=5236)
+ * [ ] [`_mm512_cvtepu32_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu32_ps&expand=5236)
+ * [ ] [`_mm512_cvtepu32lo_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu32lo_pd&expand=5236)
+ * [ ] [`_mm512_cvtepu8_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu8_epi32&expand=5236)
+ * [ ] [`_mm512_cvtepu8_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu8_epi64&expand=5236)
+ * [ ] [`_mm512_cvtpd_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtpd_epi32&expand=5236)
+ * [ ] [`_mm512_cvtpd_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtpd_epu32&expand=5236)
+ * [ ] [`_mm512_cvtpd_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtpd_ps&expand=5236)
+ * [ ] [`_mm512_cvtpd_pslo`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtpd_pslo&expand=5236)
+ * [ ] [`_mm512_cvtph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtph_ps&expand=5236)
+ * [ ] [`_mm512_cvtps_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtps_epi32&expand=5236)
+ * [ ] [`_mm512_cvtps_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtps_epu32&expand=5236)
+ * [ ] [`_mm512_cvtps_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtps_pd&expand=5236)
+ * [ ] [`_mm512_cvtps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtps_ph&expand=5236)
+ * [ ] [`_mm512_cvtpslo_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtpslo_pd&expand=5236)
+ * [ ] [`_mm512_cvtsepi32_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsepi32_epi16&expand=5236)
+ * [ ] [`_mm512_cvtsepi32_epi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsepi32_epi8&expand=5236)
+ * [ ] [`_mm512_cvtsepi64_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsepi64_epi16&expand=5236)
+ * [ ] [`_mm512_cvtsepi64_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsepi64_epi32&expand=5236)
+ * [ ] [`_mm512_cvtsepi64_epi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsepi64_epi8&expand=5236)
+ * [ ] [`_mm512_cvtt_roundpd_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundpd_epi32&expand=5236)
+ * [ ] [`_mm512_cvtt_roundpd_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundpd_epu32&expand=5236)
+ * [ ] [`_mm512_cvtt_roundps_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundps_epi32&expand=5236)
+ * [ ] [`_mm512_cvtt_roundps_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtt_roundps_epu32&expand=5236)
+ * [ ] [`_mm512_cvttpd_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttpd_epi32&expand=5236)
+ * [ ] [`_mm512_cvttpd_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttpd_epu32&expand=5236)
+ * [ ] [`_mm512_cvttps_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttps_epi32&expand=5236)
+ * [ ] [`_mm512_cvttps_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttps_epu32&expand=5236)
+ * [ ] [`_mm512_cvtusepi32_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtusepi32_epi16&expand=5236)
+ * [ ] [`_mm512_cvtusepi32_epi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtusepi32_epi8&expand=5236)
+ * [ ] [`_mm512_cvtusepi64_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtusepi64_epi16&expand=5236)
+ * [ ] [`_mm512_cvtusepi64_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtusepi64_epi32&expand=5236)
+ * [ ] [`_mm512_cvtusepi64_epi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtusepi64_epi8&expand=5236)
+ * [x] [`_mm512_div_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_pd&expand=5236)
+ * [x] [`_mm512_div_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_ps&expand=5236)
+ * [ ] [`_mm512_div_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_round_pd&expand=5236)
+ * [ ] [`_mm512_div_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_round_ps&expand=5236)
+ * [ ] [`_mm512_extractf32x4_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_extractf32x4_ps&expand=5236)
+ * [ ] [`_mm512_extractf64x4_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_extractf64x4_pd&expand=5236)
+ * [ ] [`_mm512_extracti32x4_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_extracti32x4_epi32&expand=5236)
+ * [ ] [`_mm512_extracti64x4_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_extracti64x4_epi64&expand=5236)
+ * [ ] [`_mm512_fixupimm_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fixupimm_pd&expand=5236)
+ * [ ] [`_mm512_fixupimm_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fixupimm_ps&expand=5236)
+ * [ ] [`_mm512_fixupimm_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fixupimm_round_pd&expand=5236)
+ * [ ] [`_mm512_fixupimm_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fixupimm_round_ps&expand=5236)
+ * [ ] [`_mm512_fmadd_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_pd&expand=5236)
+ * [ ] [`_mm512_fmadd_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_ps&expand=5236)
+ * [ ] [`_mm512_fmadd_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_round_pd&expand=5236)
+ * [ ] [`_mm512_fmadd_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmadd_round_ps&expand=5236)
+ * [ ] [`_mm512_fmaddsub_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmaddsub_pd&expand=5236)
+ * [ ] [`_mm512_fmaddsub_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmaddsub_ps&expand=5236)
+ * [ ] [`_mm512_fmaddsub_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmaddsub_round_pd&expand=5236)
+ * [ ] [`_mm512_fmaddsub_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmaddsub_round_ps&expand=5236)
+ * [ ] [`_mm512_fmsub_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsub_pd&expand=5236)
+ * [ ] [`_mm512_fmsub_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsub_ps&expand=5236)
+ * [ ] [`_mm512_fmsub_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsub_round_pd&expand=5236)
+ * [ ] [`_mm512_fmsub_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsub_round_ps&expand=5236)
+ * [ ] [`_mm512_fmsubadd_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsubadd_pd&expand=5236)
+ * [ ] [`_mm512_fmsubadd_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsubadd_ps&expand=5236)
+ * [ ] [`_mm512_fmsubadd_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsubadd_round_pd&expand=5236)
+ * [ ] [`_mm512_fmsubadd_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fmsubadd_round_ps&expand=5236)
+ * [ ] [`_mm512_fnmadd_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmadd_pd&expand=5236)
+ * [ ] [`_mm512_fnmadd_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmadd_ps&expand=5236)
+ * [ ] [`_mm512_fnmadd_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmadd_round_pd&expand=5236)
+ * [ ] [`_mm512_fnmadd_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmadd_round_ps&expand=5236)
+ * [ ] [`_mm512_fnmsub_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmsub_pd&expand=5236)
+ * [ ] [`_mm512_fnmsub_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmsub_ps&expand=5236)
+ * [ ] [`_mm512_fnmsub_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmsub_round_pd&expand=5236)
+ * [ ] [`_mm512_fnmsub_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_fnmsub_round_ps&expand=5236)
+ * [ ] [`_mm512_getexp_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_pd&expand=5236)
+ * [ ] [`_mm512_getexp_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_ps&expand=5236)
+ * [ ] [`_mm512_getexp_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_round_pd&expand=5236)
+ * [ ] [`_mm512_getexp_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getexp_round_ps&expand=5236)
+ * [ ] [`_mm512_getmant_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_pd&expand=5236)
+ * [ ] [`_mm512_getmant_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_ps&expand=5236)
+ * [ ] [`_mm512_getmant_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_round_pd&expand=5236)
+ * [ ] [`_mm512_getmant_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_getmant_round_ps&expand=5236)
+ * [ ] [`_mm512_i32extgather_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32extgather_epi32&expand=5236)
+ * [ ] [`_mm512_i32extgather_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32extgather_ps&expand=5236)
+ * [ ] [`_mm512_i32extscatter_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32extscatter_epi32&expand=5236)
+ * [ ] [`_mm512_i32extscatter_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32extscatter_ps&expand=5236)
+ * [x] [`_mm512_i32gather_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32gather_epi32&expand=5236)
+ * [x] [`_mm512_i32gather_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32gather_epi64&expand=5236)
+ * [x] [`_mm512_i32gather_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32gather_pd&expand=5236)
+ * [x] [`_mm512_i32gather_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32gather_ps&expand=5236)
+ * [ ] [`_mm512_i32loextgather_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loextgather_epi64&expand=5236)
+ * [ ] [`_mm512_i32loextgather_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loextgather_pd&expand=5236)
+ * [ ] [`_mm512_i32loextscatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loextscatter_epi64&expand=5236)
+ * [ ] [`_mm512_i32loextscatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loextscatter_pd&expand=5236)
+ * [ ] [`_mm512_i32logather_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_epi64&expand=5236)
+ * [ ] [`_mm512_i32logather_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_pd&expand=5236)
+ * [ ] [`_mm512_i32loscatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_pd&expand=5236)
+ * [x] [`_mm512_i32scatter_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32scatter_epi32&expand=5236)
+ * [x] [`_mm512_i32scatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32scatter_epi64&expand=5236)
+ * [x] [`_mm512_i32scatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32scatter_pd&expand=5236)
+ * [x] [`_mm512_i32scatter_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32scatter_ps&expand=5236)
+ * [x] [`_mm512_i64gather_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64gather_epi32&expand=5236)
+ * [x] [`_mm512_i64gather_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64gather_epi64&expand=5236)
+ * [x] [`_mm512_i64gather_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64gather_pd&expand=5236)
+ * [x] [`_mm512_i64gather_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64gather_ps&expand=5236)
+ * [x] [`_mm512_i64scatter_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64scatter_epi32&expand=5236)
+ * [x] [`_mm512_i64scatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64scatter_epi64&expand=5236)
+ * [x] [`_mm512_i64scatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64scatter_pd&expand=5236)
+ * [x] [`_mm512_i64scatter_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64scatter_ps&expand=5236)
+ * [ ] [`_mm512_insertf32x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_insertf32x4&expand=5236)
+ * [ ] [`_mm512_insertf64x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_insertf64x4&expand=5236)
+ * [ ] [`_mm512_inserti32x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_inserti32x4&expand=5236)
+ * [ ] [`_mm512_inserti64x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_inserti64x4&expand=5236)
+ * [ ] [`_mm512_int2mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_int2mask&expand=5236)
 * [x] [`_mm512_kand`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_kand&expand=5236)
+ * [ ] [`_mm512_kandn`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_kandn&expand=5236)
+ * [ ] [`_mm512_kmov`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_kmov&expand=5236)
+ * [ ] [`_mm512_knot`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_knot&expand=5236)
 * [x] [`_mm512_kor`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_kor&expand=5236)
+ * [ ] [`_mm512_kortestc`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_kortestc&expand=5236)
+ * [ ] [`_mm512_kortestz`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_kortestz&expand=5236)
[`_mm512_kunpackb`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_kunpackb&expand=5236) + * [ ] [`_mm512_kxnor`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_kxnor&expand=5236) * [x] [`_mm512_kxor`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_kxor&expand=5236) - * [x] [`_kand_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kand_mask16&expand=3212) - * [x] [`_kor_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kor_mask16&expand=3239) - * [x] [`_kxor_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kxor_mask16&expand=3291) + * [ ] [`_mm512_load_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_load_epi32&expand=5236) + * [ ] [`_mm512_load_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_load_epi64&expand=5236) + * [ ] [`_mm512_load_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_load_pd&expand=5236) + * [ ] [`_mm512_load_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_load_ps&expand=5236) + * [ ] [`_mm512_load_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_load_si512&expand=5236) + * [x] [`_mm512_loadu_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_pd&expand=5236) + * [x] [`_mm512_loadu_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ps&expand=5236) + * [ ] [`_mm512_loadu_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_si512&expand=5236) + * [ ] [`_mm512_mask2_permutex2var_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask2_permutex2var_epi32&expand=5236) + * [ ] [`_mm512_mask2_permutex2var_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask2_permutex2var_epi64&expand=5236) + * [ ] [`_mm512_mask2_permutex2var_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask2_permutex2var_pd&expand=5236) + * [ ] [`_mm512_mask2_permutex2var_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask2_permutex2var_ps&expand=5236) + * [ ] [`_mm512_mask2int`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask2int&expand=5236) + * [ ] [`_mm512_mask3_fmadd_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_pd&expand=5236) + * [ ] [`_mm512_mask3_fmadd_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_ps&expand=5236) + * [ ] [`_mm512_mask3_fmadd_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_round_pd&expand=5236) + * [ ] [`_mm512_mask3_fmadd_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmadd_round_ps&expand=5236) + * [ ] [`_mm512_mask3_fmaddsub_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmaddsub_pd&expand=5236) + * [ ] [`_mm512_mask3_fmaddsub_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmaddsub_ps&expand=5236) + * [ ] [`_mm512_mask3_fmaddsub_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmaddsub_round_pd&expand=5236) + * [ ] 
[`_mm512_mask3_fmaddsub_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmaddsub_round_ps&expand=5236) + * [ ] [`_mm512_mask3_fmsub_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsub_pd&expand=5236) + * [ ] [`_mm512_mask3_fmsub_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsub_ps&expand=5236) + * [ ] [`_mm512_mask3_fmsub_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsub_round_pd&expand=5236) + * [ ] [`_mm512_mask3_fmsub_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsub_round_ps&expand=5236) + * [ ] [`_mm512_mask3_fmsubadd_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsubadd_pd&expand=5236) + * [ ] [`_mm512_mask3_fmsubadd_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsubadd_ps&expand=5236) + * [ ] [`_mm512_mask3_fmsubadd_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsubadd_round_pd&expand=5236) + * [ ] [`_mm512_mask3_fmsubadd_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fmsubadd_round_ps&expand=5236) + * [ ] [`_mm512_mask3_fnmadd_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmadd_pd&expand=5236) + * [ ] [`_mm512_mask3_fnmadd_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmadd_ps&expand=5236) + * [ ] [`_mm512_mask3_fnmadd_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmadd_round_pd&expand=5236) + * [ ] [`_mm512_mask3_fnmadd_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmadd_round_ps&expand=5236) + * [ ] [`_mm512_mask3_fnmsub_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_pd&expand=5236) + * [ ] [`_mm512_mask3_fnmsub_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_ps&expand=5236) + * [ ] [`_mm512_mask3_fnmsub_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_round_pd&expand=5236) + * [ ] [`_mm512_mask3_fnmsub_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask3_fnmsub_round_ps&expand=5236) + * [x] [`_mm512_mask_abs_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_abs_epi32&expand=5236) + * [x] [`_mm512_mask_abs_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_abs_epi64&expand=5236) + * [x] [`_mm512_mask_abs_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_abs_pd&expand=5236) + * [x] [`_mm512_mask_abs_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_abs_ps&expand=5236) + * [x] [`_mm512_mask_add_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_epi32&expand=5236) + * [x] [`_mm512_mask_add_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_epi64&expand=5236) + * [x] [`_mm512_mask_add_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_pd&expand=5236) + * [x] [`_mm512_mask_add_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_ps&expand=5236) + * [ ] 
[`_mm512_mask_add_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_round_pd&expand=5236) + * [ ] [`_mm512_mask_add_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_add_round_ps&expand=5236) + * [ ] [`_mm512_mask_alignr_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_alignr_epi32&expand=5236) + * [ ] [`_mm512_mask_alignr_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_alignr_epi64&expand=5236) * [x] [`_mm512_mask_and_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_and_epi32&expand=5236) * [x] [`_mm512_mask_and_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_and_epi64&expand=5236) + * [ ] [`_mm512_mask_andnot_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_andnot_epi32&expand=5236) + * [ ] [`_mm512_mask_andnot_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_andnot_epi64&expand=5236) + * [ ] [`_mm512_mask_blend_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_blend_epi32&expand=5236) + * [ ] [`_mm512_mask_blend_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_blend_epi64&expand=5236) + * [ ] [`_mm512_mask_blend_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_blend_pd&expand=5236) + * [ ] [`_mm512_mask_blend_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_blend_ps&expand=5236) + * [ ] [`_mm512_mask_broadcast_f32x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_broadcast_f32x4&expand=5236) + * [ ] [`_mm512_mask_broadcast_f64x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_broadcast_f64x4&expand=5236) + * [ ] [`_mm512_mask_broadcast_i32x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_broadcast_i32x4&expand=5236) + * [ ] [`_mm512_mask_broadcast_i64x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_broadcast_i64x4&expand=5236) + * [ ] [`_mm512_mask_broadcastd_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_broadcastd_epi32&expand=5236) + * [ ] [`_mm512_mask_broadcastq_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_broadcastq_epi64&expand=5236) + * [ ] [`_mm512_mask_broadcastsd_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_broadcastsd_pd&expand=5236) + * [ ] [`_mm512_mask_broadcastss_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_broadcastss_ps&expand=5236) + * [x] [`_mm512_mask_cmp_epi32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epi32_mask&expand=5236) + * [x] [`_mm512_mask_cmp_epi64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epi64_mask&expand=5236) + * [x] [`_mm512_mask_cmp_epu32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epu32_mask&expand=5236) + * [x] [`_mm512_mask_cmp_epu64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_epu64_mask&expand=5236) + * [x] 
[`_mm512_mask_cmp_pd_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_pd_mask&expand=5236) + * [x] [`_mm512_mask_cmp_ps_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_ps_mask&expand=5236) + * [x] [`_mm512_mask_cmp_round_pd_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_round_pd_mask&expand=5236) + * [x] [`_mm512_mask_cmp_round_ps_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmp_round_ps_mask&expand=5236) + * [x] [`_mm512_mask_cmpeq_epi32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epi32_mask&expand=5236) + * [x] [`_mm512_mask_cmpeq_epi64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epi64_mask&expand=5236) + * [x] [`_mm512_mask_cmpeq_epu32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epu32_mask&expand=5236) + * [x] [`_mm512_mask_cmpeq_epu64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_epu64_mask&expand=5236) + * [x] [`_mm512_mask_cmpeq_pd_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_pd_mask&expand=5236) + * [x] [`_mm512_mask_cmpeq_ps_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpeq_ps_mask&expand=5236) + * [x] [`_mm512_mask_cmpge_epi32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpge_epi32_mask&expand=5236) + * [x] [`_mm512_mask_cmpge_epi64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpge_epi64_mask&expand=5236) + * [x] [`_mm512_mask_cmpge_epu32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpge_epu32_mask&expand=5236) + * [x] [`_mm512_mask_cmpge_epu64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpge_epu64_mask&expand=5236) + * [x] [`_mm512_mask_cmpgt_epi32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epi32_mask&expand=5236) + * [x] [`_mm512_mask_cmpgt_epi64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epi64_mask&expand=5236) + * [x] [`_mm512_mask_cmpgt_epu32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epu32_mask&expand=5236) + * [x] [`_mm512_mask_cmpgt_epu64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpgt_epu64_mask&expand=5236) + * [x] [`_mm512_mask_cmple_epi32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_epi32_mask&expand=5236) + * [x] [`_mm512_mask_cmple_epi64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_epi64_mask&expand=5236) + * [x] [`_mm512_mask_cmple_epu32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_epu32_mask&expand=5236) + * [x] [`_mm512_mask_cmple_epu64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_epu64_mask&expand=5236) + * [x] [`_mm512_mask_cmple_pd_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_pd_mask&expand=5236) + * [x] [`_mm512_mask_cmple_ps_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmple_ps_mask&expand=5236) + 
* [x] [`_mm512_mask_cmplt_epi32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epi32_mask&expand=5236) + * [x] [`_mm512_mask_cmplt_epi64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epi64_mask&expand=5236) + * [x] [`_mm512_mask_cmplt_epu32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epu32_mask&expand=5236) + * [x] [`_mm512_mask_cmplt_epu64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_epu64_mask&expand=5236) + * [x] [`_mm512_mask_cmplt_pd_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_pd_mask&expand=5236) + * [x] [`_mm512_mask_cmplt_ps_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmplt_ps_mask&expand=5236) + * [x] [`_mm512_mask_cmpneq_epi32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_epi32_mask&expand=5236) + * [x] [`_mm512_mask_cmpneq_epi64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_epi64_mask&expand=5236) + * [x] [`_mm512_mask_cmpneq_epu32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_epu32_mask&expand=5236) + * [x] [`_mm512_mask_cmpneq_epu64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_epu64_mask&expand=5236) + * [x] [`_mm512_mask_cmpneq_pd_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_pd_mask&expand=5236) + * [x] [`_mm512_mask_cmpneq_ps_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpneq_ps_mask&expand=5236) + * [x] [`_mm512_mask_cmpnle_pd_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpnle_pd_mask&expand=5236) + * [x] [`_mm512_mask_cmpnle_ps_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpnle_ps_mask&expand=5236) + * [x] [`_mm512_mask_cmpnlt_pd_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpnlt_pd_mask&expand=5236) + * [x] [`_mm512_mask_cmpnlt_ps_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpnlt_ps_mask&expand=5236) + * [x] [`_mm512_mask_cmpord_pd_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpord_pd_mask&expand=5236) + * [x] [`_mm512_mask_cmpord_ps_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpord_ps_mask&expand=5236) + * [x] [`_mm512_mask_cmpunord_pd_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpunord_pd_mask&expand=5236) + * [x] [`_mm512_mask_cmpunord_ps_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cmpunord_ps_mask&expand=5236) + * [ ] [`_mm512_mask_compress_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_compress_epi32&expand=5236) + * [ ] [`_mm512_mask_compress_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_compress_epi64&expand=5236) + * [ ] [`_mm512_mask_compress_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_compress_pd&expand=5236) + * [ ] [`_mm512_mask_compress_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_compress_ps&expand=5236) + * [ ] 
[`_mm512_mask_compressstoreu_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_compressstoreu_epi32&expand=5236) + * [ ] [`_mm512_mask_compressstoreu_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_compressstoreu_epi64&expand=5236) + * [ ] [`_mm512_mask_compressstoreu_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_compressstoreu_pd&expand=5236) + * [ ] [`_mm512_mask_compressstoreu_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_compressstoreu_ps&expand=5236) + * [ ] [`_mm512_mask_cvt_roundepi32_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepi32_ps&expand=5236) + * [ ] [`_mm512_mask_cvt_roundepu32_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundepu32_ps&expand=5236) + * [ ] [`_mm512_mask_cvt_roundpd_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundpd_epi32&expand=5236) + * [ ] [`_mm512_mask_cvt_roundpd_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundpd_epu32&expand=5236) + * [ ] [`_mm512_mask_cvt_roundpd_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundpd_ps&expand=5236) + * [ ] [`_mm512_mask_cvt_roundph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundph_ps&expand=5236) + * [ ] [`_mm512_mask_cvt_roundps_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundps_epi32&expand=5236) + * [ ] [`_mm512_mask_cvt_roundps_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundps_epu32&expand=5236) + * [ ] [`_mm512_mask_cvt_roundps_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundps_pd&expand=5236) + * [ ] [`_mm512_mask_cvt_roundps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvt_roundps_ph&expand=5236) + * [ ] [`_mm512_mask_cvtepi16_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi16_epi32&expand=5236) + * [ ] [`_mm512_mask_cvtepi16_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi16_epi64&expand=5236) + * [ ] [`_mm512_mask_cvtepi32_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi32_epi16&expand=5236) + * [ ] [`_mm512_mask_cvtepi32_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi32_epi64&expand=5236) + * [ ] [`_mm512_mask_cvtepi32_epi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi32_epi8&expand=5236) + * [ ] [`_mm512_mask_cvtepi32_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi32_pd&expand=5236) + * [ ] [`_mm512_mask_cvtepi32_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi32_ps&expand=5236) + * [ ] [`_mm512_mask_cvtepi32_storeu_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi32_storeu_epi16&expand=5236) + * [ ] [`_mm512_mask_cvtepi32_storeu_epi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi32_storeu_epi8&expand=5236) + * [ ] 
[`_mm512_mask_cvtepi32lo_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi32lo_pd&expand=5236) + * [ ] [`_mm512_mask_cvtepi64_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi64_epi16&expand=5236) + * [ ] [`_mm512_mask_cvtepi64_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi64_epi32&expand=5236) + * [ ] [`_mm512_mask_cvtepi64_epi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi64_epi8&expand=5236) + * [ ] [`_mm512_mask_cvtepi64_storeu_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi64_storeu_epi16&expand=5236) + * [ ] [`_mm512_mask_cvtepi64_storeu_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi64_storeu_epi32&expand=5236) + * [ ] [`_mm512_mask_cvtepi64_storeu_epi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi64_storeu_epi8&expand=5236) + * [ ] [`_mm512_mask_cvtepi8_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi8_epi32&expand=5236) + * [ ] [`_mm512_mask_cvtepi8_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi8_epi64&expand=5236) + * [ ] [`_mm512_mask_cvtepu16_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu16_epi32&expand=5236) + * [ ] [`_mm512_mask_cvtepu16_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu16_epi64&expand=5236) + * [ ] [`_mm512_mask_cvtepu32_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu32_epi64&expand=5236) + * [ ] [`_mm512_mask_cvtepu32_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu32_pd&expand=5236) + * [ ] [`_mm512_mask_cvtepu32_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu32_ps&expand=5236) + * [ ] [`_mm512_mask_cvtepu32lo_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu32lo_pd&expand=5236) + * [ ] [`_mm512_mask_cvtepu8_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu8_epi32&expand=5236) + * [ ] [`_mm512_mask_cvtepu8_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepu8_epi64&expand=5236) + * [ ] [`_mm512_mask_cvtpd_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtpd_epi32&expand=5236) + * [ ] [`_mm512_mask_cvtpd_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtpd_epu32&expand=5236) + * [ ] [`_mm512_mask_cvtpd_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtpd_ps&expand=5236) + * [ ] [`_mm512_mask_cvtpd_pslo`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtpd_pslo&expand=5236) + * [ ] [`_mm512_mask_cvtph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtph_ps&expand=5236) + * [ ] [`_mm512_mask_cvtps_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtps_epi32&expand=5236) + * [ ] [`_mm512_mask_cvtps_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtps_epu32&expand=5236) + * [ ] 
[`_mm512_mask_cvtps_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtps_pd&expand=5236) + * [ ] [`_mm512_mask_cvtps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtps_ph&expand=5236) + * [ ] [`_mm512_mask_cvtpslo_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtpslo_pd&expand=5236) + * [ ] [`_mm512_mask_cvtsepi32_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtsepi32_epi16&expand=5236) + * [ ] [`_mm512_mask_cvtsepi32_epi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtsepi32_epi8&expand=5236) + * [ ] [`_mm512_mask_cvtsepi32_storeu_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtsepi32_storeu_epi16&expand=5236) + * [ ] [`_mm512_mask_cvtsepi32_storeu_epi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtsepi32_storeu_epi8&expand=5236) + * [ ] [`_mm512_mask_cvtsepi64_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtsepi64_epi16&expand=5236) + * [ ] [`_mm512_mask_cvtsepi64_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtsepi64_epi32&expand=5236) + * [ ] [`_mm512_mask_cvtsepi64_epi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtsepi64_epi8&expand=5236) + * [ ] [`_mm512_mask_cvtsepi64_storeu_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtsepi64_storeu_epi16&expand=5236) + * [ ] [`_mm512_mask_cvtsepi64_storeu_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtsepi64_storeu_epi32&expand=5236) + * [ ] [`_mm512_mask_cvtsepi64_storeu_epi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtsepi64_storeu_epi8&expand=5236) + * [ ] [`_mm512_mask_cvtt_roundpd_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundpd_epi32&expand=5236) + * [ ] [`_mm512_mask_cvtt_roundpd_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundpd_epu32&expand=5236) + * [ ] [`_mm512_mask_cvtt_roundps_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundps_epi32&expand=5236) + * [ ] [`_mm512_mask_cvtt_roundps_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtt_roundps_epu32&expand=5236) + * [ ] [`_mm512_mask_cvttpd_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttpd_epi32&expand=5236) + * [ ] [`_mm512_mask_cvttpd_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttpd_epu32&expand=5236) + * [ ] [`_mm512_mask_cvttps_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttps_epi32&expand=5236) + * [ ] [`_mm512_mask_cvttps_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvttps_epu32&expand=5236) + * [ ] [`_mm512_mask_cvtusepi32_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi32_epi16&expand=5236) + * [ ] [`_mm512_mask_cvtusepi32_epi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi32_epi8&expand=5236) + * [ ] 
+ * [ ] [`_mm512_mask_cvtusepi32_storeu_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi32_storeu_epi16&expand=5236)
+ * [ ] [`_mm512_mask_cvtusepi32_storeu_epi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi32_storeu_epi8&expand=5236)
+ * [ ] [`_mm512_mask_cvtusepi64_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi64_epi16&expand=5236)
+ * [ ] [`_mm512_mask_cvtusepi64_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi64_epi32&expand=5236)
+ * [ ] [`_mm512_mask_cvtusepi64_epi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi64_epi8&expand=5236)
+ * [ ] [`_mm512_mask_cvtusepi64_storeu_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi64_storeu_epi16&expand=5236)
+ * [ ] [`_mm512_mask_cvtusepi64_storeu_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi64_storeu_epi32&expand=5236)
+ * [ ] [`_mm512_mask_cvtusepi64_storeu_epi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi64_storeu_epi8&expand=5236)
+ * [x] [`_mm512_mask_div_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_pd&expand=5236)
+ * [x] [`_mm512_mask_div_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_ps&expand=5236)
+ * [ ] [`_mm512_mask_div_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_round_pd&expand=5236)
+ * [ ] [`_mm512_mask_div_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_round_ps&expand=5236)
+ * [ ] [`_mm512_mask_expand_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_expand_epi32&expand=5236)
+ * [ ] [`_mm512_mask_expand_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_expand_epi64&expand=5236)
+ * [ ] [`_mm512_mask_expand_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_expand_pd&expand=5236)
+ * [ ] [`_mm512_mask_expand_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_expand_ps&expand=5236)
+ * [ ] [`_mm512_mask_expandloadu_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_expandloadu_epi32&expand=5236)
+ * [ ] [`_mm512_mask_expandloadu_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_expandloadu_epi64&expand=5236)
+ * [ ] [`_mm512_mask_expandloadu_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_expandloadu_pd&expand=5236)
+ * [ ] [`_mm512_mask_expandloadu_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_expandloadu_ps&expand=5236)
+ * [ ] [`_mm512_mask_extractf32x4_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_extractf32x4_ps&expand=5236)
+ * [ ] [`_mm512_mask_extractf64x4_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_extractf64x4_pd&expand=5236)
+ * [ ] [`_mm512_mask_extracti32x4_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_extracti32x4_epi32&expand=5236)
+ * [ ] [`_mm512_mask_extracti64x4_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_extracti64x4_epi64&expand=5236)
+ * [ ] [`_mm512_mask_fixupimm_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fixupimm_pd&expand=5236)
+ * [ ] [`_mm512_mask_fixupimm_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fixupimm_ps&expand=5236)
+ * [ ] [`_mm512_mask_fixupimm_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fixupimm_round_pd&expand=5236)
+ * [ ] [`_mm512_mask_fixupimm_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fixupimm_round_ps&expand=5236)
+ * [ ] [`_mm512_mask_fmadd_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_pd&expand=5236)
+ * [ ] [`_mm512_mask_fmadd_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_ps&expand=5236)
+ * [ ] [`_mm512_mask_fmadd_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_round_pd&expand=5236)
+ * [ ] [`_mm512_mask_fmadd_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmadd_round_ps&expand=5236)
+ * [ ] [`_mm512_mask_fmaddsub_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmaddsub_pd&expand=5236)
+ * [ ] [`_mm512_mask_fmaddsub_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmaddsub_ps&expand=5236)
+ * [ ] [`_mm512_mask_fmaddsub_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmaddsub_round_pd&expand=5236)
+ * [ ] [`_mm512_mask_fmaddsub_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmaddsub_round_ps&expand=5236)
+ * [ ] [`_mm512_mask_fmsub_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsub_pd&expand=5236)
+ * [ ] [`_mm512_mask_fmsub_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsub_ps&expand=5236)
+ * [ ] [`_mm512_mask_fmsub_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsub_round_pd&expand=5236)
+ * [ ] [`_mm512_mask_fmsub_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsub_round_ps&expand=5236)
+ * [ ] [`_mm512_mask_fmsubadd_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsubadd_pd&expand=5236)
+ * [ ] [`_mm512_mask_fmsubadd_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsubadd_ps&expand=5236)
+ * [ ] [`_mm512_mask_fmsubadd_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsubadd_round_pd&expand=5236)
+ * [ ] [`_mm512_mask_fmsubadd_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fmsubadd_round_ps&expand=5236)
+ * [ ] [`_mm512_mask_fnmadd_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmadd_pd&expand=5236)
+ * [ ] [`_mm512_mask_fnmadd_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmadd_ps&expand=5236)
+ * [ ] [`_mm512_mask_fnmadd_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmadd_round_pd&expand=5236)
+ * [ ] [`_mm512_mask_fnmadd_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmadd_round_ps&expand=5236)
+ * [ ] [`_mm512_mask_fnmsub_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmsub_pd&expand=5236)
+ * [ ] [`_mm512_mask_fnmsub_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmsub_ps&expand=5236)
+ * [ ] [`_mm512_mask_fnmsub_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmsub_round_pd&expand=5236)
+ * [ ] [`_mm512_mask_fnmsub_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_fnmsub_round_ps&expand=5236)
+ * [ ] [`_mm512_mask_getexp_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_pd&expand=5236)
+ * [ ] [`_mm512_mask_getexp_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_ps&expand=5236)
+ * [ ] [`_mm512_mask_getexp_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_round_pd&expand=5236)
+ * [ ] [`_mm512_mask_getexp_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getexp_round_ps&expand=5236)
+ * [ ] [`_mm512_mask_getmant_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_pd&expand=5236)
+ * [ ] [`_mm512_mask_getmant_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_ps&expand=5236)
+ * [ ] [`_mm512_mask_getmant_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_round_pd&expand=5236)
+ * [ ] [`_mm512_mask_getmant_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_getmant_round_ps&expand=5236)
+ * [ ] [`_mm512_mask_i32extgather_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32extgather_epi32&expand=5236)
+ * [ ] [`_mm512_mask_i32extgather_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32extgather_ps&expand=5236)
+ * [ ] [`_mm512_mask_i32extscatter_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32extscatter_epi32&expand=5236)
+ * [ ] [`_mm512_mask_i32extscatter_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32extscatter_ps&expand=5236)
+ * [x] [`_mm512_mask_i32gather_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32gather_epi32&expand=5236)
+ * [x] [`_mm512_mask_i32gather_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32gather_epi64&expand=5236)
+ * [x] [`_mm512_mask_i32gather_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32gather_pd&expand=5236)
+ * [x] [`_mm512_mask_i32gather_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32gather_ps&expand=5236)
+ * [ ] [`_mm512_mask_i32loextgather_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loextgather_epi64&expand=5236)
+ * [ ] [`_mm512_mask_i32loextgather_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loextgather_pd&expand=5236)
+ * [ ] [`_mm512_mask_i32loextscatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loextscatter_epi64&expand=5236)
+ * [ ] [`_mm512_mask_i32loextscatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loextscatter_pd&expand=5236)
+ * [ ] [`_mm512_mask_i32logather_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_epi64&expand=5236)
+ * [ ] [`_mm512_mask_i32logather_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_pd&expand=5236)
+ * [ ] [`_mm512_mask_i32loscatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_pd&expand=5236)
+ * [x] [`_mm512_mask_i32scatter_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32scatter_epi32&expand=5236)
+ * [x] [`_mm512_mask_i32scatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32scatter_epi64&expand=5236)
+ * [x] [`_mm512_mask_i32scatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32scatter_pd&expand=5236)
+ * [x] [`_mm512_mask_i32scatter_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32scatter_ps&expand=5236)
+ * [x] [`_mm512_mask_i64gather_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64gather_epi32&expand=5236)
+ * [x] [`_mm512_mask_i64gather_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64gather_epi64&expand=5236)
+ * [x] [`_mm512_mask_i64gather_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64gather_pd&expand=5236)
+ * [x] [`_mm512_mask_i64gather_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64gather_ps&expand=5236)
+ * [x] [`_mm512_mask_i64scatter_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64scatter_epi32&expand=5236)
+ * [x] [`_mm512_mask_i64scatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64scatter_epi64&expand=5236)
+ * [x] [`_mm512_mask_i64scatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64scatter_pd&expand=5236)
+ * [x] [`_mm512_mask_i64scatter_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64scatter_ps&expand=5236)
+ * [ ] [`_mm512_mask_insertf32x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_insertf32x4&expand=5236)
+ * [ ] [`_mm512_mask_insertf64x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_insertf64x4&expand=5236)
+ * [ ] [`_mm512_mask_inserti32x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_inserti32x4&expand=5236)
+ * [ ] [`_mm512_mask_inserti64x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_inserti64x4&expand=5236)
+ * [ ] [`_mm512_mask_load_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_load_epi32&expand=5236)
+ * [ ] [`_mm512_mask_load_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_load_epi64&expand=5236)
+ * [ ] [`_mm512_mask_load_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_load_pd&expand=5236)
+ * [ ] [`_mm512_mask_load_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_load_ps&expand=5236)
+ * [ ] [`_mm512_mask_loadu_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_loadu_epi32&expand=5236)
+ * [ ] [`_mm512_mask_loadu_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_loadu_epi64&expand=5236)
+ * [ ] [`_mm512_mask_loadu_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_loadu_pd&expand=5236)
+ * [ ] [`_mm512_mask_loadu_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_loadu_ps&expand=5236)
+ * [x] [`_mm512_mask_max_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_epi32&expand=5236)
+ * [x] [`_mm512_mask_max_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_epi64&expand=5236)
+ * [x] [`_mm512_mask_max_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_epu32&expand=5236)
+ * [x] [`_mm512_mask_max_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_epu64&expand=5236)
+ * [x] [`_mm512_mask_max_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_pd&expand=5236)
+ * [x] [`_mm512_mask_max_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_ps&expand=5236)
+ * [ ] [`_mm512_mask_max_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_round_pd&expand=5236)
+ * [ ] [`_mm512_mask_max_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_round_ps&expand=5236)
+ * [x] [`_mm512_mask_min_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_epi32&expand=5236)
+ * [x] [`_mm512_mask_min_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_epi64&expand=5236)
+ * [x] [`_mm512_mask_min_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_epu32&expand=5236)
+ * [x] [`_mm512_mask_min_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_epu64&expand=5236)
+ * [x] [`_mm512_mask_min_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_pd&expand=5236)
+ * [x] [`_mm512_mask_min_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_ps&expand=5236)
+ * [ ] [`_mm512_mask_min_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_round_pd&expand=5236)
+ * [ ] [`_mm512_mask_min_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_round_ps&expand=5236)
+ * [ ] [`_mm512_mask_mov_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mov_epi32&expand=5236)
+ * [ ] [`_mm512_mask_mov_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mov_epi64&expand=5236)
+ * [ ] [`_mm512_mask_mov_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mov_pd&expand=5236)
+ * [ ] [`_mm512_mask_mov_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mov_ps&expand=5236)
+ * [ ] [`_mm512_mask_movedup_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_movedup_pd&expand=5236)
+ * [ ] [`_mm512_mask_movehdup_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_movehdup_ps&expand=5236)
+ * [ ] [`_mm512_mask_moveldup_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_moveldup_ps&expand=5236)
+ * [x] [`_mm512_mask_mul_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_epi32&expand=5236)
+ * [x] [`_mm512_mask_mul_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_epu32&expand=5236)
+ * [x] [`_mm512_mask_mul_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_pd&expand=5236)
+ * [x] [`_mm512_mask_mul_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_ps&expand=5236)
+ * [ ] [`_mm512_mask_mul_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_round_pd&expand=5236)
+ * [ ] [`_mm512_mask_mul_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_round_ps&expand=5236)
+ * [x] [`_mm512_mask_mullo_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mullo_epi32&expand=5236)
+ * [x] [`_mm512_mask_mullox_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mullox_epi64&expand=5236)
 * [x] [`_mm512_mask_or_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_or_epi32&expand=5236)
 * [x] [`_mm512_mask_or_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_or_epi64&expand=5236)
+ * [ ] [`_mm512_mask_permute_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_permute_pd&expand=5236)
+ * [ ] [`_mm512_mask_permute_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_permute_ps&expand=5236)
+ * [ ] [`_mm512_mask_permutevar_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_permutevar_epi32&expand=5236)
+ * [ ] [`_mm512_mask_permutevar_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_permutevar_pd&expand=5236)
+ * [ ] [`_mm512_mask_permutevar_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_permutevar_ps&expand=5236)
+ * [ ] [`_mm512_mask_permutex2var_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_permutex2var_epi32&expand=5236)
+ * [ ] [`_mm512_mask_permutex2var_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_permutex2var_epi64&expand=5236)
+ * [ ] [`_mm512_mask_permutex2var_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_permutex2var_pd&expand=5236)
+ * [ ] [`_mm512_mask_permutex2var_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_permutex2var_ps&expand=5236)
+ * [ ] [`_mm512_mask_permutex_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_permutex_epi64&expand=5236)
+ * [ ] [`_mm512_mask_permutex_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_permutex_pd&expand=5236)
+ * [ ] [`_mm512_mask_permutexvar_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_permutexvar_epi32&expand=5236)
+ * [ ] [`_mm512_mask_permutexvar_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_permutexvar_epi64&expand=5236)
+ * [ ] [`_mm512_mask_permutexvar_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_permutexvar_pd&expand=5236)
+ * [ ] [`_mm512_mask_permutexvar_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_permutexvar_ps&expand=5236)
+ * [ ] [`_mm512_mask_rcp14_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rcp14_pd&expand=5236)
+ * [ ] [`_mm512_mask_rcp14_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rcp14_ps&expand=5236)
+ * [ ] [`_mm512_mask_reduce_add_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_add_epi32&expand=5236)
+ * [ ] [`_mm512_mask_reduce_add_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_add_epi64&expand=5236)
+ * [ ] [`_mm512_mask_reduce_add_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_add_pd&expand=5236)
+ * [ ] [`_mm512_mask_reduce_add_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_add_ps&expand=5236)
+ * [ ] [`_mm512_mask_reduce_and_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_and_epi32&expand=5236)
+ * [ ] [`_mm512_mask_reduce_and_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_and_epi64&expand=5236)
+ * [ ] [`_mm512_mask_reduce_max_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_max_epi32&expand=5236)
+ * [ ] [`_mm512_mask_reduce_max_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_max_epi64&expand=5236)
+ * [ ] [`_mm512_mask_reduce_max_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_max_epu32&expand=5236)
+ * [ ] [`_mm512_mask_reduce_max_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_max_epu64&expand=5236)
+ * [ ] [`_mm512_mask_reduce_max_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_max_pd&expand=5236)
+ * [ ] [`_mm512_mask_reduce_max_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_max_ps&expand=5236)
+ * [ ] [`_mm512_mask_reduce_min_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_min_epi32&expand=5236)
+ * [ ] [`_mm512_mask_reduce_min_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_min_epi64&expand=5236)
+ * [ ] [`_mm512_mask_reduce_min_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_min_epu32&expand=5236)
+ * [ ] [`_mm512_mask_reduce_min_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_min_epu64&expand=5236)
+ * [ ] [`_mm512_mask_reduce_min_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_min_pd&expand=5236)
+ * [ ] [`_mm512_mask_reduce_min_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_min_ps&expand=5236)
+ * [ ] [`_mm512_mask_reduce_mul_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_mul_epi32&expand=5236)
+ * [ ] [`_mm512_mask_reduce_mul_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_mul_epi64&expand=5236)
+ * [ ] [`_mm512_mask_reduce_mul_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_mul_pd&expand=5236)
+ * [ ] [`_mm512_mask_reduce_mul_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_mul_ps&expand=5236)
+ * [ ] [`_mm512_mask_reduce_or_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_or_epi32&expand=5236)
+ * [ ] [`_mm512_mask_reduce_or_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_reduce_or_epi64&expand=5236)
 * [x] [`_mm512_mask_rol_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rol_epi32&expand=5236)
 * [x] [`_mm512_mask_rol_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rol_epi64&expand=5236)
 * [x] [`_mm512_mask_rolv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rolv_epi32&expand=5236)
@@ -20,12 +605,35 @@
 * [x] [`_mm512_mask_ror_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_ror_epi64&expand=5236)
 * [x] [`_mm512_mask_rorv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rorv_epi32&expand=5236)
 * [x] [`_mm512_mask_rorv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rorv_epi64&expand=5236)
+ * [ ] [`_mm512_mask_roundscale_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_pd&expand=5236)
+ * [ ] [`_mm512_mask_roundscale_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_ps&expand=5236)
+ * [ ] [`_mm512_mask_roundscale_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_round_pd&expand=5236)
+ * [ ] [`_mm512_mask_roundscale_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_roundscale_round_ps&expand=5236)
+ * [ ] [`_mm512_mask_rsqrt14_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rsqrt14_pd&expand=5236)
+ * [ ] [`_mm512_mask_rsqrt14_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rsqrt14_ps&expand=5236)
+ * [ ] [`_mm512_mask_scalef_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_pd&expand=5236)
+ * [ ] [`_mm512_mask_scalef_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_ps&expand=5236)
+ * [ ] [`_mm512_mask_scalef_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_round_pd&expand=5236)
+ * [ ] [`_mm512_mask_scalef_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_scalef_round_ps&expand=5236)
+ * [ ] [`_mm512_mask_set1_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_set1_epi32&expand=5236)
+ * [x] [`_mm512_mask_set1_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_set1_epi64&expand=5236)
+ * [ ] [`_mm512_mask_shuffle_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shuffle_epi32&expand=5236)
+ * [ ] [`_mm512_mask_shuffle_f32x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shuffle_f32x4&expand=5236)
+ * [ ] [`_mm512_mask_shuffle_f64x2`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shuffle_f64x2&expand=5236)
+ * [ ] [`_mm512_mask_shuffle_i32x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shuffle_i32x4&expand=5236)
+ * [ ] [`_mm512_mask_shuffle_i64x2`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shuffle_i64x2&expand=5236)
+ * [ ] [`_mm512_mask_shuffle_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shuffle_pd&expand=5236)
+ * [ ] [`_mm512_mask_shuffle_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shuffle_ps&expand=5236)
 * [x] [`_mm512_mask_sll_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sll_epi32&expand=5236)
 * [x] [`_mm512_mask_sll_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sll_epi64&expand=5236)
 * [x] [`_mm512_mask_slli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_slli_epi32&expand=5236)
 * [x] [`_mm512_mask_slli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_slli_epi64&expand=5236)
 * [x] [`_mm512_mask_sllv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sllv_epi32&expand=5236)
 * [x] [`_mm512_mask_sllv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sllv_epi64&expand=5236)
+ * [x] [`_mm512_mask_sqrt_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_pd&expand=5236)
+ * [x] [`_mm512_mask_sqrt_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_ps&expand=5236)
+ * [ ] [`_mm512_mask_sqrt_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_round_pd&expand=5236)
+ * [ ] [`_mm512_mask_sqrt_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sqrt_round_ps&expand=5236)
 * [x] [`_mm512_mask_sra_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sra_epi32&expand=5236)
 * [x] [`_mm512_mask_sra_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sra_epi64&expand=5236)
 * [x] [`_mm512_mask_srai_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srai_epi32&expand=5236)
@@ -38,12 +646,229 @@
 * [x] [`_mm512_mask_srli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srli_epi64&expand=5236)
 * [x] [`_mm512_mask_srlv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srlv_epi32&expand=5236)
 * [x] [`_mm512_mask_srlv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srlv_epi64&expand=5236)
+ * [ ] [`_mm512_mask_store_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_store_epi32&expand=5236)
+ * [ ] [`_mm512_mask_store_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_store_epi64&expand=5236)
+ * [ ] [`_mm512_mask_store_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_store_pd&expand=5236)
+ * [ ] [`_mm512_mask_store_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_store_ps&expand=5236)
+ * [ ] [`_mm512_mask_storeu_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_storeu_epi32&expand=5236)
+ * [ ] [`_mm512_mask_storeu_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_storeu_epi64&expand=5236)
+ * [ ] [`_mm512_mask_storeu_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_storeu_pd&expand=5236)
+ * [ ] [`_mm512_mask_storeu_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_storeu_ps&expand=5236)
+ * [x] [`_mm512_mask_sub_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_epi32&expand=5236)
+ * [x] [`_mm512_mask_sub_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_epi64&expand=5236)
+ * [x] [`_mm512_mask_sub_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_pd&expand=5236)
+ * [x] [`_mm512_mask_sub_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_ps&expand=5236)
+ * [ ] [`_mm512_mask_sub_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_round_pd&expand=5236)
+ * [ ] [`_mm512_mask_sub_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sub_round_ps&expand=5236)
+ * [ ] [`_mm512_mask_ternarylogic_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_ternarylogic_epi32&expand=5236)
+ * [ ] [`_mm512_mask_ternarylogic_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_ternarylogic_epi64&expand=5236)
+ * [ ] [`_mm512_mask_test_epi32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_test_epi32_mask&expand=5236)
+ * [ ] [`_mm512_mask_test_epi64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_test_epi64_mask&expand=5236)
+ * [ ] [`_mm512_mask_testn_epi32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_testn_epi32_mask&expand=5236)
+ * [ ] [`_mm512_mask_testn_epi64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_testn_epi64_mask&expand=5236)
+ * [ ] [`_mm512_mask_unpackhi_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_unpackhi_epi32&expand=5236)
+ * [ ] [`_mm512_mask_unpackhi_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_unpackhi_epi64&expand=5236)
+ * [ ] [`_mm512_mask_unpackhi_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_unpackhi_pd&expand=5236)
+ * [ ] [`_mm512_mask_unpackhi_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_unpackhi_ps&expand=5236)
+ * [ ] [`_mm512_mask_unpacklo_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_unpacklo_epi32&expand=5236)
+ * [ ] [`_mm512_mask_unpacklo_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_unpacklo_epi64&expand=5236)
+ * [ ] [`_mm512_mask_unpacklo_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_unpacklo_pd&expand=5236)
+ * [ ] [`_mm512_mask_unpacklo_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_unpacklo_ps&expand=5236)
 * [x] [`_mm512_mask_xor_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_xor_epi32&expand=5236)
 * [x] [`_mm512_mask_xor_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_xor_epi64&expand=5236)
+ * [x] [`_mm512_maskz_abs_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_abs_epi32&expand=5236)
+ * [x] [`_mm512_maskz_abs_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_abs_epi64&expand=5236)
+ * [x] [`_mm512_maskz_add_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_epi32&expand=5236)
+ * [x] [`_mm512_maskz_add_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_epi64&expand=5236)
+ * [x] [`_mm512_maskz_add_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_pd&expand=5236)
+ * [x] [`_mm512_maskz_add_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_ps&expand=5236)
+ * [ ] [`_mm512_maskz_add_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_round_pd&expand=5236)
+ * [ ] [`_mm512_maskz_add_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_add_round_ps&expand=5236)
+ * [ ] [`_mm512_maskz_alignr_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_alignr_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_alignr_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_alignr_epi64&expand=5236)
 * [x] [`_mm512_maskz_and_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_and_epi32&expand=5236)
 * [x] [`_mm512_maskz_and_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_and_epi64&expand=5236)
+ * [ ] [`_mm512_maskz_andnot_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_andnot_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_andnot_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_andnot_epi64&expand=5236)
+ * [ ] [`_mm512_maskz_broadcast_f32x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_broadcast_f32x4&expand=5236)
+ * [ ] [`_mm512_maskz_broadcast_f64x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_broadcast_f64x4&expand=5236)
+ * [ ] [`_mm512_maskz_broadcast_i32x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_broadcast_i32x4&expand=5236)
+ * [ ] [`_mm512_maskz_broadcast_i64x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_broadcast_i64x4&expand=5236)
+ * [ ] [`_mm512_maskz_broadcastd_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_broadcastd_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_broadcastq_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_broadcastq_epi64&expand=5236)
+ * [ ] [`_mm512_maskz_broadcastsd_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_broadcastsd_pd&expand=5236)
+ * [ ] [`_mm512_maskz_broadcastss_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_broadcastss_ps&expand=5236)
+ * [ ] [`_mm512_maskz_compress_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_compress_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_compress_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_compress_epi64&expand=5236)
+ * [ ] [`_mm512_maskz_compress_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_compress_pd&expand=5236)
+ * [ ] [`_mm512_maskz_compress_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_compress_ps&expand=5236)
+ * [ ] [`_mm512_maskz_cvt_roundepi32_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepi32_ps&expand=5236)
+ * [ ] [`_mm512_maskz_cvt_roundepu32_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundepu32_ps&expand=5236)
+ * [ ] [`_mm512_maskz_cvt_roundpd_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundpd_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_cvt_roundpd_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundpd_epu32&expand=5236)
+ * [ ] [`_mm512_maskz_cvt_roundpd_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundpd_ps&expand=5236)
+ * [ ] [`_mm512_maskz_cvt_roundph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundph_ps&expand=5236)
+ * [ ] [`_mm512_maskz_cvt_roundps_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundps_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_cvt_roundps_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundps_epu32&expand=5236)
+ * [ ] [`_mm512_maskz_cvt_roundps_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundps_pd&expand=5236)
+ * [ ] [`_mm512_maskz_cvt_roundps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvt_roundps_ph&expand=5236)
+ * [ ] [`_mm512_maskz_cvtepi16_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi16_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_cvtepi16_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi16_epi64&expand=5236)
+ * [ ] [`_mm512_maskz_cvtepi32_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi32_epi16&expand=5236)
+ * [ ] [`_mm512_maskz_cvtepi32_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi32_epi64&expand=5236)
+ * [ ] [`_mm512_maskz_cvtepi32_epi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi32_epi8&expand=5236)
+ * [ ] [`_mm512_maskz_cvtepi32_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi32_pd&expand=5236)
+ * [ ] [`_mm512_maskz_cvtepi32_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi32_ps&expand=5236)
+ * [ ] [`_mm512_maskz_cvtepi64_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi64_epi16&expand=5236)
+ * [ ] [`_mm512_maskz_cvtepi64_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi64_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_cvtepi64_epi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi64_epi8&expand=5236)
+ * [ ] [`_mm512_maskz_cvtepi8_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi8_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_cvtepi8_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepi8_epi64&expand=5236)
+ * [ ] [`_mm512_maskz_cvtepu16_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu16_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_cvtepu16_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu16_epi64&expand=5236)
+ * [ ] [`_mm512_maskz_cvtepu32_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu32_epi64&expand=5236)
+ * [ ] [`_mm512_maskz_cvtepu32_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu32_pd&expand=5236)
+ * [ ] [`_mm512_maskz_cvtepu32_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu32_ps&expand=5236)
+ * [ ] [`_mm512_maskz_cvtepu8_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu8_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_cvtepu8_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtepu8_epi64&expand=5236)
+ * [ ] [`_mm512_maskz_cvtpd_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtpd_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_cvtpd_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtpd_epu32&expand=5236)
+ * [ ] [`_mm512_maskz_cvtpd_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtpd_ps&expand=5236)
+ * [ ] [`_mm512_maskz_cvtph_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtph_ps&expand=5236)
+ * [ ] [`_mm512_maskz_cvtps_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtps_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_cvtps_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtps_epu32&expand=5236)
+ * [ ] [`_mm512_maskz_cvtps_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtps_pd&expand=5236)
+ * [ ] [`_mm512_maskz_cvtps_ph`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtps_ph&expand=5236)
+ * [ ] [`_mm512_maskz_cvtsepi32_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtsepi32_epi16&expand=5236)
+ * [ ] [`_mm512_maskz_cvtsepi32_epi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtsepi32_epi8&expand=5236)
+ * [ ] [`_mm512_maskz_cvtsepi64_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtsepi64_epi16&expand=5236)
+ * [ ] [`_mm512_maskz_cvtsepi64_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtsepi64_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_cvtsepi64_epi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtsepi64_epi8&expand=5236)
+ * [ ] [`_mm512_maskz_cvtt_roundpd_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundpd_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_cvtt_roundpd_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundpd_epu32&expand=5236)
+ * [ ] [`_mm512_maskz_cvtt_roundps_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundps_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_cvtt_roundps_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtt_roundps_epu32&expand=5236)
+ * [ ] [`_mm512_maskz_cvttpd_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttpd_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_cvttpd_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttpd_epu32&expand=5236)
+ * [ ] [`_mm512_maskz_cvttps_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttps_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_cvttps_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvttps_epu32&expand=5236)
+ * [ ] [`_mm512_maskz_cvtusepi32_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtusepi32_epi16&expand=5236)
+ * [ ] [`_mm512_maskz_cvtusepi32_epi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtusepi32_epi8&expand=5236)
+ * [ ] [`_mm512_maskz_cvtusepi64_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtusepi64_epi16&expand=5236)
+ * [ ] [`_mm512_maskz_cvtusepi64_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtusepi64_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_cvtusepi64_epi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtusepi64_epi8&expand=5236)
+ * [x] [`_mm512_maskz_div_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_pd&expand=5236)
+ * [x] [`_mm512_maskz_div_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_ps&expand=5236)
+ * [ ] [`_mm512_maskz_div_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_round_pd&expand=5236)
+ * [ ] [`_mm512_maskz_div_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_round_ps&expand=5236)
+ * [ ] [`_mm512_maskz_expand_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_expand_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_expand_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_expand_epi64&expand=5236)
+ * [ ] [`_mm512_maskz_expand_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_expand_pd&expand=5236)
+ * [ ] [`_mm512_maskz_expand_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_expand_ps&expand=5236)
+ * [ ] [`_mm512_maskz_expandloadu_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_expandloadu_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_expandloadu_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_expandloadu_epi64&expand=5236)
+ * [ ] [`_mm512_maskz_expandloadu_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_expandloadu_pd&expand=5236)
+ * [ ] [`_mm512_maskz_expandloadu_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_expandloadu_ps&expand=5236)
+ * [ ] [`_mm512_maskz_extractf32x4_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_extractf32x4_ps&expand=5236)
+ * [ ] [`_mm512_maskz_extractf64x4_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_extractf64x4_pd&expand=5236)
+ * [ ] [`_mm512_maskz_extracti32x4_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_extracti32x4_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_extracti64x4_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_extracti64x4_epi64&expand=5236)
+ * [ ] [`_mm512_maskz_fixupimm_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fixupimm_pd&expand=5236)
+ * [ ] [`_mm512_maskz_fixupimm_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fixupimm_ps&expand=5236)
+ * [ ] [`_mm512_maskz_fixupimm_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fixupimm_round_pd&expand=5236)
+ * [ ] [`_mm512_maskz_fixupimm_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fixupimm_round_ps&expand=5236)
+ * [ ] [`_mm512_maskz_fmadd_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_pd&expand=5236)
+ * [ ] [`_mm512_maskz_fmadd_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_ps&expand=5236)
+ * [ ] [`_mm512_maskz_fmadd_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_round_pd&expand=5236)
+ * [ ] [`_mm512_maskz_fmadd_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmadd_round_ps&expand=5236)
+ * [ ] [`_mm512_maskz_fmaddsub_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmaddsub_pd&expand=5236)
+ * [ ] [`_mm512_maskz_fmaddsub_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmaddsub_ps&expand=5236)
+ * [ ] [`_mm512_maskz_fmaddsub_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmaddsub_round_pd&expand=5236)
+ * [ ] [`_mm512_maskz_fmaddsub_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmaddsub_round_ps&expand=5236)
+ * [ ] [`_mm512_maskz_fmsub_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsub_pd&expand=5236)
+ * [ ] [`_mm512_maskz_fmsub_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsub_ps&expand=5236)
+ * [ ] [`_mm512_maskz_fmsub_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsub_round_pd&expand=5236)
+ * [ ] [`_mm512_maskz_fmsub_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsub_round_ps&expand=5236)
+ * [ ] [`_mm512_maskz_fmsubadd_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsubadd_pd&expand=5236)
+ * [ ] [`_mm512_maskz_fmsubadd_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsubadd_ps&expand=5236)
+ * [ ] [`_mm512_maskz_fmsubadd_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsubadd_round_pd&expand=5236)
+ * [ ] [`_mm512_maskz_fmsubadd_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fmsubadd_round_ps&expand=5236)
+ * [ ] [`_mm512_maskz_fnmadd_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmadd_pd&expand=5236)
+ * [ ] [`_mm512_maskz_fnmadd_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmadd_ps&expand=5236)
+ * [ ] [`_mm512_maskz_fnmadd_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmadd_round_pd&expand=5236)
+ * [ ] [`_mm512_maskz_fnmadd_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmadd_round_ps&expand=5236)
+ * [ ] [`_mm512_maskz_fnmsub_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmsub_pd&expand=5236)
+ * [ ] [`_mm512_maskz_fnmsub_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmsub_ps&expand=5236)
+ * [ ] [`_mm512_maskz_fnmsub_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmsub_round_pd&expand=5236)
+ * [ ] [`_mm512_maskz_fnmsub_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_fnmsub_round_ps&expand=5236)
+ * [ ] [`_mm512_maskz_getexp_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_pd&expand=5236)
+ * [ ] [`_mm512_maskz_getexp_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_ps&expand=5236)
+ * [ ] [`_mm512_maskz_getexp_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_round_pd&expand=5236)
+ * [ ] [`_mm512_maskz_getexp_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getexp_round_ps&expand=5236)
+ * [ ] [`_mm512_maskz_getmant_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_pd&expand=5236)
+ * [ ] [`_mm512_maskz_getmant_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_ps&expand=5236)
+ * [ ] [`_mm512_maskz_getmant_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_round_pd&expand=5236)
+ * [ ] [`_mm512_maskz_getmant_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_getmant_round_ps&expand=5236)
+ * [ ] [`_mm512_maskz_insertf32x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_insertf32x4&expand=5236)
+ * [ ] [`_mm512_maskz_insertf64x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_insertf64x4&expand=5236)
+ * [ ] [`_mm512_maskz_inserti32x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_inserti32x4&expand=5236)
+ * [ ] [`_mm512_maskz_inserti64x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_inserti64x4&expand=5236)
+ * [ ] [`_mm512_maskz_load_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_load_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_load_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_load_epi64&expand=5236)
+ * [ ] [`_mm512_maskz_load_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_load_pd&expand=5236)
+ * [ ] [`_mm512_maskz_load_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_load_ps&expand=5236)
+ * [ ] [`_mm512_maskz_loadu_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_loadu_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_loadu_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_loadu_epi64&expand=5236)
+ * [ ] [`_mm512_maskz_loadu_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_loadu_pd&expand=5236)
+ * [ ] [`_mm512_maskz_loadu_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_loadu_ps&expand=5236)
[x] [`_mm512_maskz_max_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_epi32&expand=5236) + * [x] [`_mm512_maskz_max_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_epi64&expand=5236) + * [x] [`_mm512_maskz_max_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_epu32&expand=5236) + * [x] [`_mm512_maskz_max_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_epu64&expand=5236) + * [x] [`_mm512_maskz_max_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_pd&expand=5236) + * [x] [`_mm512_maskz_max_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_ps&expand=5236) + * [ ] [`_mm512_maskz_max_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_round_pd&expand=5236) + * [ ] [`_mm512_maskz_max_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_round_ps&expand=5236) + * [x] [`_mm512_maskz_min_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_epi32&expand=5236) + * [x] [`_mm512_maskz_min_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_epi64&expand=5236) + * [x] [`_mm512_maskz_min_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_epu32&expand=5236) + * [x] [`_mm512_maskz_min_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_epu64&expand=5236) + * [x] [`_mm512_maskz_min_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_pd&expand=5236) + * [x] [`_mm512_maskz_min_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_ps&expand=5236) + * [ ] [`_mm512_maskz_min_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_round_pd&expand=5236) + * [ ] [`_mm512_maskz_min_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_round_ps&expand=5236) + * [ ] [`_mm512_maskz_mov_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mov_epi32&expand=5236) + * [ ] [`_mm512_maskz_mov_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mov_epi64&expand=5236) + * [ ] [`_mm512_maskz_mov_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mov_pd&expand=5236) + * [ ] [`_mm512_maskz_mov_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mov_ps&expand=5236) + * [ ] [`_mm512_maskz_movedup_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_movedup_pd&expand=5236) + * [ ] [`_mm512_maskz_movehdup_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_movehdup_ps&expand=5236) + * [ ] [`_mm512_maskz_moveldup_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_moveldup_ps&expand=5236) + * [x] [`_mm512_maskz_mul_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_epi32&expand=5236) + * [x] [`_mm512_maskz_mul_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_epu32&expand=5236) + * [x] 
+ * [x] [`_mm512_maskz_mul_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_pd&expand=5236)
+ * [x] [`_mm512_maskz_mul_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_ps&expand=5236)
+ * [ ] [`_mm512_maskz_mul_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_round_pd&expand=5236)
+ * [ ] [`_mm512_maskz_mul_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_round_ps&expand=5236)
+ * [x] [`_mm512_maskz_mullo_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mullo_epi32&expand=5236)
* [x] [`_mm512_maskz_or_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_or_epi32&expand=5236)
* [x] [`_mm512_maskz_or_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_or_epi64&expand=5236)
+ * [ ] [`_mm512_maskz_permute_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_permute_pd&expand=5236)
+ * [ ] [`_mm512_maskz_permute_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_permute_ps&expand=5236)
+ * [ ] [`_mm512_maskz_permutevar_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_permutevar_pd&expand=5236)
+ * [ ] [`_mm512_maskz_permutevar_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_permutevar_ps&expand=5236)
+ * [ ] [`_mm512_maskz_permutex2var_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_permutex2var_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_permutex2var_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_permutex2var_epi64&expand=5236)
+ * [ ] [`_mm512_maskz_permutex2var_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_permutex2var_pd&expand=5236)
+ * [ ] [`_mm512_maskz_permutex2var_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_permutex2var_ps&expand=5236)
+ * [ ] [`_mm512_maskz_permutex_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_permutex_epi64&expand=5236)
+ * [ ] [`_mm512_maskz_permutex_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_permutex_pd&expand=5236)
+ * [ ] [`_mm512_maskz_permutexvar_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_permutexvar_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_permutexvar_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_permutexvar_epi64&expand=5236)
+ * [ ] [`_mm512_maskz_permutexvar_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_permutexvar_pd&expand=5236)
+ * [ ] [`_mm512_maskz_permutexvar_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_permutexvar_ps&expand=5236)
+ * [ ] [`_mm512_maskz_rcp14_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rcp14_pd&expand=5236)
+ * [ ] [`_mm512_maskz_rcp14_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rcp14_ps&expand=5236)
* [x] [`_mm512_maskz_rol_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rol_epi32&expand=5236)
* [x] [`_mm512_maskz_rol_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rol_epi64&expand=5236)
* [x] [`_mm512_maskz_rolv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rolv_epi32&expand=5236)
@@ -52,12 +877,35 @@
* [x] [`_mm512_maskz_ror_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_ror_epi64&expand=5236)
* [x] [`_mm512_maskz_rorv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rorv_epi32&expand=5236)
* [x] [`_mm512_maskz_rorv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rorv_epi64&expand=5236)
+ * [ ] [`_mm512_maskz_roundscale_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_pd&expand=5236)
+ * [ ] [`_mm512_maskz_roundscale_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_ps&expand=5236)
+ * [ ] [`_mm512_maskz_roundscale_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_round_pd&expand=5236)
+ * [ ] [`_mm512_maskz_roundscale_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_roundscale_round_ps&expand=5236)
+ * [ ] [`_mm512_maskz_rsqrt14_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rsqrt14_pd&expand=5236)
+ * [ ] [`_mm512_maskz_rsqrt14_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rsqrt14_ps&expand=5236)
+ * [ ] [`_mm512_maskz_scalef_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_pd&expand=5236)
+ * [ ] [`_mm512_maskz_scalef_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_ps&expand=5236)
+ * [ ] [`_mm512_maskz_scalef_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_round_pd&expand=5236)
+ * [ ] [`_mm512_maskz_scalef_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_scalef_round_ps&expand=5236)
+ * [ ] [`_mm512_maskz_set1_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_set1_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_set1_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_set1_epi64&expand=5236)
+ * [ ] [`_mm512_maskz_shuffle_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shuffle_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_shuffle_f32x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shuffle_f32x4&expand=5236)
+ * [ ] [`_mm512_maskz_shuffle_f64x2`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shuffle_f64x2&expand=5236)
+ * [ ] [`_mm512_maskz_shuffle_i32x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shuffle_i32x4&expand=5236)
+ * [ ] [`_mm512_maskz_shuffle_i64x2`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shuffle_i64x2&expand=5236)
+ * [ ] [`_mm512_maskz_shuffle_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shuffle_pd&expand=5236)
+ * [ ] [`_mm512_maskz_shuffle_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shuffle_ps&expand=5236)
* [x] [`_mm512_maskz_sll_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sll_epi32&expand=5236)
* [x] [`_mm512_maskz_sll_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sll_epi64&expand=5236)
* [x] [`_mm512_maskz_slli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_slli_epi32&expand=5236)
* [x] [`_mm512_maskz_slli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_slli_epi64&expand=5236)
* [x] [`_mm512_maskz_sllv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sllv_epi32&expand=5236)
* [x] [`_mm512_maskz_sllv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sllv_epi64&expand=5236)
+ * [x] [`_mm512_maskz_sqrt_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sqrt_pd&expand=5236)
+ * [x] [`_mm512_maskz_sqrt_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sqrt_ps&expand=5236)
+ * [ ] [`_mm512_maskz_sqrt_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sqrt_round_pd&expand=5236)
+ * [ ] [`_mm512_maskz_sqrt_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sqrt_round_ps&expand=5236)
* [x] [`_mm512_maskz_sra_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sra_epi32&expand=5236)
* [x] [`_mm512_maskz_sra_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sra_epi64&expand=5236)
* [x] [`_mm512_maskz_srai_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srai_epi32&expand=5236)
@@ -70,11 +918,95 @@
* [x] [`_mm512_maskz_srli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srli_epi64&expand=5236)
* [x] [`_mm512_maskz_srlv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srlv_epi32&expand=5236)
* [x] [`_mm512_maskz_srlv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srlv_epi64&expand=5236)
+ * [x] [`_mm512_maskz_sub_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_epi32&expand=5236)
+ * [x] [`_mm512_maskz_sub_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_epi64&expand=5236)
+ * [x] [`_mm512_maskz_sub_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_pd&expand=5236)
+ * [x] [`_mm512_maskz_sub_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_ps&expand=5236)
+ * [ ] [`_mm512_maskz_sub_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_round_pd&expand=5236)
+ * [ ] [`_mm512_maskz_sub_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sub_round_ps&expand=5236)
+ * [ ] [`_mm512_maskz_ternarylogic_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_ternarylogic_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_ternarylogic_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_ternarylogic_epi64&expand=5236)
+ * [ ] [`_mm512_maskz_unpackhi_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_unpackhi_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_unpackhi_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_unpackhi_epi64&expand=5236)
+ * [ ] [`_mm512_maskz_unpackhi_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_unpackhi_pd&expand=5236)
+ * [ ] [`_mm512_maskz_unpackhi_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_unpackhi_ps&expand=5236)
+ * [ ] [`_mm512_maskz_unpacklo_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_unpacklo_epi32&expand=5236)
+ * [ ] [`_mm512_maskz_unpacklo_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_unpacklo_epi64&expand=5236)
+ * [ ] [`_mm512_maskz_unpacklo_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_unpacklo_pd&expand=5236)
+ * [ ] [`_mm512_maskz_unpacklo_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_unpacklo_ps&expand=5236)
* [x] [`_mm512_maskz_xor_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_xor_epi32&expand=5236)
* [x] [`_mm512_maskz_xor_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_xor_epi64&expand=5236)
+ * [x] [`_mm512_max_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_epi32&expand=5236)
+ * [x] [`_mm512_max_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_epi64&expand=5236)
+ * [x] [`_mm512_max_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_epu32&expand=5236)
+ * [x] [`_mm512_max_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_epu64&expand=5236)
+ * [x] [`_mm512_max_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_pd&expand=5236)
+ * [x] [`_mm512_max_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_ps&expand=5236)
+ * [ ] [`_mm512_max_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_round_pd&expand=5236)
+ * [ ] [`_mm512_max_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_round_ps&expand=5236)
+ * [x] [`_mm512_min_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_epi32&expand=5236)
+ * [x] [`_mm512_min_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_epi64&expand=5236)
+ * [x] [`_mm512_min_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_epu32&expand=5236)
+ * [x] [`_mm512_min_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_epu64&expand=5236)
+ * [x] [`_mm512_min_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_pd&expand=5236)
+ * [x] [`_mm512_min_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_ps&expand=5236)
+ * [ ] [`_mm512_min_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_round_pd&expand=5236)
+ * [ ] [`_mm512_min_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_round_ps&expand=5236)
+ * [ ] [`_mm512_movedup_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_movedup_pd&expand=5236)
+ * [ ] [`_mm512_movehdup_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_movehdup_ps&expand=5236)
+ * [ ] [`_mm512_moveldup_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_moveldup_ps&expand=5236)
+ * [x] [`_mm512_mul_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_epi32&expand=5236)
+ * [x] [`_mm512_mul_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_epu32&expand=5236)
+ * [x] [`_mm512_mul_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_pd&expand=5236)
+ * [x] [`_mm512_mul_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_ps&expand=5236)
+ * [ ] [`_mm512_mul_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_round_pd&expand=5236)
+ * [ ] [`_mm512_mul_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_round_ps&expand=5236)
+ * [x] [`_mm512_mullo_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mullo_epi32&expand=5236)
+ * [x] [`_mm512_mullox_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mullox_epi64&expand=5236)
* [x] [`_mm512_or_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_or_epi32&expand=5236)
* [x] [`_mm512_or_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_or_epi64&expand=5236)
* [x] [`_mm512_or_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_or_si512&expand=5236)
+ * [ ] [`_mm512_permute_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permute_pd&expand=5236)
+ * [ ] [`_mm512_permute_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permute_ps&expand=5236)
+ * [ ] [`_mm512_permutevar_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutevar_epi32&expand=5236)
+ * [ ] [`_mm512_permutevar_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutevar_pd&expand=5236)
+ * [ ] [`_mm512_permutevar_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutevar_ps&expand=5236)
+ * [ ] [`_mm512_permutex2var_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutex2var_epi32&expand=5236)
+ * [ ] [`_mm512_permutex2var_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutex2var_epi64&expand=5236)
+ * [ ] [`_mm512_permutex2var_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutex2var_pd&expand=5236)
+ * [ ] [`_mm512_permutex2var_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutex2var_ps&expand=5236)
+ * [ ] [`_mm512_permutex_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutex_epi64&expand=5236)
+ * [ ] [`_mm512_permutex_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutex_pd&expand=5236)
+ * [ ] [`_mm512_permutexvar_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutexvar_epi32&expand=5236)
+ * [ ] [`_mm512_permutexvar_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutexvar_epi64&expand=5236)
+ * [ ] [`_mm512_permutexvar_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutexvar_pd&expand=5236)
+ * [ ] [`_mm512_permutexvar_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutexvar_ps&expand=5236)
+ * [ ] [`_mm512_rcp14_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rcp14_pd&expand=5236)
+ * [ ] [`_mm512_rcp14_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rcp14_ps&expand=5236)
+ * [ ] [`_mm512_reduce_add_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_add_epi32&expand=5236)
+ * [ ] [`_mm512_reduce_add_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_add_epi64&expand=5236)
+ * [ ] [`_mm512_reduce_add_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_add_pd&expand=5236)
+ * [ ] [`_mm512_reduce_add_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_add_ps&expand=5236)
+ * [ ] [`_mm512_reduce_and_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_and_epi32&expand=5236)
+ * [ ] [`_mm512_reduce_and_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_and_epi64&expand=5236)
+ * [ ] [`_mm512_reduce_max_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_max_epi32&expand=5236)
+ * [ ] [`_mm512_reduce_max_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_max_epi64&expand=5236)
+ * [ ] [`_mm512_reduce_max_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_max_epu32&expand=5236)
+ * [ ] [`_mm512_reduce_max_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_max_epu64&expand=5236)
+ * [ ] [`_mm512_reduce_max_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_max_pd&expand=5236)
+ * [ ] [`_mm512_reduce_max_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_max_ps&expand=5236)
+ * [ ] [`_mm512_reduce_min_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_min_epi32&expand=5236)
+ * [ ] [`_mm512_reduce_min_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_min_epi64&expand=5236)
+ * [ ] [`_mm512_reduce_min_epu32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_min_epu32&expand=5236)
+ * [ ] [`_mm512_reduce_min_epu64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_min_epu64&expand=5236)
+ * [ ] [`_mm512_reduce_min_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_min_pd&expand=5236)
+ * [ ] [`_mm512_reduce_min_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_min_ps&expand=5236)
+ * [ ] [`_mm512_reduce_mul_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_mul_epi32&expand=5236)
+ * [ ] [`_mm512_reduce_mul_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_mul_epi64&expand=5236)
+ * [ ] [`_mm512_reduce_mul_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_mul_pd&expand=5236)
+ * [ ] [`_mm512_reduce_mul_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_mul_ps&expand=5236)
+ * [ ] [`_mm512_reduce_or_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_or_epi32&expand=5236)
+ * [ ] [`_mm512_reduce_or_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_reduce_or_epi64&expand=5236)
* [x] [`_mm512_rol_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rol_epi32&expand=5236)
* [x] [`_mm512_rol_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rol_epi64&expand=5236)
* [x] [`_mm512_rolv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rolv_epi32&expand=5236)
@@ -83,12 +1015,62 @@
* [x] [`_mm512_ror_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_ror_epi64&expand=5236)
* [x] [`_mm512_rorv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rorv_epi32&expand=5236)
* [x] [`_mm512_rorv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rorv_epi64&expand=5236)
+ * [ ] [`_mm512_roundscale_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_pd&expand=5236)
+ * [ ] [`_mm512_roundscale_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_ps&expand=5236)
+ * [ ] [`_mm512_roundscale_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_round_pd&expand=5236)
+ * [ ] [`_mm512_roundscale_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_roundscale_round_ps&expand=5236)
+ * [ ] [`_mm512_rsqrt14_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rsqrt14_pd&expand=5236)
+ * [ ] [`_mm512_rsqrt14_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rsqrt14_ps&expand=5236)
+ * [ ] [`_mm512_scalef_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_pd&expand=5236)
+ * [ ] [`_mm512_scalef_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_ps&expand=5236)
+ * [ ] [`_mm512_scalef_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_round_pd&expand=5236)
+ * [ ] [`_mm512_scalef_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_scalef_round_ps&expand=5236)
+ * [ ] [`_mm512_set1_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_epi16&expand=5236)
+ * [ ] [`_mm512_set1_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_epi32&expand=5236)
+ * [ ] [`_mm512_set1_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_epi64&expand=5236)
+ * [ ] [`_mm512_set1_epi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_epi8&expand=5236)
+ * [ ] [`_mm512_set1_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_pd&expand=5236)
+ * [ ] [`_mm512_set1_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_ps&expand=5236)
+ * [ ] [`_mm512_set4_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set4_epi32&expand=5236)
+ * [ ] [`_mm512_set4_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set4_epi64&expand=5236)
+ * [ ] [`_mm512_set4_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set4_pd&expand=5236)
+ * [ ] [`_mm512_set4_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set4_ps&expand=5236)
+ * [ ] [`_mm512_set_epi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set_epi16&expand=5236)
+ * [ ] [`_mm512_set_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set_epi32&expand=5236)
+ * [x] [`_mm512_set_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set_epi64&expand=5236)
+ * [ ] [`_mm512_set_epi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set_epi8&expand=5236)
+ * [x] [`_mm512_set_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set_pd&expand=5236)
+ * [ ] [`_mm512_set_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set_ps&expand=5236)
+ * [ ] [`_mm512_setr4_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setr4_epi32&expand=5236)
+ * [ ] [`_mm512_setr4_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setr4_epi64&expand=5236)
+ * [ ] [`_mm512_setr4_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setr4_pd&expand=5236)
+ * [ ] [`_mm512_setr4_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setr4_ps&expand=5236)
+ * [x] [`_mm512_setr_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setr_epi32&expand=5236)
+ * [x] [`_mm512_setr_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setr_epi64&expand=5236)
+ * [x] [`_mm512_setr_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setr_pd&expand=5236)
+ * [ ] [`_mm512_setr_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setr_ps&expand=5236)
+ * [ ] [`_mm512_setzero_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setzero_epi32&expand=5236)
+ * [x] [`_mm512_setzero_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setzero_pd&expand=5236)
+ * [ ] [`_mm512_setzero_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setzero_ps&expand=5236)
+ * [x] [`_mm512_setzero_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setzero_si512&expand=5236)
+ * [ ] [`_mm512_setzero`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setzero&expand=5236)
+ * [ ] [`_mm512_shuffle_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shuffle_epi32&expand=5236)
+ * [ ] [`_mm512_shuffle_f32x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shuffle_f32x4&expand=5236)
+ * [ ] [`_mm512_shuffle_f64x2`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shuffle_f64x2&expand=5236)
+ * [ ] [`_mm512_shuffle_i32x4`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shuffle_i32x4&expand=5236)
+ * [ ] [`_mm512_shuffle_i64x2`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shuffle_i64x2&expand=5236)
+ * [ ] [`_mm512_shuffle_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shuffle_pd&expand=5236)
+ * [ ] [`_mm512_shuffle_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shuffle_ps&expand=5236)
* [x] [`_mm512_sll_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sll_epi32&expand=5236)
* [x] [`_mm512_sll_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sll_epi64&expand=5236)
* [x] [`_mm512_slli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_slli_epi32&expand=5236)
* [x] [`_mm512_slli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_slli_epi64&expand=5236)
* [x] [`_mm512_sllv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sllv_epi32&expand=5236)
* [x] [`_mm512_sllv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sllv_epi64&expand=5236)
+ * [x] [`_mm512_sqrt_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_pd&expand=5236)
+ * [x] [`_mm512_sqrt_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_ps&expand=5236)
+ * [ ] [`_mm512_sqrt_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_round_pd&expand=5236)
+ * [ ] [`_mm512_sqrt_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sqrt_round_ps&expand=5236)
* [x] [`_mm512_sra_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sra_epi32&expand=5236)
* [x] [`_mm512_sra_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sra_epi64&expand=5236)
* [x] [`_mm512_srai_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srai_epi32&expand=5236)
@@ -101,7 +1083,336 @@
* [x] [`_mm512_srli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srli_epi64&expand=5236)
* [x] [`_mm512_srlv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srlv_epi32&expand=5236)
* [x] [`_mm512_srlv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srlv_epi64&expand=5236)
+ * [ ] [`_mm512_store_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_store_epi32&expand=5236)
+ * [ ] [`_mm512_store_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_store_epi64&expand=5236)
+ * [ ] [`_mm512_store_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_store_pd&expand=5236)
+ * [ ] [`_mm512_store_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_store_ps&expand=5236)
+ * [ ] [`_mm512_store_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_store_si512&expand=5236)
+ * [x] [`_mm512_storeu_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_pd&expand=5236)
+ * [x] [`_mm512_storeu_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ps&expand=5236)
+ * [ ] [`_mm512_storeu_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_si512&expand=5236)
+ * [ ] [`_mm512_stream_load_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_stream_load_si512&expand=5236)
+ * [ ] [`_mm512_stream_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_stream_pd&expand=5236)
+ * [ ] [`_mm512_stream_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_stream_ps&expand=5236)
+ * [ ] [`_mm512_stream_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_stream_si512&expand=5236)
+ * [x] [`_mm512_sub_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_epi32&expand=5236)
+ * [x] [`_mm512_sub_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_epi64&expand=5236)
+ * [x] [`_mm512_sub_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_pd&expand=5236)
+ * [x] [`_mm512_sub_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_ps&expand=5236)
+ * [ ] [`_mm512_sub_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_round_pd&expand=5236)
+ * [ ] [`_mm512_sub_round_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sub_round_ps&expand=5236)
+ * [ ] [`_mm512_svml_round_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_svml_round_pd&expand=5236)
+ * [ ] [`_mm512_ternarylogic_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_ternarylogic_epi32&expand=5236)
+ * [ ] [`_mm512_ternarylogic_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_ternarylogic_epi64&expand=5236)
+ * [ ] [`_mm512_test_epi32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_test_epi32_mask&expand=5236)
+ * [ ] [`_mm512_test_epi64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_test_epi64_mask&expand=5236)
+ * [ ] [`_mm512_testn_epi32_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_testn_epi32_mask&expand=5236)
+ * [ ] [`_mm512_testn_epi64_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_testn_epi64_mask&expand=5236)
+ * [ ] [`_mm512_undefined_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_epi32&expand=5236)
+ * [x] [`_mm512_undefined_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_pd&expand=5236)
+ * [x] [`_mm512_undefined_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ps&expand=5236)
+ * [ ] [`_mm512_undefined`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined&expand=5236)
+ * [ ] [`_mm512_unpackhi_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_unpackhi_epi32&expand=5236)
+ * [ ] [`_mm512_unpackhi_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_unpackhi_epi64&expand=5236)
+ * [ ] [`_mm512_unpackhi_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_unpackhi_pd&expand=5236)
+ * [ ] [`_mm512_unpackhi_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_unpackhi_ps&expand=5236)
+ * [ ] [`_mm512_unpacklo_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_unpacklo_epi32&expand=5236)
+ * [ ] [`_mm512_unpacklo_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_unpacklo_epi64&expand=5236)
+ * [ ] [`_mm512_unpacklo_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_unpacklo_pd&expand=5236)
+ * [ ] [`_mm512_unpacklo_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_unpacklo_ps&expand=5236)
* [x] [`_mm512_xor_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_xor_epi32&expand=5236)
* [x] [`_mm512_xor_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_xor_epi64&expand=5236)
* [x] [`_mm512_xor_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_xor_si512&expand=5236)
+ * [ ] [`_mm512_zextpd128_pd512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextpd128_pd512&expand=5236)
+ * [ ] [`_mm512_zextpd256_pd512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextpd256_pd512&expand=5236)
+ * [ ] [`_mm512_zextps128_ps512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextps128_ps512&expand=5236)
+ * [ ] [`_mm512_zextps256_ps512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextps256_ps512&expand=5236)
+ * [ ] [`_mm512_zextsi128_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextsi128_si512&expand=5236)
+ * [ ] [`_mm512_zextsi256_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextsi256_si512&expand=5236)
+ * [ ] [`_mm_add_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_round_sd&expand=5236)
+ * [ ] [`_mm_add_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_round_ss&expand=5236)
+ * [x] [`_mm_cmp_round_sd_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_sd_mask&expand=5236)
+ * [x] [`_mm_cmp_round_ss_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_ss_mask&expand=5236)
+ * [x] [`_mm_cmp_sd_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sd_mask&expand=5236)
+ * [x] [`_mm_cmp_ss_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ss_mask&expand=5236)
+ * [ ] [`_mm_comi_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_round_sd&expand=5236)
+ * [ ] [`_mm_comi_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_round_ss&expand=5236)
+ * [ ] [`_mm_cvt_roundi32_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi32_ss&expand=5236)
+ * [ ] [`_mm_cvt_roundi64_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi64_sd&expand=5236)
+ * [ ] [`_mm_cvt_roundi64_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi64_ss&expand=5236)
+ * [ ] [`_mm_cvt_roundsd_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_i32&expand=5236)
+ * [ ] [`_mm_cvt_roundsd_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_i64&expand=5236)
+ * [ ] [`_mm_cvt_roundsd_si32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_si32&expand=5236)
+ * [ ] [`_mm_cvt_roundsd_si64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_si64&expand=5236)
+ * [ ] [`_mm_cvt_roundsd_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_ss&expand=5236)
+ * [ ] [`_mm_cvt_roundsd_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_u32&expand=5236)
+ * [ ] [`_mm_cvt_roundsd_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_u64&expand=5236)
+ * [ ] [`_mm_cvt_roundsi32_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsi32_ss&expand=5236)
+ * [ ] [`_mm_cvt_roundsi64_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsi64_sd&expand=5236)
+ * [ ] [`_mm_cvt_roundsi64_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsi64_ss&expand=5236)
+ * [ ] [`_mm_cvt_roundss_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_i32&expand=5236)
+ * [ ] [`_mm_cvt_roundss_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_i64&expand=5236)
+ * [ ] [`_mm_cvt_roundss_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_sd&expand=5236)
+ * [ ] [`_mm_cvt_roundss_si32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_si32&expand=5236)
+ * [ ] [`_mm_cvt_roundss_si64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_si64&expand=5236)
+ * [ ] [`_mm_cvt_roundss_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_u32&expand=5236)
+ * [ ] [`_mm_cvt_roundss_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_u64&expand=5236)
+ * [ ] [`_mm_cvt_roundu32_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu32_ss&expand=5236)
+ * [ ] [`_mm_cvt_roundu64_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu64_sd&expand=5236)
+ * [ ] [`_mm_cvt_roundu64_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu64_ss&expand=5236)
+ * [ ] [`_mm_cvti32_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti32_sd&expand=5236)
+ * [ ] [`_mm_cvti32_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti32_ss&expand=5236)
+ * [ ] [`_mm_cvti64_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti64_sd&expand=5236)
+ * [ ] [`_mm_cvti64_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti64_ss&expand=5236)
+ * [ ] [`_mm_cvtsd_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_i32&expand=5236)
+ * [ ] [`_mm_cvtsd_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_i64&expand=5236)
+ * [ ] [`_mm_cvtsd_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_u32&expand=5236)
+ * [ ] [`_mm_cvtsd_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_u64&expand=5236)
+ * [ ] [`_mm_cvtss_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_i32&expand=5236)
+ * [ ] [`_mm_cvtss_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_i64&expand=5236)
+ * [ ] [`_mm_cvtss_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_u32&expand=5236)
+ * [ ] [`_mm_cvtss_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_u64&expand=5236)
+ * [ ] [`_mm_cvtt_roundsd_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_i32&expand=5236)
+ * [ ] [`_mm_cvtt_roundsd_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_i64&expand=5236)
+ * [ ] [`_mm_cvtt_roundsd_si32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_si32&expand=5236)
+ * [ ] [`_mm_cvtt_roundsd_si64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_si64&expand=5236)
+ * [ ] [`_mm_cvtt_roundsd_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_u32&expand=5236)
+ * [ ] [`_mm_cvtt_roundsd_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_u64&expand=5236)
+ * [ ] [`_mm_cvtt_roundss_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_i32&expand=5236)
+ * [ ] [`_mm_cvtt_roundss_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_i64&expand=5236)
+ * [ ] [`_mm_cvtt_roundss_si32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_si32&expand=5236)
+ * [ ] [`_mm_cvtt_roundss_si64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_si64&expand=5236)
+ * [ ] [`_mm_cvtt_roundss_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_u32&expand=5236)
+ * [ ] [`_mm_cvtt_roundss_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_u64&expand=5236)
+ * [ ] [`_mm_cvttsd_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_i32&expand=5236)
+ * [ ] [`_mm_cvttsd_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_i64&expand=5236)
+ * [ ] [`_mm_cvttsd_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_u32&expand=5236)
+ * [ ] [`_mm_cvttsd_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_u64&expand=5236)
+ * [ ] [`_mm_cvttss_i32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_i32&expand=5236)
+ * [ ] [`_mm_cvttss_i64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_i64&expand=5236)
+ * [ ] [`_mm_cvttss_u32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_u32&expand=5236)
+ * [ ] [`_mm_cvttss_u64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_u64&expand=5236)
+ * [ ] [`_mm_cvtu32_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu32_sd&expand=5236)
+ * [ ] [`_mm_cvtu32_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu32_ss&expand=5236)
+ * [ ] [`_mm_cvtu64_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu64_sd&expand=5236)
+ * [ ] [`_mm_cvtu64_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu64_ss&expand=5236)
+ * [ ] [`_mm_div_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_round_sd&expand=5236)
+ * [ ] [`_mm_div_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_round_ss&expand=5236)
+ * [ ] [`_mm_fixupimm_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fixupimm_round_sd&expand=5236)
+ * [ ] [`_mm_fixupimm_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fixupimm_round_ss&expand=5236)
+ * [ ] [`_mm_fixupimm_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fixupimm_sd&expand=5236)
+ * [ ] [`_mm_fixupimm_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fixupimm_ss&expand=5236)
+ * [ ] [`_mm_fmadd_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_round_sd&expand=5236)
+ * [ ] [`_mm_fmadd_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_round_ss&expand=5236)
+ * [ ] [`_mm_fmsub_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_round_sd&expand=5236)
+ * [ ] [`_mm_fmsub_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_round_ss&expand=5236)
+ * [ ] [`_mm_fnmadd_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_round_sd&expand=5236)
+ * [ ] [`_mm_fnmadd_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_round_ss&expand=5236)
+ * [ ] [`_mm_fnmsub_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_round_sd&expand=5236)
+ * [ ] [`_mm_fnmsub_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_round_ss&expand=5236)
+ * [ ] [`_mm_getexp_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_round_sd&expand=5236)
+ * [ ] [`_mm_getexp_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_round_ss&expand=5236)
+ * [ ] [`_mm_getexp_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_sd&expand=5236)
+ * [ ] [`_mm_getexp_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getexp_ss&expand=5236)
+ * [ ] [`_mm_getmant_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_round_sd&expand=5236)
+ * [ ] [`_mm_getmant_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_round_ss&expand=5236)
+ * [ ] [`_mm_getmant_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_sd&expand=5236)
+ * [ ] [`_mm_getmant_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getmant_ss&expand=5236)
+ * [ ] [`_mm_mask3_fmadd_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_round_sd&expand=5236)
+ * [ ] [`_mm_mask3_fmadd_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_round_ss&expand=5236)
+ * [ ] [`_mm_mask3_fmadd_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_sd&expand=5236)
+ * [ ] [`_mm_mask3_fmadd_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmadd_ss&expand=5236)
+ * [ ] [`_mm_mask3_fmsub_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_round_sd&expand=5236)
+ * [ ] [`_mm_mask3_fmsub_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_round_ss&expand=5236)
+ * [ ] [`_mm_mask3_fmsub_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_sd&expand=5236)
+ * [ ] [`_mm_mask3_fmsub_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fmsub_ss&expand=5236)
+ * [ ] [`_mm_mask3_fnmadd_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_round_sd&expand=5236)
+ * [ ] [`_mm_mask3_fnmadd_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_round_ss&expand=5236)
+ * [ ] [`_mm_mask3_fnmadd_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_sd&expand=5236)
+ * [ ] [`_mm_mask3_fnmadd_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmadd_ss&expand=5236)
+ * [ ] [`_mm_mask3_fnmsub_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_round_sd&expand=5236)
+ * [ ] [`_mm_mask3_fnmsub_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_round_ss&expand=5236)
+ * [ ] [`_mm_mask3_fnmsub_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_sd&expand=5236)
+ * [ ] [`_mm_mask3_fnmsub_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask3_fnmsub_ss&expand=5236)
+ * [ ] [`_mm_mask_add_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_round_sd&expand=5236)
+ * [ ] [`_mm_mask_add_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_round_ss&expand=5236)
+ * [ ] [`_mm_mask_add_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_sd&expand=5236)
+ * [ ] [`_mm_mask_add_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_add_ss&expand=5236)
+ * [x] [`_mm_mask_cmp_round_sd_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_sd_mask&expand=5236)
+ * [x] [`_mm_mask_cmp_round_ss_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_ss_mask&expand=5236)
+ * [x] [`_mm_mask_cmp_sd_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_sd_mask&expand=5236)
+ * [x] [`_mm_mask_cmp_ss_mask`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_ss_mask&expand=5236)
+ * [ ] [`_mm_mask_cvt_roundsd_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundsd_ss&expand=5236)
+ * [ ] [`_mm_mask_cvt_roundss_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundss_sd&expand=5236)
+ * [ ] [`_mm_mask_cvtsd_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsd_ss&expand=5236)
+ * [ ] [`_mm_mask_cvtss_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtss_sd&expand=5236)
+ * [ ] [`_mm_mask_div_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_round_sd&expand=5236)
+ * [ ] [`_mm_mask_div_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_round_ss&expand=5236)
+ * [ ] [`_mm_mask_div_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_sd&expand=5236)
+ * [ ] [`_mm_mask_div_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_div_ss&expand=5236)
+ * [ ] [`_mm_mask_fixupimm_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fixupimm_round_sd&expand=5236)
+ * [ ] [`_mm_mask_fixupimm_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fixupimm_round_ss&expand=5236)
+ * [ ] [`_mm_mask_fixupimm_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fixupimm_sd&expand=5236)
+ * [ ] [`_mm_mask_fixupimm_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fixupimm_ss&expand=5236)
+ * [ ] [`_mm_mask_fmadd_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_round_sd&expand=5236)
+ * [ ] [`_mm_mask_fmadd_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_round_ss&expand=5236)
+ * [ ] [`_mm_mask_fmadd_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_sd&expand=5236)
+ * [ ] [`_mm_mask_fmadd_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmadd_ss&expand=5236)
+ * [ ] [`_mm_mask_fmsub_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_round_sd&expand=5236)
+ * [ ] [`_mm_mask_fmsub_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_round_ss&expand=5236)
+ * [ ] [`_mm_mask_fmsub_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_sd&expand=5236)
+ * [ ] [`_mm_mask_fmsub_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fmsub_ss&expand=5236)
+ * [ ] [`_mm_mask_fnmadd_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_round_sd&expand=5236)
+ * [ ] [`_mm_mask_fnmadd_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_round_ss&expand=5236)
+ * [ ] [`_mm_mask_fnmadd_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_sd&expand=5236)
+ * [ ] [`_mm_mask_fnmadd_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmadd_ss&expand=5236)
+ * [ ] [`_mm_mask_fnmsub_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_round_sd&expand=5236)
+ * [ ] [`_mm_mask_fnmsub_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_round_ss&expand=5236)
+ * [ ] [`_mm_mask_fnmsub_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_sd&expand=5236)
+ * [ ] [`_mm_mask_fnmsub_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_fnmsub_ss&expand=5236)
+ * [ ] [`_mm_mask_getexp_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_round_sd&expand=5236)
+ * [ ] [`_mm_mask_getexp_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_round_ss&expand=5236)
+ * [ ] [`_mm_mask_getexp_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_sd&expand=5236)
+ * [ ] [`_mm_mask_getexp_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getexp_ss&expand=5236)
+ * [ ] [`_mm_mask_getmant_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_round_sd&expand=5236)
+ * [ ] [`_mm_mask_getmant_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_round_ss&expand=5236)
+ * [ ] [`_mm_mask_getmant_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_sd&expand=5236)
+ * [ ] [`_mm_mask_getmant_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_getmant_ss&expand=5236)
+ * [ ] [`_mm_mask_load_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sd&expand=5236)
+ * [ ] [`_mm_mask_load_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_ss&expand=5236)
+ * [ ] [`_mm_mask_max_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_round_sd&expand=5236)
+ * [ ] [`_mm_mask_max_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_round_ss&expand=5236)
+ * [ ] [`_mm_mask_max_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_sd&expand=5236)
+ * [ ] [`_mm_mask_max_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_max_ss&expand=5236)
+ * [ ] [`_mm_mask_min_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_round_sd&expand=5236)
+ * [ ] [`_mm_mask_min_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_round_ss&expand=5236)
+ * [ ] [`_mm_mask_min_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_sd&expand=5236)
+ * [ ] [`_mm_mask_min_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_min_ss&expand=5236)
+ * [ ] [`_mm_mask_move_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_move_sd&expand=5236)
+ * [ ] [`_mm_mask_move_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_move_ss&expand=5236)
+ * [ ] [`_mm_mask_mul_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_round_sd&expand=5236)
+ * [ ] [`_mm_mask_mul_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_round_ss&expand=5236)
+ * [ ] [`_mm_mask_mul_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_sd&expand=5236)
+ * [ ] [`_mm_mask_mul_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_mul_ss&expand=5236)
+ * [ ] [`_mm_mask_rcp14_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rcp14_sd&expand=5236)
+ * [ ] [`_mm_mask_rcp14_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rcp14_ss&expand=5236)
+ * [ ] [`_mm_mask_roundscale_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_round_sd&expand=5236)
+ * [ ] [`_mm_mask_roundscale_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_round_ss&expand=5236)
+ * [ ] [`_mm_mask_roundscale_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_sd&expand=5236)
+ * [ ] [`_mm_mask_roundscale_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_roundscale_ss&expand=5236)
+ * [ ] [`_mm_mask_rsqrt14_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rsqrt14_sd&expand=5236)
+ * [ ] [`_mm_mask_rsqrt14_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_rsqrt14_ss&expand=5236)
+ * [ ] [`_mm_mask_scalef_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_round_sd&expand=5236)
+ * [ ] [`_mm_mask_scalef_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_round_ss&expand=5236)
+ * [ ] [`_mm_mask_scalef_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_sd&expand=5236)
+ * [ ] [`_mm_mask_scalef_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_scalef_ss&expand=5236)
[`_mm_mask_sqrt_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_round_sd&expand=5236) + * [ ] [`_mm_mask_sqrt_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_round_ss&expand=5236) + * [ ] [`_mm_mask_sqrt_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_sd&expand=5236) + * [ ] [`_mm_mask_sqrt_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sqrt_ss&expand=5236) + * [ ] [`_mm_mask_store_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_sd&expand=5236) + * [ ] [`_mm_mask_store_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_ss&expand=5236) + * [ ] [`_mm_mask_sub_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_round_sd&expand=5236) + * [ ] [`_mm_mask_sub_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_round_ss&expand=5236) + * [ ] [`_mm_mask_sub_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_sd&expand=5236) + * [ ] [`_mm_mask_sub_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_sub_ss&expand=5236) + * [ ] [`_mm_maskz_add_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_round_sd&expand=5236) + * [ ] [`_mm_maskz_add_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_round_ss&expand=5236) + * [ ] [`_mm_maskz_add_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_sd&expand=5236) + * [ ] [`_mm_maskz_add_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_add_ss&expand=5236) + * [ ] [`_mm_maskz_cvt_roundsd_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundsd_ss&expand=5236) + * [ ] [`_mm_maskz_cvt_roundss_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundss_sd&expand=5236) + * [ ] [`_mm_maskz_cvtsd_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsd_ss&expand=5236) + * [ ] [`_mm_maskz_cvtss_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtss_sd&expand=5236) + * [ ] [`_mm_maskz_div_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_round_sd&expand=5236) + * [ ] [`_mm_maskz_div_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_round_ss&expand=5236) + * [ ] [`_mm_maskz_div_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_sd&expand=5236) + * [ ] [`_mm_maskz_div_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_div_ss&expand=5236) + * [ ] [`_mm_maskz_fixupimm_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fixupimm_round_sd&expand=5236) + * [ ] [`_mm_maskz_fixupimm_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fixupimm_round_ss&expand=5236) + * [ ] [`_mm_maskz_fixupimm_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fixupimm_sd&expand=5236) + * [ ] [`_mm_maskz_fixupimm_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fixupimm_ss&expand=5236) + * [ ] 
[`_mm_maskz_fmadd_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_round_sd&expand=5236) + * [ ] [`_mm_maskz_fmadd_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_round_ss&expand=5236) + * [ ] [`_mm_maskz_fmadd_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_sd&expand=5236) + * [ ] [`_mm_maskz_fmadd_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmadd_ss&expand=5236) + * [ ] [`_mm_maskz_fmsub_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_round_sd&expand=5236) + * [ ] [`_mm_maskz_fmsub_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_round_ss&expand=5236) + * [ ] [`_mm_maskz_fmsub_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_sd&expand=5236) + * [ ] [`_mm_maskz_fmsub_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fmsub_ss&expand=5236) + * [ ] [`_mm_maskz_fnmadd_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_round_sd&expand=5236) + * [ ] [`_mm_maskz_fnmadd_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_round_ss&expand=5236) + * [ ] [`_mm_maskz_fnmadd_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_sd&expand=5236) + * [ ] [`_mm_maskz_fnmadd_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmadd_ss&expand=5236) + * [ ] [`_mm_maskz_fnmsub_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_round_sd&expand=5236) + * [ ] [`_mm_maskz_fnmsub_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_round_ss&expand=5236) + * [ ] [`_mm_maskz_fnmsub_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_sd&expand=5236) + * [ ] [`_mm_maskz_fnmsub_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_fnmsub_ss&expand=5236) + * [ ] [`_mm_maskz_getexp_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_round_sd&expand=5236) + * [ ] [`_mm_maskz_getexp_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_round_ss&expand=5236) + * [ ] [`_mm_maskz_getexp_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_sd&expand=5236) + * [ ] [`_mm_maskz_getexp_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getexp_ss&expand=5236) + * [ ] [`_mm_maskz_getmant_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_round_sd&expand=5236) + * [ ] [`_mm_maskz_getmant_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_round_ss&expand=5236) + * [ ] [`_mm_maskz_getmant_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_sd&expand=5236) + * [ ] [`_mm_maskz_getmant_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_getmant_ss&expand=5236) + * [ ] [`_mm_maskz_load_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sd&expand=5236) + * [ ] [`_mm_maskz_load_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_ss&expand=5236) + 
* [ ] [`_mm_maskz_max_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_round_sd&expand=5236) + * [ ] [`_mm_maskz_max_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_round_ss&expand=5236) + * [ ] [`_mm_maskz_max_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_sd&expand=5236) + * [ ] [`_mm_maskz_max_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_max_ss&expand=5236) + * [ ] [`_mm_maskz_min_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_round_sd&expand=5236) + * [ ] [`_mm_maskz_min_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_round_ss&expand=5236) + * [ ] [`_mm_maskz_min_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_sd&expand=5236) + * [ ] [`_mm_maskz_min_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_min_ss&expand=5236) + * [ ] [`_mm_maskz_move_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_move_sd&expand=5236) + * [ ] [`_mm_maskz_move_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_move_ss&expand=5236) + * [ ] [`_mm_maskz_mul_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_round_sd&expand=5236) + * [ ] [`_mm_maskz_mul_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_round_ss&expand=5236) + * [ ] [`_mm_maskz_mul_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_sd&expand=5236) + * [ ] [`_mm_maskz_mul_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_mul_ss&expand=5236) + * [ ] [`_mm_maskz_rcp14_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rcp14_sd&expand=5236) + * [ ] [`_mm_maskz_rcp14_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rcp14_ss&expand=5236) + * [ ] [`_mm_maskz_roundscale_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_round_sd&expand=5236) + * [ ] [`_mm_maskz_roundscale_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_round_ss&expand=5236) + * [ ] [`_mm_maskz_roundscale_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_sd&expand=5236) + * [ ] [`_mm_maskz_roundscale_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_roundscale_ss&expand=5236) + * [ ] [`_mm_maskz_rsqrt14_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rsqrt14_sd&expand=5236) + * [ ] [`_mm_maskz_rsqrt14_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_rsqrt14_ss&expand=5236) + * [ ] [`_mm_maskz_scalef_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_round_sd&expand=5236) + * [ ] [`_mm_maskz_scalef_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_round_ss&expand=5236) + * [ ] [`_mm_maskz_scalef_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_sd&expand=5236) + * [ ] [`_mm_maskz_scalef_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_scalef_ss&expand=5236) + * [ ] 
[`_mm_maskz_sqrt_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_round_sd&expand=5236) + * [ ] [`_mm_maskz_sqrt_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_round_ss&expand=5236) + * [ ] [`_mm_maskz_sqrt_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_sd&expand=5236) + * [ ] [`_mm_maskz_sqrt_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sqrt_ss&expand=5236) + * [ ] [`_mm_maskz_sub_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_round_sd&expand=5236) + * [ ] [`_mm_maskz_sub_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_round_ss&expand=5236) + * [ ] [`_mm_maskz_sub_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_sd&expand=5236) + * [ ] [`_mm_maskz_sub_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_sub_ss&expand=5236) + * [ ] [`_mm_max_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_round_sd&expand=5236) + * [ ] [`_mm_max_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_round_ss&expand=5236) + * [ ] [`_mm_min_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_round_sd&expand=5236) + * [ ] [`_mm_min_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_round_ss&expand=5236) + * [ ] [`_mm_mul_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_round_sd&expand=5236) + * [ ] [`_mm_mul_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_round_ss&expand=5236) + * [ ] [`_mm_rcp14_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp14_sd&expand=5236) + * [ ] [`_mm_rcp14_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp14_ss&expand=5236) + * [ ] [`_mm_roundscale_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_round_sd&expand=5236) + * [ ] [`_mm_roundscale_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_round_ss&expand=5236) + * [ ] [`_mm_roundscale_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_sd&expand=5236) + * [ ] [`_mm_roundscale_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_roundscale_ss&expand=5236) + * [ ] [`_mm_rsqrt14_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt14_sd&expand=5236) + * [ ] [`_mm_rsqrt14_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt14_ss&expand=5236) + * [ ] [`_mm_scalef_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_round_sd&expand=5236) + * [ ] [`_mm_scalef_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_round_ss&expand=5236) + * [ ] [`_mm_scalef_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_sd&expand=5236) + * [ ] [`_mm_scalef_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_scalef_ss&expand=5236) + * [ ] [`_mm_sqrt_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_round_sd&expand=5236) + * [ ] 
[`_mm_sqrt_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_round_ss&expand=5236) + * [ ] [`_mm_sub_round_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_round_sd&expand=5236) + * [ ] [`_mm_sub_round_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_round_ss&expand=5236) +

diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index 2160744a5c..71adc0fd5d 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -87,3836 +87,5616 @@ pub unsafe fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
    transmute(simd_select_bitmask(k, abs, zero))
}

-/// Returns vector of type `__m512d` with all elements set to zero.
+/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990&text=_mm512_setzero_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_abs_ps&expand=65)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vxorps))]
-pub unsafe fn _mm512_setzero_pd() -> __m512d {
-    // All-0 is a properly initialized __m512d
-    mem::zeroed()
+#[cfg_attr(test, assert_instr(vpandq))]
+pub unsafe fn _mm512_abs_ps(v2: __m512) -> __m512 {
+    let a = _mm512_set1_epi32(0x7FFFFFFF); // from LLVM code
+    let b = transmute::<f32x16, __m512i>(v2.as_f32x16());
+    let abs = _mm512_and_epi32(a, b);
+    transmute(abs)
}

-/// Returns vector of type `__m512d` with all elements set to zero.
+/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990&text=_mm512_setzero_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_abs_ps&expand=66)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vxorps))]
-pub unsafe fn _mm512_setzero_ps() -> __m512 {
-    // All-0 is a properly initialized __m512
-    mem::zeroed()
+#[cfg_attr(test, assert_instr(vpandd))]
+pub unsafe fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 {
+    let abs = _mm512_abs_ps(v2).as_f32x16();
+    transmute(simd_select_bitmask(k, abs, src.as_f32x16()))
}

-/// Returns vector of type `__m512i` with all elements set to zero.
+/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990&text=_mm512_setzero_si512)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_abs_pd&expand=60)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vxorps))]
-pub unsafe fn _mm512_setzero_si512() -> __m512i {
-    // All-0 is a properly initialized __m512i
-    mem::zeroed()
+#[cfg_attr(test, assert_instr(vpandq))]
+pub unsafe fn _mm512_abs_pd(v2: __m512d) -> __m512d {
+    let a = _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF); // from LLVM code
+    let b = transmute::<f64x8, __m512i>(v2.as_f64x8());
+    let abs = _mm512_and_epi64(a, b);
+    transmute(abs)
}

-/// Sets packed 32-bit integers in `dst` with the supplied values in reverse
-/// order.
+/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_abs_pd&expand=61)
#[inline]
#[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_setr_epi32(
-    e15: i32,
-    e14: i32,
-    e13: i32,
-    e12: i32,
-    e11: i32,
-    e10: i32,
-    e9: i32,
-    e8: i32,
-    e7: i32,
-    e6: i32,
-    e5: i32,
-    e4: i32,
-    e3: i32,
-    e2: i32,
-    e1: i32,
-    e0: i32,
-) -> __m512i {
-    let r = i32x16(
-        e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
-    );
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpandq))]
+pub unsafe fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d {
+    let abs = _mm512_abs_pd(v2).as_f64x8();
+    transmute(simd_select_bitmask(k, abs, src.as_f64x8()))
}
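// A scalar sketch (illustration only, not from the original patch) of the bit
// trick the `_mm512_abs_ps`/`_mm512_abs_pd` additions above rely on: in the
// IEEE-754 layout, clearing the sign bit yields the absolute value, which is
// why those intrinsics AND each lane with 0x7FFF_FFFF (or its 64-bit analogue).
fn abs_f32_via_sign_mask(x: f32) -> f32 {
    // Clear bit 31 (the sign bit); exponent and mantissa are untouched.
    f32::from_bits(x.to_bits() & 0x7FFF_FFFF)
}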
-/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices.
+/// Add packed 32-bit integers in a and b, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32gather_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_add_epi32&expand=100)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
-#[rustc_args_required_const(2)]
-pub unsafe fn _mm512_i32gather_pd(offsets: __m256i, slice: *const u8, scale: i32) -> __m512d {
-    let zero = _mm512_setzero_pd().as_f64x8();
-    let neg_one = -1;
-    let slice = slice as *const i8;
-    let offsets = offsets.as_i32x8();
-    macro_rules! call {
-        ($imm8:expr) => {
-            vgatherdpd(zero, slice, offsets, neg_one, $imm8)
-        };
-    }
-    let r = constify_imm8_gather!(scale, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpaddd))]
+pub unsafe fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i {
+    transmute(simd_add(a.as_i32x16(), b.as_i32x16()))
}

-/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices.
+/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32gather_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_add_epi32&expand=101)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
-#[rustc_args_required_const(4)]
-pub unsafe fn _mm512_mask_i32gather_pd(
-    src: __m512d,
-    mask: __mmask8,
-    offsets: __m256i,
-    slice: *const u8,
-    scale: i32,
-) -> __m512d {
-    let src = src.as_f64x8();
-    let slice = slice as *const i8;
-    let offsets = offsets.as_i32x8();
-    macro_rules! call {
-        ($imm8:expr) => {
-            vgatherdpd(src, slice, offsets, mask as i8, $imm8)
-        };
-    }
-    let r = constify_imm8_gather!(scale, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpaddd))]
+pub unsafe fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    let add = _mm512_add_epi32(a, b).as_i32x16();
+    transmute(simd_select_bitmask(k, add, src.as_i32x16()))
}

-/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices.
+/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64gather_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_add_epi32&expand=102)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
-#[rustc_args_required_const(2)]
-pub unsafe fn _mm512_i64gather_pd(offsets: __m512i, slice: *const u8, scale: i32) -> __m512d {
-    let zero = _mm512_setzero_pd().as_f64x8();
-    let neg_one = -1;
-    let slice = slice as *const i8;
-    let offsets = offsets.as_i64x8();
-    macro_rules! call {
-        ($imm8:expr) => {
-            vgatherqpd(zero, slice, offsets, neg_one, $imm8)
-        };
-    }
-    let r = constify_imm8_gather!(scale, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpaddd))]
+pub unsafe fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    let add = _mm512_add_epi32(a, b).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, add, zero))
}

-/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices.
+/// Add packed 64-bit integers in a and b, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64gather_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_add_epi64&expand=109)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
-#[rustc_args_required_const(4)]
-pub unsafe fn _mm512_mask_i64gather_pd(
-    src: __m512d,
-    mask: __mmask8,
-    offsets: __m512i,
-    slice: *const u8,
-    scale: i32,
-) -> __m512d {
-    let src = src.as_f64x8();
-    let slice = slice as *const i8;
-    let offsets = offsets.as_i64x8();
-    macro_rules! call {
-        ($imm8:expr) => {
-            vgatherqpd(src, slice, offsets, mask as i8, $imm8)
-        };
-    }
-    let r = constify_imm8_gather!(scale, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpaddq))]
+pub unsafe fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i {
+    transmute(simd_add(a.as_i64x8(), b.as_i64x8()))
}

-/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices.
+/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64gather_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_add_epi64&expand=110)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
-#[rustc_args_required_const(2)]
-pub unsafe fn _mm512_i64gather_ps(offsets: __m512i, slice: *const u8, scale: i32) -> __m256 {
-    let zero = _mm256_setzero_ps().as_f32x8();
-    let neg_one = -1;
-    let slice = slice as *const i8;
-    let offsets = offsets.as_i64x8();
-    macro_rules! call {
-        ($imm8:expr) => {
-            vgatherqps(zero, slice, offsets, neg_one, $imm8)
-        };
-    }
-    let r = constify_imm8_gather!(scale, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpaddq))]
+pub unsafe fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let add = _mm512_add_epi64(a, b).as_i64x8();
+    transmute(simd_select_bitmask(k, add, src.as_i64x8()))
}

-/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices.
+/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64gather_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_add_epi64&expand=111)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
-#[rustc_args_required_const(4)]
-pub unsafe fn _mm512_mask_i64gather_ps(
-    src: __m256,
-    mask: __mmask8,
-    offsets: __m512i,
-    slice: *const u8,
-    scale: i32,
-) -> __m256 {
-    let src = src.as_f32x8();
-    let slice = slice as *const i8;
-    let offsets = offsets.as_i64x8();
-    macro_rules! call {
-        ($imm8:expr) => {
-            vgatherqps(src, slice, offsets, mask as i8, $imm8)
-        };
-    }
-    let r = constify_imm8_gather!(scale, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpaddq))]
+pub unsafe fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let add = _mm512_add_epi64(a, b).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, add, zero))
}
-/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices.
+/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32gather_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_add_ps&expand=139)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
-#[rustc_args_required_const(2)]
-pub unsafe fn _mm512_i32gather_ps(offsets: __m512i, slice: *const u8, scale: i32) -> __m512 {
-    let zero = _mm512_setzero_ps().as_f32x16();
-    let neg_one = -1;
-    let slice = slice as *const i8;
-    let offsets = offsets.as_i32x16();
-    macro_rules! call {
-        ($imm8:expr) => {
-            vgatherdps(zero, slice, offsets, neg_one, $imm8)
-        };
-    }
-    let r = constify_imm8_gather!(scale, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vaddps))]
+pub unsafe fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 {
+    transmute(simd_add(a.as_f32x16(), b.as_f32x16()))
}

-/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices.
+/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32gather_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_add_ps&expand=140)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
-#[rustc_args_required_const(4)]
-pub unsafe fn _mm512_mask_i32gather_ps(
-    src: __m512,
-    mask: __mmask16,
-    offsets: __m512i,
-    slice: *const u8,
-    scale: i32,
-) -> __m512 {
-    let src = src.as_f32x16();
-    let slice = slice as *const i8;
-    let offsets = offsets.as_i32x16();
-    macro_rules! call {
-        ($imm8:expr) => {
-            vgatherdps(src, slice, offsets, mask as i16, $imm8)
-        };
-    }
-    let r = constify_imm8_gather!(scale, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vaddps))]
+pub unsafe fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
+    let add = _mm512_add_ps(a, b).as_f32x16();
+    transmute(simd_select_bitmask(k, add, src.as_f32x16()))
}

-/// Gather 32-bit integers from memory using 32-bit indices.
+/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32gather_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_add_ps&expand=141)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
-#[rustc_args_required_const(2)]
-pub unsafe fn _mm512_i32gather_epi32(offsets: __m512i, slice: *const u8, scale: i32) -> __m512i {
-    let zero = _mm512_setzero_si512().as_i32x16();
-    let neg_one = -1;
-    let slice = slice as *const i8;
-    let offsets = offsets.as_i32x16();
-    macro_rules! call {
-        ($imm8:expr) => {
-            vpgatherdd(zero, slice, offsets, neg_one, $imm8)
-        };
-    }
-    let r = constify_imm8_gather!(scale, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vaddps))]
+pub unsafe fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
+    let add = _mm512_add_ps(a, b).as_f32x16();
+    let zero = _mm512_setzero_ps().as_f32x16();
+    transmute(simd_select_bitmask(k, add, zero))
}

-/// Gather 32-bit integers from memory using 32-bit indices.
+/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32gather_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_add_pd&expand=127)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
-#[rustc_args_required_const(4)]
-pub unsafe fn _mm512_mask_i32gather_epi32(
-    src: __m512i,
-    mask: __mmask16,
-    offsets: __m512i,
-    slice: *const u8,
-    scale: i32,
-) -> __m512i {
-    let src = src.as_i32x16();
-    let mask = mask as i16;
-    let slice = slice as *const i8;
-    let offsets = offsets.as_i32x16();
-    macro_rules! call {
-        ($imm8:expr) => {
-            vpgatherdd(src, slice, offsets, mask, $imm8)
-        };
-    }
-    let r = constify_imm8!(scale, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vaddpd))]
+pub unsafe fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d {
+    transmute(simd_add(a.as_f64x8(), b.as_f64x8()))
}

-/// Gather 64-bit integers from memory using 32-bit indices.
+/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32gather_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_add_pd&expand=128)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
-#[rustc_args_required_const(2)]
-pub unsafe fn _mm512_i32gather_epi64(offsets: __m256i, slice: *const u8, scale: i32) -> __m512i {
-    let zero = _mm512_setzero_si512().as_i64x8();
-    let neg_one = -1;
-    let slice = slice as *const i8;
-    let offsets = offsets.as_i32x8();
-    macro_rules! call {
-        ($imm8:expr) => {
-            vpgatherdq(zero, slice, offsets, neg_one, $imm8)
-        };
-    }
-    let r = constify_imm8_gather!(scale, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vaddpd))]
+pub unsafe fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+    let add = _mm512_add_pd(a, b).as_f64x8();
+    transmute(simd_select_bitmask(k, add, src.as_f64x8()))
}

-/// Gather 64-bit integers from memory using 32-bit indices.
+/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32gather_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_add_pd&expand=129)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
-#[rustc_args_required_const(4)]
-pub unsafe fn _mm512_mask_i32gather_epi64(
-    src: __m512i,
-    mask: __mmask8,
-    offsets: __m256i,
-    slice: *const u8,
-    scale: i32,
-) -> __m512i {
-    let src = src.as_i64x8();
-    let mask = mask as i8;
-    let slice = slice as *const i8;
-    let offsets = offsets.as_i32x8();
-    macro_rules! call {
-        ($imm8:expr) => {
-            vpgatherdq(src, slice, offsets, mask, $imm8)
-        };
-    }
-    let r = constify_imm8_gather!(scale, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vaddpd))]
+pub unsafe fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+    let add = _mm512_add_pd(a, b).as_f64x8();
+    let zero = _mm512_setzero_pd().as_f64x8();
+    transmute(simd_select_bitmask(k, add, zero))
}

-/// Gather 64-bit integers from memory using 64-bit indices.
+/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64gather_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sub_epi32&expand=5694)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
-#[rustc_args_required_const(2)]
-pub unsafe fn _mm512_i64gather_epi64(offsets: __m512i, slice: *const u8, scale: i32) -> __m512i {
-    let zero = _mm512_setzero_si512().as_i64x8();
-    let neg_one = -1;
-    let slice = slice as *const i8;
-    let offsets = offsets.as_i64x8();
-    macro_rules! call {
-        ($imm8:expr) => {
-            vpgatherqq(zero, slice, offsets, neg_one, $imm8)
-        };
-    }
-    let r = constify_imm8_gather!(scale, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpsubd))]
+pub unsafe fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i {
+    transmute(simd_sub(a.as_i32x16(), b.as_i32x16()))
}

-/// Gather 64-bit integers from memory using 64-bit indices.
+/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64gather_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sub_epi32&expand=5692)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
-#[rustc_args_required_const(4)]
-pub unsafe fn _mm512_mask_i64gather_epi64(
-    src: __m512i,
-    mask: __mmask8,
-    offsets: __m512i,
-    slice: *const u8,
-    scale: i32,
-) -> __m512i {
-    let src = src.as_i64x8();
-    let mask = mask as i8;
-    let slice = slice as *const i8;
-    let offsets = offsets.as_i64x8();
-    macro_rules! call {
-        ($imm8:expr) => {
-            vpgatherqq(src, slice, offsets, mask, $imm8)
-        };
-    }
-    let r = constify_imm8_gather!(scale, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpsubd))]
+pub unsafe fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    let sub = _mm512_sub_epi32(a, b).as_i32x16();
+    transmute(simd_select_bitmask(k, sub, src.as_i32x16()))
}

-/// Gather 32-bit integers from memory using 64-bit indices.
+/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64gather_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sub_epi32&expand=5693)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
-#[rustc_args_required_const(2)]
-pub unsafe fn _mm512_i64gather_epi32(offsets: __m512i, slice: *const u8, scale: i32) -> __m256i {
-    let zeros = _mm256_setzero_si256().as_i32x8();
-    let neg_one = -1;
-    let slice = slice as *const i8;
-    let offsets = offsets.as_i64x8();
-    macro_rules! call {
-        ($imm8:expr) => {
-            vpgatherqd(zeros, slice, offsets, neg_one, $imm8)
-        };
-    }
-    let r = constify_imm8_gather!(scale, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpsubd))]
+pub unsafe fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    let sub = _mm512_sub_epi32(a, b).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, sub, zero))
}

-/// Gather 32-bit integers from memory using 64-bit indices.
+/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64gather_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sub_epi64&expand=5703)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
-#[rustc_args_required_const(4)]
-pub unsafe fn _mm512_mask_i64gather_epi32(
-    src: __m256i,
-    mask: __mmask8,
-    offsets: __m512i,
-    slice: *const u8,
-    scale: i32,
-) -> __m256i {
-    let src = src.as_i32x8();
-    let mask = mask as i8;
-    let slice = slice as *const i8;
-    let offsets = offsets.as_i64x8();
-    macro_rules! call {
-        ($imm8:expr) => {
-            vpgatherqd(src, slice, offsets, mask, $imm8)
-        };
-    }
-    let r = constify_imm8_gather!(scale, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpsubq))]
+pub unsafe fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i {
+    transmute(simd_sub(a.as_i64x8(), b.as_i64x8()))
}

-/// Scatter double-precision (64-bit) floating-point elements from memory using 32-bit indices.
+/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32scatter_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sub_epi64&expand=5701)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vscatterdpd, scale = 1))]
-#[rustc_args_required_const(3)]
-pub unsafe fn _mm512_i32scatter_pd(slice: *mut u8, offsets: __m256i, src: __m512d, scale: i32) {
-    let src = src.as_f64x8();
-    let neg_one = -1;
-    let slice = slice as *mut i8;
-    let offsets = offsets.as_i32x8();
-    macro_rules! call {
-        ($imm8:expr) => {
-            vscatterdpd(slice, neg_one, offsets, src, $imm8)
-        };
-    }
-    constify_imm8_gather!(scale, call);
+#[cfg_attr(test, assert_instr(vpsubq))]
+pub unsafe fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let sub = _mm512_sub_epi64(a, b).as_i64x8();
+    transmute(simd_select_bitmask(k, sub, src.as_i64x8()))
}

-/// Scatter double-precision (64-bit) floating-point elements from src into memory using 32-bit indices.
+/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32scatter_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sub_epi64&expand=5702)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vscatterdpd, scale = 1))]
-#[rustc_args_required_const(4)]
-pub unsafe fn _mm512_mask_i32scatter_pd(
-    slice: *mut u8,
-    mask: __mmask8,
-    offsets: __m256i,
-    src: __m512d,
-    scale: i32,
-) {
-    let src = src.as_f64x8();
-    let slice = slice as *mut i8;
-    let offsets = offsets.as_i32x8();
-    macro_rules! call {
call { - ($imm8:expr) => { - vscatterdpd(slice, mask as i8, offsets, src, $imm8) - }; - } - constify_imm8_gather!(scale, call); +#[cfg_attr(test, assert_instr(vpsubq))] +pub unsafe fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + let add = _mm512_sub_epi64(a, b).as_i64x8(); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, add, zero)) } -/// Scatter double-precision (64-bit) floating-point elements from src into memory using 64-bit indices. +/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64scatter_pd) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sub_ps&expand=5733) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vscatterqpd, scale = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_i64scatter_pd(slice: *mut u8, offsets: __m512i, src: __m512d, scale: i32) { - let src = src.as_f64x8(); - let neg_one = -1; - let slice = slice as *mut i8; - let offsets = offsets.as_i64x8(); - macro_rules! call { - ($imm8:expr) => { - vscatterqpd(slice, neg_one, offsets, src, $imm8) - }; - } - constify_imm8_gather!(scale, call); +#[cfg_attr(test, assert_instr(vsubps))] +pub unsafe fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 { + transmute(simd_sub(a.as_f32x16(), b.as_f32x16())) } -/// Scatter double-precision (64-bit) floating-point elements from src into memory using 64-bit indices. +/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64scatter_pd) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sub_ps&expand=5731) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vscatterqpd, scale = 1))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_i64scatter_pd( - slice: *mut u8, - mask: __mmask8, - offsets: __m512i, - src: __m512d, - scale: i32, -) { - let src = src.as_f64x8(); - let slice = slice as *mut i8; - let offsets = offsets.as_i64x8(); - macro_rules! call { - ($imm8:expr) => { - vscatterqpd(slice, mask as i8, offsets, src, $imm8) - }; - } - constify_imm8_gather!(scale, call); +#[cfg_attr(test, assert_instr(vsubps))] +pub unsafe fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 { + let sub = _mm512_sub_ps(a, b).as_f32x16(); + transmute(simd_select_bitmask(k, sub, src.as_f32x16())) } -/// Scatter single-precision (32-bit) floating-point elements from memory using 32-bit indices. +/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32scatter_ps) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sub_ps&expand=5732) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vscatterdps, scale = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_i32scatter_ps(slice: *mut u8, offsets: __m512i, src: __m512, scale: i32) { - let src = src.as_f32x16(); - let neg_one = -1; - let slice = slice as *mut i8; - let offsets = offsets.as_i32x16(); - macro_rules! call { - ($imm8:expr) => { - vscatterdps(slice, neg_one, offsets, src, $imm8) - }; - } - constify_imm8_gather!(scale, call); +#[cfg_attr(test, assert_instr(vsubps))] +pub unsafe fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 { + let sub = _mm512_sub_ps(a, b).as_f32x16(); + let zero = _mm512_setzero_ps().as_f32x16(); + transmute(simd_select_bitmask(k, sub, zero)) } -/// Scatter single-precision (32-bit) floating-point elements from src into memory using 32-bit indices. +/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32scatter_ps) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sub_pd&expand=5721) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vscatterdps, scale = 1))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_i32scatter_ps( - slice: *mut u8, - mask: __mmask16, - offsets: __m512i, - src: __m512, - scale: i32, -) { - let src = src.as_f32x16(); - let slice = slice as *mut i8; - let offsets = offsets.as_i32x16(); - macro_rules! call { - ($imm8:expr) => { - vscatterdps(slice, mask as i16, offsets, src, $imm8) - }; - } - constify_imm8_gather!(scale, call); +#[cfg_attr(test, assert_instr(vsubpd))] +pub unsafe fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d { + transmute(simd_sub(a.as_f64x8(), b.as_f64x8())) } -/// Scatter single-precision (32-bit) floating-point elements from src into memory using 64-bit indices. +/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64scatter_ps) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sub_pd&expand=5719) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vscatterqps, scale = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_i64scatter_ps(slice: *mut u8, offsets: __m512i, src: __m256, scale: i32) { - let src = src.as_f32x8(); - let neg_one = -1; - let slice = slice as *mut i8; - let offsets = offsets.as_i64x8(); - macro_rules! 
call { - ($imm8:expr) => { - vscatterqps(slice, neg_one, offsets, src, $imm8) - }; - } - constify_imm8_gather!(scale, call); +#[cfg_attr(test, assert_instr(vsubpd))] +pub unsafe fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + let sub = _mm512_sub_pd(a, b).as_f64x8(); + transmute(simd_select_bitmask(k, sub, src.as_f64x8())) } -/// Scatter single-precision (32-bit) floating-point elements from src into memory using 64-bit indices. +/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64scatter_ps) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sub_pd&expand=5720) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vscatterqps, scale = 1))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_i64scatter_ps( - slice: *mut u8, - mask: __mmask8, - offsets: __m512i, - src: __m256, - scale: i32, -) { - let src = src.as_f32x8(); - let slice = slice as *mut i8; - let offsets = offsets.as_i64x8(); - macro_rules! call { - ($imm8:expr) => { - vscatterqps(slice, mask as i8, offsets, src, $imm8) - }; - } - constify_imm8_gather!(scale, call); +#[cfg_attr(test, assert_instr(vsubpd))] +pub unsafe fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d { + let sub = _mm512_sub_pd(a, b).as_f64x8(); + let zero = _mm512_setzero_pd().as_f64x8(); + transmute(simd_select_bitmask(k, sub, zero)) } -/// Scatter 64-bit integers from src into memory using 32-bit indices. +/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32scatter_epi64) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mul_epi32&expand=3907) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpscatterdq, scale = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_i32scatter_epi64(slice: *mut u8, offsets: __m256i, src: __m512i, scale: i32) { - let src = src.as_i64x8(); - let neg_one = -1; - let slice = slice as *mut i8; - let offsets = offsets.as_i32x8(); - macro_rules! call { - ($imm8:expr) => { - vpscatterdq(slice, neg_one, offsets, src, $imm8) - }; - } - constify_imm8_gather!(scale, call); +#[cfg_attr(test, assert_instr(vpmuldq))] +pub unsafe fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i { + transmute(vpmuldq(a.as_i32x16(), b.as_i32x16())) } -/// Scatter 64-bit integers from src into memory using 32-bit indices. +/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
-/// Scatter 64-bit integers from src into memory using 32-bit indices.
+/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32scatter_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mul_epi32&expand=3907)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpscatterdq, scale = 1))]
-#[rustc_args_required_const(3)]
-pub unsafe fn _mm512_i32scatter_epi64(slice: *mut u8, offsets: __m256i, src: __m512i, scale: i32) {
-    let src = src.as_i64x8();
-    let neg_one = -1;
-    let slice = slice as *mut i8;
-    let offsets = offsets.as_i32x8();
-    macro_rules! call {
-        ($imm8:expr) => {
-            vpscatterdq(slice, neg_one, offsets, src, $imm8)
-        };
-    }
-    constify_imm8_gather!(scale, call);
+#[cfg_attr(test, assert_instr(vpmuldq))]
+pub unsafe fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i {
+    transmute(vpmuldq(a.as_i32x16(), b.as_i32x16()))
}

-/// Scatter 64-bit integers from src into memory using 32-bit indices.
+/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32scatter_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mul_epi32&expand=3905)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpscatterdq, scale = 1))]
-#[rustc_args_required_const(4)]
-pub unsafe fn _mm512_mask_i32scatter_epi64(
-    slice: *mut u8,
-    mask: __mmask8,
-    offsets: __m256i,
-    src: __m512i,
-    scale: i32,
-) {
-    let src = src.as_i64x8();
-    let mask = mask as i8;
-    let slice = slice as *mut i8;
-    let offsets = offsets.as_i32x8();
-    macro_rules! call {
-        ($imm8:expr) => {
-            vpscatterdq(slice, mask, offsets, src, $imm8)
-        };
-    }
-    constify_imm8_gather!(scale, call);
+#[cfg_attr(test, assert_instr(vpmuldq))]
+pub unsafe fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let mul = _mm512_mul_epi32(a, b).as_i64x8();
+    transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
}

-/// Scatter 64-bit integers from src into memory using 64-bit indices.
+/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64scatter_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mul_epi32&expand=3906)
#[inline]
#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpscatterqq, scale = 1))]
-#[rustc_args_required_const(3)]
-pub unsafe fn _mm512_i64scatter_epi64(slice: *mut u8, offsets: __m512i, src: __m512i, scale: i32) {
-    let src = src.as_i64x8();
-    let neg_one = -1;
-    let slice = slice as *mut i8;
-    let offsets = offsets.as_i64x8();
-    macro_rules! call {
-        ($imm8:expr) => {
-            vpscatterqq(slice, neg_one, offsets, src, $imm8)
-        };
-    }
-    constify_imm8_gather!(scale, call);
+#[cfg_attr(test, assert_instr(vpmuldq))]
+pub unsafe fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let mul = _mm512_mul_epi32(a, b).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, mul, zero))
}
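// A scalar sketch (illustration only, not from the original patch) of what
// vpmuldq (`_mm512_mul_epi32`) computes per 64-bit lane: only the low 32 bits
// of each lane participate, sign-extended into a full 64-bit product. The
// `_mm512_mul_epu32` (vpmuludq) addition further below is the zero-extending
// counterpart.
fn mul_epi32_lane(a: u64, b: u64) -> i64 {
    let lo_a = a as u32 as i32 as i64; // take the low 32 bits, sign-extend
    let lo_b = b as u32 as i32 as i64;
    lo_a * lo_b // cannot overflow: an i32 * i32 product always fits in i64
}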
call { - ($imm8:expr) => { - vpscatterqq(slice, mask, offsets, src, $imm8) - }; - } - constify_imm8_gather!(scale, call); +#[cfg_attr(test, assert_instr(vpmulld))] +pub unsafe fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i { + transmute(simd_mul(a.as_i32x16(), b.as_i32x16())) } -/// Scatter 32-bit integers from src into memory using 32-bit indices. +/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64scatter_epi32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_mullo_epi32&expand=4003) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpscatterdd, scale = 1))] -#[rustc_args_required_const(3)] -pub unsafe fn _mm512_i32scatter_epi32(slice: *mut u8, offsets: __m512i, src: __m512i, scale: i32) { - let src = src.as_i32x16(); - let neg_one = -1; - let slice = slice as *mut i8; - let offsets = offsets.as_i32x16(); - macro_rules! call { - ($imm8:expr) => { - vpscatterdd(slice, neg_one, offsets, src, $imm8) - }; - } - constify_imm8_gather!(scale, call); +#[cfg_attr(test, assert_instr(vpmulld))] +pub unsafe fn _mm512_mask_mullo_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + b: __m512i, +) -> __m512i { + let mul = _mm512_mullo_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, mul, src.as_i32x16())) } -/// Scatter 32-bit integers from src into memory using 32-bit indices. +/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32scatter_epi32) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_mullo_epi32&expand=4004) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpscatterdd, scale = 1))] -#[rustc_args_required_const(4)] -pub unsafe fn _mm512_mask_i32scatter_epi32( - slice: *mut u8, - mask: __mmask16, - offsets: __m512i, - src: __m512i, - scale: i32, -) { - let src = src.as_i32x16(); - let mask = mask as i16; - let slice = slice as *mut i8; - let offsets = offsets.as_i32x16(); - macro_rules! call { - ($imm8:expr) => { - vpscatterdd(slice, mask, offsets, src, $imm8) - }; - } - constify_imm8_gather!(scale, call); +#[cfg_attr(test, assert_instr(vpmulld))] +pub unsafe fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + let mul = _mm512_mullo_epi32(a, b).as_i32x16(); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, mul, zero)) } -/// Scatter 32-bit integers from src into memory using 64-bit indices. +/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst. 
-/// Scatter 32-bit integers from src into memory using 64-bit indices.
+/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64scatter_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mullox_epi64&expand=4017)
+///
+/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpscatterqd, scale = 1))]
-#[rustc_args_required_const(3)]
-pub unsafe fn _mm512_i64scatter_epi32(slice: *mut u8, offsets: __m512i, src: __m256i, scale: i32) {
-    let src = src.as_i32x8();
-    let neg_one = -1;
-    let slice = slice as *mut i8;
-    let offsets = offsets.as_i64x8();
-    macro_rules! call {
-        ($imm8:expr) => {
-            vpscatterqd(slice, neg_one, offsets, src, $imm8)
-        };
-    }
-    constify_imm8_gather!(scale, call);
+pub unsafe fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i {
+    transmute(simd_mul(a.as_i64x8(), b.as_i64x8()))
 }

-/// Scatter 32-bit integers from src into memory using 64-bit indices.
+/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64scatter_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mullox_epi64&expand=4016)
+///
+/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpscatterqd, scale = 1))]
-#[rustc_args_required_const(4)]
-pub unsafe fn _mm512_mask_i64scatter_epi32(
-    slice: *mut u8,
-    mask: __mmask8,
-    offsets: __m512i,
-    src: __m256i,
-    scale: i32,
-) {
-    let src = src.as_i32x8();
-    let mask = mask as i8;
-    let slice = slice as *mut i8;
-    let offsets = offsets.as_i64x8();
-    macro_rules! call {
-        ($imm8:expr) => {
-            vpscatterqd(slice, mask, offsets, src, $imm8)
-        };
-    }
-    constify_imm8_gather!(scale, call);
+pub unsafe fn _mm512_mask_mullox_epi64(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
+    let mul = _mm512_mullox_epi64(a, b).as_i64x8();
+    transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
 }
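Note that `_mm512_mullox_epi64` carries no `assert_instr`: AVX512F has no full 64 x 64-bit vector multiply (`vpmullq` needs AVX512DQ), so the `simd_mul` here lowers to an instruction sequence, which is exactly what the performance note warns about. Per lane it is still an ordinary wrapping multiply; a sketch (hypothetical, not part of the patch):

```rust
#[target_feature(enable = "avx512f")]
unsafe fn mullox_wraps() {
    use core::mem::transmute;
    let a: __m512i = transmute([-1_i64; 8]);
    let b: __m512i = transmute([2_i64; 8]);
    // Low 64 bits of the full product: (-1) * 2 = -2 in every lane.
    let r: [i64; 8] = transmute(_mm512_mullox_epi64(a, b));
    assert_eq!(r, [-2_i64; 8]);
}
```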
-/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
+/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rol_epi32&expand=4685)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_epu32&expand=3916)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
-#[rustc_args_required_const(1)]
-pub unsafe fn _mm512_rol_epi32(a: __m512i, imm8: i32) -> __m512i {
-    assert!(imm8 >= 0 && imm8 <= 255);
-    transmute(vprold(a.as_i32x16(), imm8))
+#[cfg_attr(test, assert_instr(vpmuludq))]
+pub unsafe fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i {
+    transmute(vpmuludq(a.as_u32x16(), b.as_u32x16()))
 }

-/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rol_epi32&expand=4683)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_epu32&expand=3914)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
-#[rustc_args_required_const(3)]
-pub unsafe fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
-    assert!(imm8 >= 0 && imm8 <= 255);
-    let rol = vprold(a.as_i32x16(), imm8);
-    transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
+#[cfg_attr(test, assert_instr(vpmuludq))]
+pub unsafe fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let mul = _mm512_mul_epu32(a, b).as_u64x8();
+    transmute(simd_select_bitmask(k, mul, src.as_u64x8()))
 }

-/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rol_epi32&expand=4684)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_epu32&expand=3915)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
-#[rustc_args_required_const(2)]
-pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
-    assert!(imm8 >= 0 && imm8 <= 255);
-    let rol = vprold(a.as_i32x16(), imm8);
-    let zero = _mm512_setzero_si512().as_i32x16();
-    transmute(simd_select_bitmask(k, rol, zero))
+#[cfg_attr(test, assert_instr(vpmuludq))]
+pub unsafe fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let mul = _mm512_mul_epu32(a, b).as_u64x8();
+    let zero = _mm512_setzero_si512().as_u64x8();
+    transmute(simd_select_bitmask(k, mul, zero))
 }
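As with the older SSE/AVX `mul_epu32` intrinsics, only the low (even-indexed) 32-bit lane of every 64-bit element participates; the high lane is ignored and the product is widened to 64 bits. A sketch (hypothetical, not part of the patch):

```rust
#[target_feature(enable = "avx512f")]
unsafe fn mul_epu32_widens() {
    use core::mem::transmute;
    // Per 64-bit element: the low u32 lane is used, the high u32 lane ignored.
    let a: __m512i = transmute([[4_000_000_000_u32, 0xDEAD_BEEF]; 8]);
    let b: __m512i = transmute([[2_u32, 0xFFFF_FFFF]; 8]);
    let r: [u64; 8] = transmute(_mm512_mul_epu32(a, b));
    // The product overflows u32 but fits comfortably in the u64 result lanes.
    assert_eq!(r, [8_000_000_000_u64; 8]);
}
```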
-/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
+/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ror_epi32&expand=4721)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_ps&expand=3934)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprold, imm8 = 233))]
-#[rustc_args_required_const(1)]
-pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: i32) -> __m512i {
-    assert!(imm8 >= 0 && imm8 <= 255);
-    transmute(vprord(a.as_i32x16(), imm8))
+#[cfg_attr(test, assert_instr(vmulps))]
+pub unsafe fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 {
+    transmute(simd_mul(a.as_f32x16(), b.as_f32x16()))
 }

-/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi32&expand=4719)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_ps&expand=3932)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprold, imm8 = 123))]
-#[rustc_args_required_const(3)]
-pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
-    assert!(imm8 >= 0 && imm8 <= 255);
-    let ror = vprord(a.as_i32x16(), imm8);
-    transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
+#[cfg_attr(test, assert_instr(vmulps))]
+pub unsafe fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
+    let mul = _mm512_mul_ps(a, b).as_f32x16();
+    transmute(simd_select_bitmask(k, mul, src.as_f32x16()))
 }

-/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi32&expand=4720)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_ps&expand=3933)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprold, imm8 = 123))]
-#[rustc_args_required_const(2)]
-pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
-    assert!(imm8 >= 0 && imm8 <= 255);
-    let ror = vprord(a.as_i32x16(), imm8);
-    let zero = _mm512_setzero_si512().as_i32x16();
-    transmute(simd_select_bitmask(k, ror, zero))
+#[cfg_attr(test, assert_instr(vmulps))]
+pub unsafe fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
+    let mul = _mm512_mul_ps(a, b).as_f32x16();
+    let zero = _mm512_setzero_ps().as_f32x16();
+    transmute(simd_select_bitmask(k, mul, zero))
 }

-/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
+/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rol_epi64&expand=4694)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mul_pd&expand=3925)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
-#[rustc_args_required_const(1)]
-pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: i32) -> __m512i {
-    assert!(imm8 >= 0 && imm8 <= 255);
-    transmute(vprolq(a.as_i64x8(), imm8))
+#[cfg_attr(test, assert_instr(vmulpd))]
+pub unsafe fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d {
+    transmute(simd_mul(a.as_f64x8(), b.as_f64x8()))
 }

-/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rol_epi64&expand=4692)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_mul_pd&expand=3923)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
-#[rustc_args_required_const(3)]
-pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
-    assert!(imm8 >= 0 && imm8 <= 255);
-    let rol = vprolq(a.as_i64x8(), imm8);
-    transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
+#[cfg_attr(test, assert_instr(vmulpd))]
+pub unsafe fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+    let mul = _mm512_mul_pd(a, b).as_f64x8();
+    transmute(simd_select_bitmask(k, mul, src.as_f64x8()))
 }
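Because the writemask variant multiplies and blends in one operation, it can stand in for a separate multiply-plus-blend pair. A sketch with `_mm512_mask_mul_ps` (hypothetical, not part of the patch):

```rust
#[target_feature(enable = "avx512f")]
unsafe fn scale_even_lanes() {
    use core::mem::transmute;
    let x: __m512 = transmute([1.0_f32; 16]);
    let ten: __m512 = transmute([10.0_f32; 16]);
    let k: __mmask16 = 0b0101_0101_0101_0101; // even lanes only
    // Even lanes become x * 10; odd lanes are passed through from `x`.
    let r: [f32; 16] = transmute(_mm512_mask_mul_ps(x, k, x, ten));
    assert_eq!(r[0], 10.0);
    assert_eq!(r[1], 1.0);
}
```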
-/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rol_epi64&expand=4693)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_mul_pd&expand=3924)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
-#[rustc_args_required_const(2)]
-pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
-    assert!(imm8 >= 0 && imm8 <= 255);
-    let rol = vprolq(a.as_i64x8(), imm8);
-    let zero = _mm512_setzero_si512().as_i64x8();
-    transmute(simd_select_bitmask(k, rol, zero))
+#[cfg_attr(test, assert_instr(vmulpd))]
+pub unsafe fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+    let mul = _mm512_mul_pd(a, b).as_f64x8();
+    let zero = _mm512_setzero_pd().as_f64x8();
+    transmute(simd_select_bitmask(k, mul, zero))
 }

-/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
+/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ror_epi64&expand=4730)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_ps&expand=2162)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
-#[rustc_args_required_const(1)]
-pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: i32) -> __m512i {
-    assert!(imm8 >= 0 && imm8 <= 255);
-    transmute(vprorq(a.as_i64x8(), imm8))
+#[cfg_attr(test, assert_instr(vdivps))]
+pub unsafe fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 {
+    transmute(simd_div(a.as_f32x16(), b.as_f32x16()))
 }

-/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi64&expand=4728)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_ps&expand=2163)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
-#[rustc_args_required_const(3)]
-pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
-    assert!(imm8 >= 0 && imm8 <= 255);
-    let ror = vprorq(a.as_i64x8(), imm8);
-    transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
+#[cfg_attr(test, assert_instr(vdivps))]
+pub unsafe fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
+    let div = _mm512_div_ps(a, b).as_f32x16();
+    transmute(simd_select_bitmask(k, div, src.as_f32x16()))
 }

-/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi64&expand=4729)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_ps&expand=2164)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
-#[rustc_args_required_const(2)]
-pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
-    assert!(imm8 >= 0 && imm8 <= 255);
-    let ror = vprorq(a.as_i64x8(), imm8);
-    let zero = _mm512_setzero_si512().as_i64x8();
-    transmute(simd_select_bitmask(k, ror, zero))
+#[cfg_attr(test, assert_instr(vdivps))]
+pub unsafe fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
+    let div = _mm512_div_ps(a, b).as_f32x16();
+    let zero = _mm512_setzero_ps().as_f32x16();
+    transmute(simd_select_bitmask(k, div, zero))
 }

-/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
+/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi32&expand=5310)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_div_pd&expand=2153)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
-#[rustc_args_required_const(1)]
-pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i {
-    assert!(imm8 <= 255);
-    transmute(vpsllid(a.as_i32x16(), imm8))
+#[cfg_attr(test, assert_instr(vdivpd))]
+pub unsafe fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d {
+    transmute(simd_div(a.as_f64x8(), b.as_f64x8()))
 }
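Zeromasking composes naturally with division to squash lanes whose divisor is unusable. A sketch with `_mm512_maskz_div_ps` (hypothetical, not part of the patch; in real code the mask would come from a compare rather than a literal):

```rust
#[target_feature(enable = "avx512f")]
unsafe fn div_with_zeroed_lanes() {
    use core::mem::transmute;
    let num: __m512 = transmute([10.0_f32; 16]);
    let den: __m512 = transmute([[2.0_f32, 0.0]; 8]); // every odd lane is zero
    // Mask of the non-zero divisors: masked-off lanes yield 0.0, not +inf.
    let k: __mmask16 = 0b0101_0101_0101_0101;
    let r: [f32; 16] = transmute(_mm512_maskz_div_ps(k, num, den));
    assert_eq!(r[0], 5.0);
    assert_eq!(r[1], 0.0);
}
```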
-/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi32&expand=5308)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_div_pd&expand=2154)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
-#[rustc_args_required_const(3)]
-pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
-    assert!(imm8 <= 255);
-    let shf = vpsllid(a.as_i32x16(), imm8);
-    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
+#[cfg_attr(test, assert_instr(vdivpd))]
+pub unsafe fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+    let div = _mm512_div_pd(a, b).as_f64x8();
+    transmute(simd_select_bitmask(k, div, src.as_f64x8()))
 }

-/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi32&expand=5309)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_div_pd&expand=2155)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
-#[rustc_args_required_const(2)]
-pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
-    assert!(imm8 <= 255);
-    let shf = vpsllid(a.as_i32x16(), imm8);
-    let zero = _mm512_setzero_si512().as_i32x16();
-    transmute(simd_select_bitmask(k, shf, zero))
+#[cfg_attr(test, assert_instr(vdivpd))]
+pub unsafe fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+    let div = _mm512_div_pd(a, b).as_f64x8();
+    let zero = _mm512_setzero_pd().as_f64x8();
+    transmute(simd_select_bitmask(k, div, zero))
 }

-/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
+/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi32&expand=5522)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_epi32&expand=3582)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
-#[rustc_args_required_const(1)]
-pub unsafe fn _mm512_srli_epi32(a: __m512i, imm8: u32) -> __m512i {
-    assert!(imm8 <= 255);
-    transmute(vpsrlid(a.as_i32x16(), imm8))
+#[cfg_attr(test, assert_instr(vpmaxsd))]
+pub unsafe fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i {
+    transmute(vpmaxsd(a.as_i32x16(), b.as_i32x16()))
 }

-/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi32&expand=5520)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_epi32&expand=3580)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
-#[rustc_args_required_const(3)]
-pub unsafe fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
-    assert!(imm8 <= 255);
-    let shf = vpsrlid(a.as_i32x16(), imm8);
-    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
+#[cfg_attr(test, assert_instr(vpmaxsd))]
+pub unsafe fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    let max = _mm512_max_epi32(a, b).as_i32x16();
+    transmute(simd_select_bitmask(k, max, src.as_i32x16()))
 }

-/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi32&expand=5521)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_epi32&expand=3581)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
-#[rustc_args_required_const(2)]
-pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
-    assert!(imm8 <= 255);
-    let shf = vpsrlid(a.as_i32x16(), imm8);
+#[cfg_attr(test, assert_instr(vpmaxsd))]
+pub unsafe fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    let max = _mm512_max_epi32(a, b).as_i32x16();
     let zero = _mm512_setzero_si512().as_i32x16();
-    transmute(simd_select_bitmask(k, shf, zero))
+    transmute(simd_select_bitmask(k, max, zero))
 }

-/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
+/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi64&expand=5319)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_epi64&expand=3591)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
-#[rustc_args_required_const(1)]
-pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i {
-    assert!(imm8 <= 255);
-    transmute(vpslliq(a.as_i64x8(), imm8))
+#[cfg_attr(test, assert_instr(vpmaxsq))]
+pub unsafe fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
+    transmute(vpmaxsq(a.as_i64x8(), b.as_i64x8()))
 }

-/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi64&expand=5317)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_epi64&expand=3589)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
-#[rustc_args_required_const(3)]
-pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
-    assert!(imm8 <= 255);
-    let shf = vpslliq(a.as_i64x8(), imm8);
-    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
+#[cfg_attr(test, assert_instr(vpmaxsq))]
+pub unsafe fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let max = _mm512_max_epi64(a, b).as_i64x8();
+    transmute(simd_select_bitmask(k, max, src.as_i64x8()))
 }

-/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi64&expand=5318)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_epi64&expand=3590)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
-#[rustc_args_required_const(2)]
-pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
-    assert!(imm8 <= 255);
-    let shf = vpslliq(a.as_i64x8(), imm8);
+#[cfg_attr(test, assert_instr(vpmaxsq))]
+pub unsafe fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let max = _mm512_max_epi64(a, b).as_i64x8();
     let zero = _mm512_setzero_si512().as_i64x8();
-    transmute(simd_select_bitmask(k, shf, zero))
+    transmute(simd_select_bitmask(k, max, zero))
 }

-/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
+/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi64&expand=5531)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_ps&expand=3655)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
-#[rustc_args_required_const(1)]
-pub unsafe fn _mm512_srli_epi64(a: __m512i, imm8: u32) -> __m512i {
-    assert!(imm8 <= 255);
-    transmute(vpsrliq(a.as_i64x8(), imm8))
+#[cfg_attr(test, assert_instr(vmaxps))]
+pub unsafe fn _mm512_max_ps(a: __m512, b: __m512) -> __m512 {
+    transmute(simd_fmax(a.as_f32x16(), b.as_f32x16()))
 }

-/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi64&expand=5529)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_ps&expand=3653)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
-#[rustc_args_required_const(3)]
-pub unsafe fn _mm512_mask_srli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
-    assert!(imm8 <= 255);
-    let shf = vpsrliq(a.as_i64x8(), imm8);
-    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
+#[cfg_attr(test, assert_instr(vmaxps))]
+pub unsafe fn _mm512_mask_max_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
+    let max = _mm512_max_ps(a, b).as_f32x16();
+    transmute(simd_select_bitmask(k, max, src.as_f32x16()))
 }

-/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi64&expand=5530)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_ps&expand=3654)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
-#[rustc_args_required_const(2)]
-pub unsafe fn _mm512_maskz_srli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
-    assert!(imm8 <= 255);
-    let shf = vpsrliq(a.as_i64x8(), imm8);
-    let zero = _mm512_setzero_si512().as_i64x8();
-    transmute(simd_select_bitmask(k, shf, zero))
+#[cfg_attr(test, assert_instr(vmaxps))]
+pub unsafe fn _mm512_maskz_max_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
+    let max = _mm512_max_ps(a, b).as_f32x16();
+    let zero = _mm512_setzero_ps().as_f32x16();
+    transmute(simd_select_bitmask(k, max, zero))
 }

-/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst.
+/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sll_epi32&expand=5280)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_pd&expand=3645)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpslld))]
-pub unsafe fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
-    transmute(vpslld(a.as_i32x16(), count.as_i32x4()))
+#[cfg_attr(test, assert_instr(vmaxpd))]
+pub unsafe fn _mm512_max_pd(a: __m512d, b: __m512d) -> __m512d {
+    transmute(simd_fmax(a.as_f64x8(), b.as_f64x8()))
 }

-/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sll_epi32&expand=5278)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_pd&expand=3643)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpslld))]
-pub unsafe fn _mm512_mask_sll_epi32(
-    src: __m512i,
-    k: __mmask16,
-    a: __m512i,
-    count: __m128i,
-) -> __m512i {
-    let shf = _mm512_sll_epi32(a, count).as_i32x16();
-    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
+#[cfg_attr(test, assert_instr(vmaxpd))]
+pub unsafe fn _mm512_mask_max_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+    let max = _mm512_max_pd(a, b).as_f64x8();
+    transmute(simd_select_bitmask(k, max, src.as_f64x8()))
 }

-/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sll_epi32&expand=5279)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_pd&expand=3644)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpslld))]
-pub unsafe fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
-    let shf = _mm512_sll_epi32(a, count).as_i32x16();
-    let zero = _mm512_setzero_si512().as_i32x16();
-    transmute(simd_select_bitmask(k, shf, zero))
+#[cfg_attr(test, assert_instr(vmaxpd))]
+pub unsafe fn _mm512_maskz_max_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+    let max = _mm512_max_pd(a, b).as_f64x8();
+    let zero = _mm512_setzero_pd().as_f64x8();
+    transmute(simd_select_bitmask(k, max, zero))
 }

-/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst.
+/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srl_epi32&expand=5492)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_epu32&expand=3618)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrld))]
-pub unsafe fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
-    transmute(vpsrld(a.as_i32x16(), count.as_i32x4()))
+#[cfg_attr(test, assert_instr(vpmaxud))]
+pub unsafe fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
+    transmute(vpmaxud(a.as_u32x16(), b.as_u32x16()))
 }

-/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srl_epi32&expand=5490)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_epu32&expand=3616)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrld))]
-pub unsafe fn _mm512_mask_srl_epi32(
-    src: __m512i,
-    k: __mmask16,
-    a: __m512i,
-    count: __m128i,
-) -> __m512i {
-    let shf = _mm512_srl_epi32(a, count).as_i32x16();
-    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
+#[cfg_attr(test, assert_instr(vpmaxud))]
+pub unsafe fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    let max = _mm512_max_epu32(a, b).as_u32x16();
+    transmute(simd_select_bitmask(k, max, src.as_u32x16()))
 }

-/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srl_epi32&expand=5491)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_epu32&expand=3617)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrld))]
-pub unsafe fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
-    let shf = _mm512_srl_epi32(a, count).as_i32x16();
-    let zero = _mm512_setzero_si512().as_i32x16();
-    transmute(simd_select_bitmask(k, shf, zero))
+#[cfg_attr(test, assert_instr(vpmaxud))]
+pub unsafe fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    let max = _mm512_max_epu32(a, b).as_u32x16();
+    let zero = _mm512_setzero_si512().as_u32x16();
+    transmute(simd_select_bitmask(k, max, zero))
 }

-/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst.
+/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sll_epi64&expand=5289)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_max_epu64&expand=3627)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsllq))]
-pub unsafe fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
-    transmute(vpsllq(a.as_i64x8(), count.as_i64x2()))
+#[cfg_attr(test, assert_instr(vpmaxuq))]
+pub unsafe fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
+    transmute(vpmaxuq(a.as_u64x8(), b.as_u64x8()))
 }
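The `epi`/`epu` split matters whenever the top bit is set: the same bit pattern orders differently under signed and unsigned comparison. A sketch (hypothetical, not part of the patch):

```rust
#[target_feature(enable = "avx512f")]
unsafe fn signedness_of_max() {
    use core::mem::transmute;
    // -1_i32 and u32::MAX share the all-ones bit pattern.
    let a: __m512i = transmute([-1_i32; 16]);
    let b: __m512i = transmute([0_i32; 16]);
    let s: [i32; 16] = transmute(_mm512_max_epi32(a, b)); // signed: 0 wins
    let u: [i32; 16] = transmute(_mm512_max_epu32(a, b)); // unsigned: all-ones wins
    assert_eq!(s, [0_i32; 16]);
    assert_eq!(u, [-1_i32; 16]);
}
```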
-/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sll_epi64&expand=5287)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_max_epu64&expand=3625)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsllq))]
-pub unsafe fn _mm512_mask_sll_epi64(
-    src: __m512i,
-    k: __mmask8,
-    a: __m512i,
-    count: __m128i,
-) -> __m512i {
-    let shf = _mm512_sll_epi64(a, count).as_i64x8();
-    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
+#[cfg_attr(test, assert_instr(vpmaxuq))]
+pub unsafe fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let max = _mm512_max_epu64(a, b).as_u64x8();
+    transmute(simd_select_bitmask(k, max, src.as_u64x8()))
 }

-/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sll_epi64&expand=5288)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_max_epu64&expand=3626)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsllq))]
-pub unsafe fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
-    let shf = _mm512_sll_epi64(a, count).as_i64x8();
-    let zero = _mm512_setzero_si512().as_i64x8();
-    transmute(simd_select_bitmask(k, shf, zero))
+#[cfg_attr(test, assert_instr(vpmaxuq))]
+pub unsafe fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let max = _mm512_max_epu64(a, b).as_u64x8();
+    let zero = _mm512_setzero_si512().as_u64x8();
+    transmute(simd_select_bitmask(k, max, zero))
 }

-/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst.
+/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srl_epi64&expand=5501)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_epi32&expand=3696)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrlq))]
-pub unsafe fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
-    transmute(vpsrlq(a.as_i64x8(), count.as_i64x2()))
+#[cfg_attr(test, assert_instr(vpminsd))]
+pub unsafe fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i {
+    transmute(vpminsd(a.as_i32x16(), b.as_i32x16()))
 }

-/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srl_epi64&expand=5499)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_epi32&expand=3694)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrlq))]
-pub unsafe fn _mm512_mask_srl_epi64(
-    src: __m512i,
-    k: __mmask8,
-    a: __m512i,
-    count: __m128i,
-) -> __m512i {
-    let shf = _mm512_srl_epi64(a, count).as_i64x8();
-    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
+#[cfg_attr(test, assert_instr(vpminsd))]
+pub unsafe fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    let min = _mm512_min_epi32(a, b).as_i32x16();
+    transmute(simd_select_bitmask(k, min, src.as_i32x16()))
 }

-/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sll_epi64&expand=5288)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_epi32&expand=3695)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrlq))]
-pub unsafe fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
-    let shf = _mm512_srl_epi64(a, count).as_i64x8();
-    let zero = _mm512_setzero_si512().as_i64x8();
-    transmute(simd_select_bitmask(k, shf, zero))
+#[cfg_attr(test, assert_instr(vpminsd))]
+pub unsafe fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    let min = _mm512_min_epi32(a, b).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, min, zero))
 }

-/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst.
+/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sra_epi32&expand=5407)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_epi64&expand=3705)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrad))]
-pub unsafe fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i {
-    transmute(vpsrad(a.as_i32x16(), count.as_i32x4()))
+#[cfg_attr(test, assert_instr(vpminsq))]
+pub unsafe fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i {
+    transmute(vpminsq(a.as_i64x8(), b.as_i64x8()))
 }
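min and max compose into the usual branch-free clamp; a sketch (hypothetical helper, not part of the patch):

```rust
/// Clamp each signed 32-bit lane of `v` into [`lo`, `hi`], assuming
/// lo <= hi lane-wise. Classic branch-free idiom: raise to `lo`, cap at `hi`.
#[target_feature(enable = "avx512f")]
unsafe fn clamp_epi32(v: __m512i, lo: __m512i, hi: __m512i) -> __m512i {
    _mm512_min_epi32(_mm512_max_epi32(v, lo), hi)
}
```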
-/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sra_epi32&expand=5405)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_epi64&expand=3703)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrad))]
-pub unsafe fn _mm512_mask_sra_epi32(
-    src: __m512i,
-    k: __mmask16,
-    a: __m512i,
-    count: __m128i,
-) -> __m512i {
-    let shf = _mm512_sra_epi32(a, count).as_i32x16();
-    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
+#[cfg_attr(test, assert_instr(vpminsq))]
+pub unsafe fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let min = _mm512_min_epi64(a, b).as_i64x8();
+    transmute(simd_select_bitmask(k, min, src.as_i64x8()))
 }

-/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sra_epi32&expand=5406)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_epi64&expand=3704)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrad))]
-pub unsafe fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
-    let shf = _mm512_sra_epi32(a, count).as_i32x16();
-    let zero = _mm512_setzero_si512().as_i32x16();
-    transmute(simd_select_bitmask(k, shf, zero))
+#[cfg_attr(test, assert_instr(vpminsq))]
+pub unsafe fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let min = _mm512_min_epi64(a, b).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, min, zero))
 }

-/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
+/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sra_epi64&expand=5416)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_ps&expand=3769)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraq))]
-pub unsafe fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i {
-    transmute(vpsraq(a.as_i64x8(), count.as_i64x2()))
+#[cfg_attr(test, assert_instr(vminps))]
+pub unsafe fn _mm512_min_ps(a: __m512, b: __m512) -> __m512 {
+    transmute(simd_fmin(a.as_f32x16(), b.as_f32x16()))
 }

-/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sra_epi64&expand=5414)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_ps&expand=3767)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraq))]
-pub unsafe fn _mm512_mask_sra_epi64(
-    src: __m512i,
-    k: __mmask8,
-    a: __m512i,
-    count: __m128i,
-) -> __m512i {
-    let shf = _mm512_sra_epi64(a, count).as_i64x8();
-    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
+#[cfg_attr(test, assert_instr(vminps))]
+pub unsafe fn _mm512_mask_min_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
+    let min = _mm512_min_ps(a, b).as_f32x16();
+    transmute(simd_select_bitmask(k, min, src.as_f32x16()))
 }

-/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sra_epi64&expand=5415)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_ps&expand=3768)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraq))]
-pub unsafe fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
-    let shf = _mm512_sra_epi64(a, count).as_i64x8();
-    let zero = _mm512_setzero_si512().as_i64x8();
-    transmute(simd_select_bitmask(k, shf, zero))
+#[cfg_attr(test, assert_instr(vminps))]
+pub unsafe fn _mm512_maskz_min_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
+    let min = _mm512_min_ps(a, b).as_f32x16();
+    let zero = _mm512_setzero_ps().as_f32x16();
+    transmute(simd_select_bitmask(k, min, zero))
 }

-/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
+/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srai_epi32&expand=5436)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_pd&expand=3759)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
-#[rustc_args_required_const(1)]
-pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i {
-    assert!(imm8 <= 255);
-    transmute(vpsraid(a.as_i32x16(), imm8))
+#[cfg_attr(test, assert_instr(vminpd))]
+pub unsafe fn _mm512_min_pd(a: __m512d, b: __m512d) -> __m512d {
+    transmute(simd_fmin(a.as_f64x8(), b.as_f64x8()))
 }
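The same idiom in floating point gives a saturate-to-range, with the usual caveat that NaN handling follows whatever min/max semantics the lowering picks. A sketch (hypothetical helper, not part of the patch):

```rust
/// Saturate each f32 lane of `v` into [0.0, 1.0].
#[target_feature(enable = "avx512f")]
unsafe fn clamp01_ps(v: __m512) -> __m512 {
    use core::mem::transmute;
    let one: __m512 = transmute([1.0_f32; 16]);
    _mm512_min_ps(_mm512_max_ps(v, _mm512_setzero_ps()), one)
}
```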
-/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi32&expand=5434)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_pd&expand=3757)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
-#[rustc_args_required_const(3)]
-pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
-    assert!(imm8 <= 255);
-    let shf = vpsraid(a.as_i32x16(), imm8);
-    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
+#[cfg_attr(test, assert_instr(vminpd))]
+pub unsafe fn _mm512_mask_min_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+    let min = _mm512_min_pd(a, b).as_f64x8();
+    transmute(simd_select_bitmask(k, min, src.as_f64x8()))
 }

-/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi32&expand=5435)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_pd&expand=3758)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
-#[rustc_args_required_const(2)]
-pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
-    assert!(imm8 <= 255);
-    let shf = vpsraid(a.as_i32x16(), imm8);
-    let zero = _mm512_setzero_si512().as_i32x16();
-    transmute(simd_select_bitmask(k, shf, zero))
+#[cfg_attr(test, assert_instr(vminpd))]
+pub unsafe fn _mm512_maskz_min_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+    let min = _mm512_min_pd(a, b).as_f64x8();
+    let zero = _mm512_setzero_pd().as_f64x8();
+    transmute(simd_select_bitmask(k, min, zero))
 }

-/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
+/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srai_epi64&expand=5445)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_epu32&expand=3732)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
-#[rustc_args_required_const(1)]
-pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i {
-    assert!(imm8 <= 255);
-    transmute(vpsraiq(a.as_i64x8(), imm8))
+#[cfg_attr(test, assert_instr(vpminud))]
+pub unsafe fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i {
+    transmute(vpminud(a.as_u32x16(), b.as_u32x16()))
 }

-/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi64&expand=5443)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_min_epu32&expand=3730)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
-#[rustc_args_required_const(3)]
-pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
-    assert!(imm8 <= 255);
-    let shf = vpsraiq(a.as_i64x8(), imm8);
-    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
+#[cfg_attr(test, assert_instr(vpminud))]
+pub unsafe fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    let min = _mm512_min_epu32(a, b).as_u32x16();
+    transmute(simd_select_bitmask(k, min, src.as_u32x16()))
 }

-/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi64&expand=5444)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_min_epu32&expand=3731)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
-#[rustc_args_required_const(2)]
-pub unsafe fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
-    assert!(imm8 <= 255);
-    let shf = vpsraiq(a.as_i64x8(), imm8);
-    let zero = _mm512_setzero_si512().as_i64x8();
-    transmute(simd_select_bitmask(k, shf, zero))
+#[cfg_attr(test, assert_instr(vpminud))]
+pub unsafe fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    let min = _mm512_min_epu32(a, b).as_u32x16();
+    let zero = _mm512_setzero_si512().as_u32x16();
+    transmute(simd_select_bitmask(k, min, zero))
 }

-/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
+/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srav_epi32&expand=5465)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_min_epu64&expand=3741)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpsravd))]
-pub unsafe fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
-    transmute(vpsravd(a.as_i32x16(), count.as_i32x16()))
+#[cfg_attr(test, assert_instr(vpminuq))]
+pub unsafe fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i {
+    transmute(vpminuq(a.as_u64x8(), b.as_u64x8()))
 }

-/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srav_epi32&expand=5463) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_min_epu64&expand=3739) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpsravd))] -pub unsafe fn _mm512_mask_srav_epi32( - src: __m512i, - k: __mmask16, - a: __m512i, - count: __m512i, -) -> __m512i { - let shf = _mm512_srav_epi32(a, count).as_i32x16(); - transmute(simd_select_bitmask(k, shf, src.as_i32x16())) +#[cfg_attr(test, assert_instr(vpminuq))] +pub unsafe fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + let min = _mm512_min_epu64(a, b).as_u64x8(); + transmute(simd_select_bitmask(k, min, src.as_u64x8())) } -/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srav_epi32&expand=5464) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_min_epu64&expand=3740) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpsravd))] -pub unsafe fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i { - let shf = _mm512_srav_epi32(a, count).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, shf, zero)) +#[cfg_attr(test, assert_instr(vpminuq))] +pub unsafe fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + let min = _mm512_min_epu64(a, b).as_u64x8(); + let zero = _mm512_setzero_si512().as_u64x8(); + transmute(simd_select_bitmask(k, min, zero)) } -/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. +/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srav_epi64&expand=5474) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sqrt_ps&expand=5371) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpsravq))] -pub unsafe fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i { - transmute(vpsravq(a.as_i64x8(), count.as_i64x8())) +#[cfg_attr(test, assert_instr(vsqrtps))] +pub unsafe fn _mm512_sqrt_ps(a: __m512) -> __m512 { + transmute(vsqrtps(a.as_f32x16())) } -/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
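+///
+/// A scalar model of the masked square root (an illustrative sketch, not part
+/// of Intel's documentation; the `_model` function is hypothetical):
+///
+/// ```
+/// fn mask_sqrt_ps_model(src: [f32; 16], k: u16, a: [f32; 16]) -> [f32; 16] {
+///     let mut dst = src; // unselected lanes keep the src value
+///     for i in 0..16 {
+///         if (k >> i) & 1 == 1 {
+///             dst[i] = a[i].sqrt();
+///         }
+///     }
+///     dst
+/// }
+/// ```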
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srav_epi64&expand=5472) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sqrt_ps&expand=5369) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpsravq))] -pub unsafe fn _mm512_mask_srav_epi64( - src: __m512i, - k: __mmask8, - a: __m512i, - count: __m512i, -) -> __m512i { - let shf = _mm512_srav_epi64(a, count).as_i64x8(); - transmute(simd_select_bitmask(k, shf, src.as_i64x8())) +#[cfg_attr(test, assert_instr(vsqrtps))] +pub unsafe fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 { + let sqrt = _mm512_sqrt_ps(a).as_f32x16(); + transmute(simd_select_bitmask(k, sqrt, src.as_f32x16())) } -/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srav_epi64&expand=5473) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sqrt_ps&expand=5370) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpsravq))] -pub unsafe fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i { - let shf = _mm512_srav_epi64(a, count).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, shf, zero)) +#[cfg_attr(test, assert_instr(vsqrtps))] +pub unsafe fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 { + let sqrt = _mm512_sqrt_ps(a).as_f32x16(); + let zero = _mm512_setzero_ps().as_f32x16(); + transmute(simd_select_bitmask(k, sqrt, zero)) } -/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst. +/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rolv_epi32&expand=4703) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sqrt_pd&expand=5362) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprolvd))] -pub unsafe fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i { - transmute(vprolvd(a.as_i32x16(), b.as_i32x16())) +#[cfg_attr(test, assert_instr(vsqrtpd))] +pub unsafe fn _mm512_sqrt_pd(a: __m512d) -> __m512d { + transmute(vsqrtpd(a.as_f64x8())) } -/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rolv_epi32&expand=4701) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sqrt_pd&expand=5360) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprolvd))] -pub unsafe fn _mm512_mask_rolv_epi32( - src: __m512i, - k: __mmask16, - a: __m512i, - b: __m512i, -) -> __m512i { - let rol = _mm512_rolv_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, rol, src.as_i32x16())) +#[cfg_attr(test, assert_instr(vsqrtpd))] +pub unsafe fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d { + let sqrt = _mm512_sqrt_pd(a).as_f64x8(); + transmute(simd_select_bitmask(k, sqrt, src.as_f64x8())) } -/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rolv_epi32&expand=4702) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sqrt_pd&expand=5361) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprolvd))] -pub unsafe fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let rol = _mm512_rolv_epi32(a, b).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, rol, zero)) +#[cfg_attr(test, assert_instr(vsqrtpd))] +pub unsafe fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d { + let sqrt = _mm512_sqrt_pd(a).as_f64x8(); + let zero = _mm512_setzero_pd().as_f64x8(); + transmute(simd_select_bitmask(k, sqrt, zero)) } -/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst. +/// Returns vector of type `__m512d` with all elements set to zero. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rorv_epi32&expand=4739) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990&text=_mm512_setzero_pd) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprorvd))] -pub unsafe fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i { - transmute(vprorvd(a.as_i32x16(), b.as_i32x16())) +#[cfg_attr(test, assert_instr(vxorps))] +pub unsafe fn _mm512_setzero_pd() -> __m512d { + // All-0 is a properly initialized __m512d + mem::zeroed() } -/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Returns vector of type `__m512` with all elements set to zero. 
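+///
+/// The all-zero bit pattern is also the IEEE 754 encoding of `+0.0`, which is
+/// why the implementation below can simply return `mem::zeroed()`. A tiny
+/// illustration of that claim (not part of Intel's documentation):
+///
+/// ```
+/// // Zero bits decode to positive floating-point zero.
+/// assert_eq!(f32::from_bits(0), 0.0);
+/// ```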
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rorv_epi32&expand=4737) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990&text=_mm512_setzero_ps) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprorvd))] -pub unsafe fn _mm512_mask_rorv_epi32( - src: __m512i, - k: __mmask16, - a: __m512i, - b: __m512i, -) -> __m512i { - let ror = _mm512_rorv_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, ror, src.as_i32x16())) +#[cfg_attr(test, assert_instr(vxorps))] +pub unsafe fn _mm512_setzero_ps() -> __m512 { + // All-0 is a properly initialized __m512 + mem::zeroed() } -/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Returns vector of type `__m512i` with all elements set to zero. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rorv_epi32&expand=4738) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990&text=_mm512_setzero_si512) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprorvd))] -pub unsafe fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let ror = _mm512_rorv_epi32(a, b).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, ror, zero)) +#[cfg_attr(test, assert_instr(vxorps))] +pub unsafe fn _mm512_setzero_si512() -> __m512i { + // All-0 is a properly initialized __m512i + mem::zeroed() } -/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst. -/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rolv_epi64&expand=4712) +/// Sets packed 32-bit integers in `dst` with the supplied values in reverse +/// order. #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprolvq))] -pub unsafe fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i { - transmute(vprolvq(a.as_i64x8(), b.as_i64x8())) +pub unsafe fn _mm512_setr_epi32( + e15: i32, + e14: i32, + e13: i32, + e12: i32, + e11: i32, + e10: i32, + e9: i32, + e8: i32, + e7: i32, + e6: i32, + e5: i32, + e4: i32, + e3: i32, + e2: i32, + e1: i32, + e0: i32, +) -> __m512i { + let r = i32x16( + e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0, + ); + transmute(r) } -/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 
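+///
+/// Lanewise, the gather loads `*(slice + offsets[i] * scale)`. A scalar model
+/// (an illustrative sketch, not part of Intel's documentation; the `_model`
+/// function is hypothetical):
+///
+/// ```
+/// unsafe fn i32gather_pd_model(offsets: [i32; 8], slice: *const u8, scale: i32) -> [f64; 8] {
+///     let mut dst = [0.0f64; 8];
+///     for i in 0..8 {
+///         // Each lane forms its own byte address from index * scale.
+///         let addr = slice.offset(offsets[i] as isize * scale as isize);
+///         dst[i] = (addr as *const f64).read_unaligned();
+///     }
+///     dst
+/// }
+/// ```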
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rolv_epi64&expand=4710) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32gather_pd) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprolvq))] -pub unsafe fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let rol = _mm512_rolv_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, rol, src.as_i64x8())) +#[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm512_i32gather_pd(offsets: __m256i, slice: *const u8, scale: i32) -> __m512d { + let zero = _mm512_setzero_pd().as_f64x8(); + let neg_one = -1; + let slice = slice as *const i8; + let offsets = offsets.as_i32x8(); + macro_rules! call { + ($imm8:expr) => { + vgatherdpd(zero, slice, offsets, neg_one, $imm8) + }; + } + let r = constify_imm8_gather!(scale, call); + transmute(r) } -/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rolv_epi64&expand=4711) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32gather_pd) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprolvq))] -pub unsafe fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let rol = _mm512_rolv_epi64(a, b).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, rol, zero)) +#[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))] +#[rustc_args_required_const(4)] +pub unsafe fn _mm512_mask_i32gather_pd( + src: __m512d, + mask: __mmask8, + offsets: __m256i, + slice: *const u8, + scale: i32, +) -> __m512d { + let src = src.as_f64x8(); + let slice = slice as *const i8; + let offsets = offsets.as_i32x8(); + macro_rules! call { + ($imm8:expr) => { + vgatherdpd(src, slice, offsets, mask as i8, $imm8) + }; + } + let r = constify_imm8_gather!(scale, call); + transmute(r) } -/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst. +/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rorv_epi64&expand=4748) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64gather_pd) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprorvq))] -pub unsafe fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i { - transmute(vprorvq(a.as_i64x8(), b.as_i64x8())) +#[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm512_i64gather_pd(offsets: __m512i, slice: *const u8, scale: i32) -> __m512d { + let zero = _mm512_setzero_pd().as_f64x8(); + let neg_one = -1; + let slice = slice as *const i8; + let offsets = offsets.as_i64x8(); + macro_rules! 
call { + ($imm8:expr) => { + vgatherqpd(zero, slice, offsets, neg_one, $imm8) + }; + } + let r = constify_imm8_gather!(scale, call); + transmute(r) } -/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rorv_epi64&expand=4746) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64gather_pd) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprorvq))] -pub unsafe fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let ror = _mm512_rorv_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, ror, src.as_i64x8())) +#[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))] +#[rustc_args_required_const(4)] +pub unsafe fn _mm512_mask_i64gather_pd( + src: __m512d, + mask: __mmask8, + offsets: __m512i, + slice: *const u8, + scale: i32, +) -> __m512d { + let src = src.as_f64x8(); + let slice = slice as *const i8; + let offsets = offsets.as_i64x8(); + macro_rules! call { + ($imm8:expr) => { + vgatherqpd(src, slice, offsets, mask as i8, $imm8) + }; + } + let r = constify_imm8_gather!(scale, call); + transmute(r) } -/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rorv_epi64&expand=4747) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64gather_ps) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vprorvq))] -pub unsafe fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let ror = _mm512_rorv_epi64(a, b).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, ror, zero)) +#[cfg_attr(test, assert_instr(vgatherqps, scale = 1))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm512_i64gather_ps(offsets: __m512i, slice: *const u8, scale: i32) -> __m256 { + let zero = _mm256_setzero_ps().as_f32x8(); + let neg_one = -1; + let slice = slice as *const i8; + let offsets = offsets.as_i64x8(); + macro_rules! call { + ($imm8:expr) => { + vgatherqps(zero, slice, offsets, neg_one, $imm8) + }; + } + let r = constify_imm8_gather!(scale, call); + transmute(r) } -/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. +/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 
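+///
+/// Only lanes whose mask bit is set are loaded; the other lanes keep `src` and
+/// their addresses are not dereferenced. A scalar model (an illustrative
+/// sketch, not part of Intel's documentation; the `_model` function is
+/// hypothetical):
+///
+/// ```
+/// unsafe fn mask_i64gather_ps_model(
+///     src: [f32; 8],
+///     mask: u8,
+///     offsets: [i64; 8],
+///     slice: *const u8,
+///     scale: i32,
+/// ) -> [f32; 8] {
+///     let mut dst = src; // masked-off lanes keep src
+///     for i in 0..8 {
+///         if (mask >> i) & 1 == 1 {
+///             let addr = slice.offset(offsets[i] as isize * scale as isize);
+///             dst[i] = (addr as *const f32).read_unaligned();
+///         }
+///     }
+///     dst
+/// }
+/// ```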
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sllv_epi32&expand=5342) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64gather_ps) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpsllvd))] -pub unsafe fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i { - transmute(vpsllvd(a.as_i32x16(), count.as_i32x16())) +#[cfg_attr(test, assert_instr(vgatherqps, scale = 1))] +#[rustc_args_required_const(4)] +pub unsafe fn _mm512_mask_i64gather_ps( + src: __m256, + mask: __mmask8, + offsets: __m512i, + slice: *const u8, + scale: i32, +) -> __m256 { + let src = src.as_f32x8(); + let slice = slice as *const i8; + let offsets = offsets.as_i64x8(); + macro_rules! call { + ($imm8:expr) => { + vgatherqps(src, slice, offsets, mask as i8, $imm8) + }; + } + let r = constify_imm8_gather!(scale, call); + transmute(r) } -/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sllv_epi32&expand=5340) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32gather_ps) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpsllvd))] -pub unsafe fn _mm512_mask_sllv_epi32( - src: __m512i, - k: __mmask16, - a: __m512i, - count: __m512i, -) -> __m512i { - let shf = _mm512_sllv_epi32(a, count).as_i32x16(); - transmute(simd_select_bitmask(k, shf, src.as_i32x16())) +#[cfg_attr(test, assert_instr(vgatherdps, scale = 1))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm512_i32gather_ps(offsets: __m512i, slice: *const u8, scale: i32) -> __m512 { + let zero = _mm512_setzero_ps().as_f32x16(); + let neg_one = -1; + let slice = slice as *const i8; + let offsets = offsets.as_i32x16(); + macro_rules! call { + ($imm8:expr) => { + vgatherdps(zero, slice, offsets, neg_one, $imm8) + }; + } + let r = constify_imm8_gather!(scale, call); + transmute(r) } -/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sllv_epi32&expand=5341) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32gather_ps) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpsllvd))] -pub unsafe fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i { - let shf = _mm512_sllv_epi32(a, count).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, shf, zero)) +#[cfg_attr(test, assert_instr(vgatherdps, scale = 1))] +#[rustc_args_required_const(4)] +pub unsafe fn _mm512_mask_i32gather_ps( + src: __m512, + mask: __mmask16, + offsets: __m512i, + slice: *const u8, + scale: i32, +) -> __m512 { + let src = src.as_f32x16(); + let slice = slice as *const i8; + let offsets = offsets.as_i32x16(); + macro_rules! call { + ($imm8:expr) => { + vgatherdps(src, slice, offsets, mask as i16, $imm8) + }; + } + let r = constify_imm8_gather!(scale, call); + transmute(r) } -/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. +/// Gather 32-bit integers from memory using 32-bit indices. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srlv_epi32&expand=5554) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32gather_epi32) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpsrlvd))] -pub unsafe fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i { - transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16())) +#[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm512_i32gather_epi32(offsets: __m512i, slice: *const u8, scale: i32) -> __m512i { + let zero = _mm512_setzero_si512().as_i32x16(); + let neg_one = -1; + let slice = slice as *const i8; + let offsets = offsets.as_i32x16(); + macro_rules! call { + ($imm8:expr) => { + vpgatherdd(zero, slice, offsets, neg_one, $imm8) + }; + } + let r = constify_imm8_gather!(scale, call); + transmute(r) } -/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Gather 32-bit integers from memory using 32-bit indices. 
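+///
+/// `scale` must be 1, 2, 4 or 8; with `scale = 4` the 32-bit offsets index a
+/// dense `i32` array. A hypothetical call (illustrative only; assumes the CPU
+/// supports AVX512F at runtime):
+///
+/// ```ignore
+/// let table = [7i32; 256];
+/// let vals = unsafe {
+///     let idx = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+///     let src = _mm512_setzero_si512();
+///     // Load only the even lanes; odd lanes keep their value from src.
+///     _mm512_mask_i32gather_epi32(src, 0b0101_0101_0101_0101, idx, table.as_ptr() as *const u8, 4)
+/// };
+/// ```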
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srlv_epi32&expand=5552) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32gather_epi32) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpsrlvd))] -pub unsafe fn _mm512_mask_srlv_epi32( +#[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))] +#[rustc_args_required_const(4)] +pub unsafe fn _mm512_mask_i32gather_epi32( src: __m512i, - k: __mmask16, - a: __m512i, - count: __m512i, + mask: __mmask16, + offsets: __m512i, + slice: *const u8, + scale: i32, ) -> __m512i { - let shf = _mm512_srlv_epi32(a, count).as_i32x16(); - transmute(simd_select_bitmask(k, shf, src.as_i32x16())) -} - -/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). -/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srlv_epi32&expand=5553) -#[inline] -#[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpsrlvd))] -pub unsafe fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i { - let shf = _mm512_srlv_epi32(a, count).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, shf, zero)) + let src = src.as_i32x16(); + let mask = mask as i16; + let slice = slice as *const i8; + let offsets = offsets.as_i32x16(); + macro_rules! call { + ($imm8:expr) => { + vpgatherdd(src, slice, offsets, mask, $imm8) + }; + } + let r = constify_imm8_gather!(scale, call); + transmute(r) } -/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. +/// Gather 64-bit integers from memory using 32-bit indices. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sllv_epi64&expand=5351) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32gather_epi64) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpsllvq))] -pub unsafe fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i { - transmute(vpsllvq(a.as_i64x8(), count.as_i64x8())) +#[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm512_i32gather_epi64(offsets: __m256i, slice: *const u8, scale: i32) -> __m512i { + let zero = _mm512_setzero_si512().as_i64x8(); + let neg_one = -1; + let slice = slice as *const i8; + let offsets = offsets.as_i32x8(); + macro_rules! call { + ($imm8:expr) => { + vpgatherdq(zero, slice, offsets, neg_one, $imm8) + }; + } + let r = constify_imm8_gather!(scale, call); + transmute(r) } -/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Gather 64-bit integers from memory using 32-bit indices. 
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sllv_epi64&expand=5349) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32gather_epi64) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpsllvq))] -pub unsafe fn _mm512_mask_sllv_epi64( +#[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))] +#[rustc_args_required_const(4)] +pub unsafe fn _mm512_mask_i32gather_epi64( src: __m512i, - k: __mmask8, - a: __m512i, - count: __m512i, + mask: __mmask8, + offsets: __m256i, + slice: *const u8, + scale: i32, ) -> __m512i { - let shf = _mm512_sllv_epi64(a, count).as_i64x8(); - transmute(simd_select_bitmask(k, shf, src.as_i64x8())) + let src = src.as_i64x8(); + let mask = mask as i8; + let slice = slice as *const i8; + let offsets = offsets.as_i32x8(); + macro_rules! call { + ($imm8:expr) => { + vpgatherdq(src, slice, offsets, mask, $imm8) + }; + } + let r = constify_imm8_gather!(scale, call); + transmute(r) } -/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Gather 64-bit integers from memory using 64-bit indices. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sllv_epi64&expand=5350) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64gather_epi64) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpsllvq))] -pub unsafe fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i { - let shf = _mm512_sllv_epi64(a, count).as_i64x8(); +#[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm512_i64gather_epi64(offsets: __m512i, slice: *const u8, scale: i32) -> __m512i { let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, shf, zero)) -} - -/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. -/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srlv_epi64&expand=5563) -#[inline] -#[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpsrlvq))] -pub unsafe fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i { - transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8())) + let neg_one = -1; + let slice = slice as *const i8; + let offsets = offsets.as_i64x8(); + macro_rules! call { + ($imm8:expr) => { + vpgatherqq(zero, slice, offsets, neg_one, $imm8) + }; + } + let r = constify_imm8_gather!(scale, call); + transmute(r) } -/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Gather 64-bit integers from memory using 64-bit indices. 
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mask_srlv_epi64&expand=5561) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64gather_epi64) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpsrlvq))] -pub unsafe fn _mm512_mask_srlv_epi64( +#[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))] +#[rustc_args_required_const(4)] +pub unsafe fn _mm512_mask_i64gather_epi64( src: __m512i, - k: __mmask8, - a: __m512i, - count: __m512i, + mask: __mmask8, + offsets: __m512i, + slice: *const u8, + scale: i32, ) -> __m512i { - let shf = _mm512_srlv_epi64(a, count).as_i64x8(); - transmute(simd_select_bitmask(k, shf, src.as_i64x8())) + let src = src.as_i64x8(); + let mask = mask as i8; + let slice = slice as *const i8; + let offsets = offsets.as_i64x8(); + macro_rules! call { + ($imm8:expr) => { + vpgatherqq(src, slice, offsets, mask, $imm8) + }; + } + let r = constify_imm8_gather!(scale, call); + transmute(r) } -/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Gather 32-bit integers from memory using 64-bit indices. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srlv_epi64&expand=5562) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64gather_epi32) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpsrlvq))] -pub unsafe fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i { - let shf = _mm512_srlv_epi64(a, count).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, shf, zero)) +#[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm512_i64gather_epi32(offsets: __m512i, slice: *const u8, scale: i32) -> __m256i { + let zeros = _mm256_setzero_si256().as_i32x8(); + let neg_one = -1; + let slice = slice as *const i8; + let offsets = offsets.as_i64x8(); + macro_rules! call { + ($imm8:expr) => { + vpgatherqd(zeros, slice, offsets, neg_one, $imm8) + }; + } + let r = constify_imm8_gather!(scale, call); + transmute(r) } -/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst. +/// Gather 32-bit integers from memory using 64-bit indices. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_epi32&expand=272) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64gather_epi32) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpandq))] -pub unsafe fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_and(a.as_i32x16(), b.as_i32x16())) +#[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))] +#[rustc_args_required_const(4)] +pub unsafe fn _mm512_mask_i64gather_epi32( + src: __m256i, + mask: __mmask8, + offsets: __m512i, + slice: *const u8, + scale: i32, +) -> __m256i { + let src = src.as_i32x8(); + let mask = mask as i8; + let slice = slice as *const i8; + let offsets = offsets.as_i64x8(); + macro_rules! 
call { + ($imm8:expr) => { + vpgatherqd(src, slice, offsets, mask, $imm8) + }; + } + let r = constify_imm8_gather!(scale, call); + transmute(r) } -/// Performs element-by-element bitwise AND between packed 32-bit integer elements of v2 and v3, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Scatter double-precision (64-bit) floating-point elements from src into memory using 32-bit indices. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_and_epi32&expand=273) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32scatter_pd) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpandd))] -pub unsafe fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let and = _mm512_and_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, and, src.as_i32x16())) +#[cfg_attr(test, assert_instr(vscatterdpd, scale = 1))] +#[rustc_args_required_const(3)] +pub unsafe fn _mm512_i32scatter_pd(slice: *mut u8, offsets: __m256i, src: __m512d, scale: i32) { + let src = src.as_f64x8(); + let neg_one = -1; + let slice = slice as *mut i8; + let offsets = offsets.as_i32x8(); + macro_rules! call { + ($imm8:expr) => { + vscatterdpd(slice, neg_one, offsets, src, $imm8) + }; + } + constify_imm8_gather!(scale, call); } -/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Scatter double-precision (64-bit) floating-point elements from src into memory using 32-bit indices. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_and_epi32&expand=274) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32scatter_pd) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpandd))] -pub unsafe fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let and = _mm512_and_epi32(a, b).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, and, zero)) +#[cfg_attr(test, assert_instr(vscatterdpd, scale = 1))] +#[rustc_args_required_const(4)] +pub unsafe fn _mm512_mask_i32scatter_pd( + slice: *mut u8, + mask: __mmask8, + offsets: __m256i, + src: __m512d, + scale: i32, +) { + let src = src.as_f64x8(); + let slice = slice as *mut i8; + let offsets = offsets.as_i32x8(); + macro_rules! call { + ($imm8:expr) => { + vscatterdpd(slice, mask as i8, offsets, src, $imm8) + }; + } + constify_imm8_gather!(scale, call); } -/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst. +/// Scatter double-precision (64-bit) floating-point elements from src into memory using 64-bit indices. 
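+///
+/// Lanewise, the scatter stores `src[i]` to `*(slice + offsets[i] * scale)`.
+/// A scalar model (an illustrative sketch, not part of Intel's documentation;
+/// the `_model` function is hypothetical):
+///
+/// ```
+/// unsafe fn i64scatter_pd_model(slice: *mut u8, offsets: [i64; 8], src: [f64; 8], scale: i32) {
+///     for i in 0..8 {
+///         // Each lane forms its own byte address from index * scale.
+///         let addr = slice.offset(offsets[i] as isize * scale as isize);
+///         (addr as *mut f64).write_unaligned(src[i]);
+///     }
+/// }
+/// ```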
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_epi64&expand=279) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64scatter_pd) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpandq))] -pub unsafe fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_and(a.as_i64x8(), b.as_i64x8())) +#[cfg_attr(test, assert_instr(vscatterqpd, scale = 1))] +#[rustc_args_required_const(3)] +pub unsafe fn _mm512_i64scatter_pd(slice: *mut u8, offsets: __m512i, src: __m512d, scale: i32) { + let src = src.as_f64x8(); + let neg_one = -1; + let slice = slice as *mut i8; + let offsets = offsets.as_i64x8(); + macro_rules! call { + ($imm8:expr) => { + vscatterqpd(slice, neg_one, offsets, src, $imm8) + }; + } + constify_imm8_gather!(scale, call); } -/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Scatter double-precision (64-bit) floating-point elements from src into memory using 64-bit indices. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_and_epi64&expand=280) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64scatter_pd) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpandq))] -pub unsafe fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let and = _mm512_and_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, and, src.as_i64x8())) +#[cfg_attr(test, assert_instr(vscatterqpd, scale = 1))] +#[rustc_args_required_const(4)] +pub unsafe fn _mm512_mask_i64scatter_pd( + slice: *mut u8, + mask: __mmask8, + offsets: __m512i, + src: __m512d, + scale: i32, +) { + let src = src.as_f64x8(); + let slice = slice as *mut i8; + let offsets = offsets.as_i64x8(); + macro_rules! call { + ($imm8:expr) => { + vscatterqpd(slice, mask as i8, offsets, src, $imm8) + }; + } + constify_imm8_gather!(scale, call); } -/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Scatter single-precision (32-bit) floating-point elements from src into memory using 32-bit indices. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_and_Epi32&expand=274) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32scatter_ps) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpandq))] -pub unsafe fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let and = _mm512_and_epi64(a, b).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, and, zero)) +#[cfg_attr(test, assert_instr(vscatterdps, scale = 1))] +#[rustc_args_required_const(3)] +pub unsafe fn _mm512_i32scatter_ps(slice: *mut u8, offsets: __m512i, src: __m512, scale: i32) { + let src = src.as_f32x16(); + let neg_one = -1; + let slice = slice as *mut i8; + let offsets = offsets.as_i32x16(); + macro_rules! 
call { + ($imm8:expr) => { + vscatterdps(slice, neg_one, offsets, src, $imm8) + }; + } + constify_imm8_gather!(scale, call); } -/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst. +/// Scatter single-precision (32-bit) floating-point elements from src into memory using 32-bit indices. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_si512&expand=302) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32scatter_ps) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpandq))] -pub unsafe fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_and(a.as_i32x16(), b.as_i32x16())) +#[cfg_attr(test, assert_instr(vscatterdps, scale = 1))] +#[rustc_args_required_const(4)] +pub unsafe fn _mm512_mask_i32scatter_ps( + slice: *mut u8, + mask: __mmask16, + offsets: __m512i, + src: __m512, + scale: i32, +) { + let src = src.as_f32x16(); + let slice = slice as *mut i8; + let offsets = offsets.as_i32x16(); + macro_rules! call { + ($imm8:expr) => { + vscatterdps(slice, mask as i16, offsets, src, $imm8) + }; + } + constify_imm8_gather!(scale, call); } -/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst. +/// Scatter single-precision (32-bit) floating-point elements from src into memory using 64-bit indices. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_epi32&expand=4042) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64scatter_ps) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vporq))] -pub unsafe fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_or(a.as_i32x16(), b.as_i32x16())) +#[cfg_attr(test, assert_instr(vscatterqps, scale = 1))] +#[rustc_args_required_const(3)] +pub unsafe fn _mm512_i64scatter_ps(slice: *mut u8, offsets: __m512i, src: __m256, scale: i32) { + let src = src.as_f32x8(); + let neg_one = -1; + let slice = slice as *mut i8; + let offsets = offsets.as_i64x8(); + macro_rules! call { + ($imm8:expr) => { + vscatterqps(slice, neg_one, offsets, src, $imm8) + }; + } + constify_imm8_gather!(scale, call); } -/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Scatter single-precision (32-bit) floating-point elements from src into memory using 64-bit indices. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_or_epi32&expand=4040) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64scatter_ps) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpord))] -pub unsafe fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let or = _mm512_or_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, or, src.as_i32x16())) +#[cfg_attr(test, assert_instr(vscatterqps, scale = 1))] +#[rustc_args_required_const(4)] +pub unsafe fn _mm512_mask_i64scatter_ps( + slice: *mut u8, + mask: __mmask8, + offsets: __m512i, + src: __m256, + scale: i32, +) { + let src = src.as_f32x8(); + let slice = slice as *mut i8; + let offsets = offsets.as_i64x8(); + macro_rules! 
call { + ($imm8:expr) => { + vscatterqps(slice, mask as i8, offsets, src, $imm8) + }; + } + constify_imm8_gather!(scale, call); } -/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Scatter 64-bit integers from src into memory using 32-bit indices. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_or_epi32&expand=4041) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32scatter_epi64) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpord))] -pub unsafe fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let or = _mm512_or_epi32(a, b).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, or, zero)) +#[cfg_attr(test, assert_instr(vpscatterdq, scale = 1))] +#[rustc_args_required_const(3)] +pub unsafe fn _mm512_i32scatter_epi64(slice: *mut u8, offsets: __m256i, src: __m512i, scale: i32) { + let src = src.as_i64x8(); + let neg_one = -1; + let slice = slice as *mut i8; + let offsets = offsets.as_i32x8(); + macro_rules! call { + ($imm8:expr) => { + vpscatterdq(slice, neg_one, offsets, src, $imm8) + }; + } + constify_imm8_gather!(scale, call); } -/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the resut in dst. +/// Scatter 64-bit integers from src into memory using 32-bit indices. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_epi64&expand=4051) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32scatter_epi64) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vporq))] -pub unsafe fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_or(a.as_i64x8(), b.as_i64x8())) +#[cfg_attr(test, assert_instr(vpscatterdq, scale = 1))] +#[rustc_args_required_const(4)] +pub unsafe fn _mm512_mask_i32scatter_epi64( + slice: *mut u8, + mask: __mmask8, + offsets: __m256i, + src: __m512i, + scale: i32, +) { + let src = src.as_i64x8(); + let mask = mask as i8; + let slice = slice as *mut i8; + let offsets = offsets.as_i32x8(); + macro_rules! call { + ($imm8:expr) => { + vpscatterdq(slice, mask, offsets, src, $imm8) + }; + } + constify_imm8_gather!(scale, call); } -/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Scatter 64-bit integers from src into memory using 64-bit indices. 
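+///
+/// A scalar model (an illustrative sketch, not part of Intel's documentation;
+/// the `_model` name is hypothetical). In this model the stores happen from
+/// lane 0 upward, so with duplicate offsets the highest lane's value is the
+/// one left in memory:
+///
+/// ```
+/// unsafe fn i64scatter_epi64_model(slice: *mut u8, offsets: [i64; 8], src: [i64; 8], scale: i32) {
+///     for i in 0..8 {
+///         let addr = slice.offset(offsets[i] as isize * scale as isize);
+///         (addr as *mut i64).write_unaligned(src[i]);
+///     }
+/// }
+/// ```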
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_or_epi64&expand=4049) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64scatter_epi64) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vporq))] -pub unsafe fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let or = _mm512_or_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, or, src.as_i64x8())) +#[cfg_attr(test, assert_instr(vpscatterqq, scale = 1))] +#[rustc_args_required_const(3)] +pub unsafe fn _mm512_i64scatter_epi64(slice: *mut u8, offsets: __m512i, src: __m512i, scale: i32) { + let src = src.as_i64x8(); + let neg_one = -1; + let slice = slice as *mut i8; + let offsets = offsets.as_i64x8(); + macro_rules! call { + ($imm8:expr) => { + vpscatterqq(slice, neg_one, offsets, src, $imm8) + }; + } + constify_imm8_gather!(scale, call); } -/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Scatter 64-bit integers from src into memory using 64-bit indices. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_or_epi64&expand=4050) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64scatter_epi64) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vporq))] -pub unsafe fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let or = _mm512_or_epi64(a, b).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, or, zero)) +#[cfg_attr(test, assert_instr(vpscatterqq, scale = 1))] +#[rustc_args_required_const(4)] +pub unsafe fn _mm512_mask_i64scatter_epi64( + slice: *mut u8, + mask: __mmask8, + offsets: __m512i, + src: __m512i, + scale: i32, +) { + let src = src.as_i64x8(); + let mask = mask as i8; + let slice = slice as *mut i8; + let offsets = offsets.as_i64x8(); + macro_rules! call { + ($imm8:expr) => { + vpscatterqq(slice, mask, offsets, src, $imm8) + }; + } + constify_imm8_gather!(scale, call); } -/// Compute the bitwise OR of 512 bits (representing integer data) in a and b, and store the result in dst. +/// Scatter 32-bit integers from src into memory using 32-bit indices. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_si512&expand=4072) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32scatter_epi32) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vporq))] -pub unsafe fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_or(a.as_i32x16(), b.as_i32x16())) +#[cfg_attr(test, assert_instr(vpscatterdd, scale = 1))] +#[rustc_args_required_const(3)] +pub unsafe fn _mm512_i32scatter_epi32(slice: *mut u8, offsets: __m512i, src: __m512i, scale: i32) { + let src = src.as_i32x16(); + let neg_one = -1; + let slice = slice as *mut i8; + let offsets = offsets.as_i32x16(); + macro_rules! call { + ($imm8:expr) => { + vpscatterdd(slice, neg_one, offsets, src, $imm8) + }; + } + constify_imm8_gather!(scale, call); } -/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst. 
+/// Scatter 32-bit integers from src into memory using 32-bit indices. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_epi32&expand=6142) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32scatter_epi32) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpxorq))] -pub unsafe fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) +#[cfg_attr(test, assert_instr(vpscatterdd, scale = 1))] +#[rustc_args_required_const(4)] +pub unsafe fn _mm512_mask_i32scatter_epi32( + slice: *mut u8, + mask: __mmask16, + offsets: __m512i, + src: __m512i, + scale: i32, +) { + let src = src.as_i32x16(); + let mask = mask as i16; + let slice = slice as *mut i8; + let offsets = offsets.as_i32x16(); + macro_rules! call { + ($imm8:expr) => { + vpscatterdd(slice, mask, offsets, src, $imm8) + }; + } + constify_imm8_gather!(scale, call); } -/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Scatter 32-bit integers from src into memory using 64-bit indices. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_xor_epi32&expand=6140) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64scatter_epi32) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpxord))] -pub unsafe fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let xor = _mm512_xor_epi32(a, b).as_i32x16(); - transmute(simd_select_bitmask(k, xor, src.as_i32x16())) +#[cfg_attr(test, assert_instr(vpscatterqd, scale = 1))] +#[rustc_args_required_const(3)] +pub unsafe fn _mm512_i64scatter_epi32(slice: *mut u8, offsets: __m512i, src: __m256i, scale: i32) { + let src = src.as_i32x8(); + let neg_one = -1; + let slice = slice as *mut i8; + let offsets = offsets.as_i64x8(); + macro_rules! call { + ($imm8:expr) => { + vpscatterqd(slice, neg_one, offsets, src, $imm8) + }; + } + constify_imm8_gather!(scale, call); } -/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Scatter 32-bit integers from src into memory using 64-bit indices. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_xor_epi32&expand=6141) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64scatter_epi32) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpxord))] -pub unsafe fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { - let xor = _mm512_xor_epi32(a, b).as_i32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); - transmute(simd_select_bitmask(k, xor, zero)) +#[cfg_attr(test, assert_instr(vpscatterqd, scale = 1))] +#[rustc_args_required_const(4)] +pub unsafe fn _mm512_mask_i64scatter_epi32( + slice: *mut u8, + mask: __mmask8, + offsets: __m512i, + src: __m256i, + scale: i32, +) { + let src = src.as_i32x8(); + let mask = mask as i8; + let slice = slice as *mut i8; + let offsets = offsets.as_i64x8(); + macro_rules! 
call { + ($imm8:expr) => { + vpscatterqd(slice, mask, offsets, src, $imm8) + }; + } + constify_imm8_gather!(scale, call); } -/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst. +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_epi64&expand=6151) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rol_epi32&expand=4685) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpxorq))] -pub unsafe fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_xor(a.as_i64x8(), b.as_i64x8())) +#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] +#[rustc_args_required_const(1)] +pub unsafe fn _mm512_rol_epi32(a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + transmute(vprold(a.as_i32x16(), imm8)) } -/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_xor_epi64&expand=6149) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rol_epi32&expand=4683) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpxorq))] -pub unsafe fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let xor = _mm512_xor_epi64(a, b).as_i64x8(); - transmute(simd_select_bitmask(k, xor, src.as_i64x8())) +#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] +#[rustc_args_required_const(3)] +pub unsafe fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + let rol = vprold(a.as_i32x16(), imm8); + transmute(simd_select_bitmask(k, rol, src.as_i32x16())) } -/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
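+///
+/// A scalar model of the zeromasked rotate (an illustrative sketch, not part
+/// of Intel's documentation; the `_model` name is hypothetical, and for
+/// 32-bit lanes the rotate count is taken modulo 32):
+///
+/// ```
+/// fn maskz_rol_epi32_model(k: u16, a: [u32; 16], imm8: u32) -> [u32; 16] {
+///     let mut dst = [0u32; 16]; // unselected lanes are zeroed
+///     for i in 0..16 {
+///         if (k >> i) & 1 == 1 {
+///             dst[i] = a[i].rotate_left(imm8);
+///         }
+///     }
+///     dst
+/// }
+/// ```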
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_xor_epi64&expand=6150) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rol_epi32&expand=4684) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpxorq))] -pub unsafe fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { - let xor = _mm512_xor_epi64(a, b).as_i64x8(); - let zero = _mm512_setzero_si512().as_i64x8(); - transmute(simd_select_bitmask(k, xor, zero)) +#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + let rol = vprold(a.as_i32x16(), imm8); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, rol, zero)) } -/// Compute the bitwise XOR of 512 bits (representing integer data) in a and b, and store the result in dst. +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_si512&expand=6172) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ror_epi32&expand=4721) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpxorq))] -pub unsafe fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i { - transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) +#[cfg_attr(test, assert_instr(vprold, imm8 = 233))] +#[rustc_args_required_const(1)] +pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + transmute(vprord(a.as_i32x16(), imm8)) } -/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k. +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kand_mask16&expand=3212) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi32&expand=4719) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw -pub unsafe fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { - transmute(kandw(a, b)) -} - -/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k. -/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kand&expand=3210) -#[inline] -#[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw -pub unsafe fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 { - transmute(kandw(a, b)) +#[cfg_attr(test, assert_instr(vprold, imm8 = 123))] +#[rustc_args_required_const(3)] +pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + let ror = vprord(a.as_i32x16(), imm8); + transmute(simd_select_bitmask(k, ror, src.as_i32x16())) } -/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k. 
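For reference, a semantics sketch (my own, not from the patch; assumes the intrinsics are available): each 32-bit lane rotates independently, matching scalar `rotate_left`. Lane values are inspected by reinterpreting the vector as sixteen consecutive `i32` lanes:

```rust
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn rol_matches_scalar() {
    let a = _mm512_set1_epi32(0x8000_0001u32 as i32);
    let r = _mm512_rol_epi32(a, 1);
    // Reinterpret the 512-bit register as 16 i32 lanes to inspect the result.
    let lanes: [i32; 16] = core::mem::transmute(r);
    assert_eq!(lanes[0] as u32, 0x8000_0001u32.rotate_left(1)); // == 3
}
```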
-/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
+/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kor_mask16&expand=3239)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi32&expand=4720)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
-pub unsafe fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
-    transmute(korw(a, b))
+#[cfg_attr(test, assert_instr(vprold, imm8 = 123))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
+    assert!(imm8 >= 0 && imm8 <= 255);
+    let ror = vprord(a.as_i32x16(), imm8);
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, ror, zero))
 }

-/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
+/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kor&expand=3237)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rol_epi64&expand=4694)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
-pub unsafe fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
-    transmute(korw(a, b))
+#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: i32) -> __m512i {
+    assert!(imm8 >= 0 && imm8 <= 255);
+    transmute(vprolq(a.as_i64x8(), imm8))
 }

-/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
+/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kxor_mask16&expand=3291)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rol_epi64&expand=4692)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
-pub unsafe fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
-    transmute(kxorw(a, b))
+#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
+#[rustc_args_required_const(3)]
+pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
+    assert!(imm8 >= 0 && imm8 <= 255);
+    let rol = vprolq(a.as_i64x8(), imm8);
+    transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
 }

-/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
+/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kxor&expand=3289)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rol_epi64&expand=4693)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
-pub unsafe fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
-    transmute(kxorw(a, b))
+#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
+    assert!(imm8 >= 0 && imm8 <= 255);
+    let rol = vprolq(a.as_i64x8(), imm8);
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, rol, zero))
 }

-/// Sets packed 32-bit integers in `dst` with the supplied values.
+/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
 ///
-/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ror_epi64&expand=4730)
 #[inline]
 #[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_set_ps(
-    e0: f32,
-    e1: f32,
-    e2: f32,
-    e3: f32,
-    e4: f32,
-    e5: f32,
-    e6: f32,
-    e7: f32,
-    e8: f32,
-    e9: f32,
-    e10: f32,
-    e11: f32,
-    e12: f32,
-    e13: f32,
-    e14: f32,
-    e15: f32,
-) -> __m512 {
-    _mm512_setr_ps(
-        e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
-    )
+#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: i32) -> __m512i {
+    assert!(imm8 >= 0 && imm8 <= 255);
+    transmute(vprorq(a.as_i64x8(), imm8))
 }

-/// Sets packed 32-bit integers in `dst` with the supplied values in
-/// reverse order.
+/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi64&expand=4728)
 #[inline]
 #[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_setr_ps(
-    e0: f32,
-    e1: f32,
-    e2: f32,
-    e3: f32,
-    e4: f32,
-    e5: f32,
-    e6: f32,
-    e7: f32,
-    e8: f32,
-    e9: f32,
-    e10: f32,
-    e11: f32,
-    e12: f32,
-    e13: f32,
-    e14: f32,
-    e15: f32,
-) -> __m512 {
-    let r = f32x16::new(
-        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
-    );
-    transmute(r)
+#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
+#[rustc_args_required_const(3)]
+pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
+    assert!(imm8 >= 0 && imm8 <= 255);
+    let ror = vprorq(a.as_i64x8(), imm8);
+    transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
 }
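A zeromask sketch for the 64-bit variants (illustrative, not from the patch; note the mask type narrows to `__mmask8` because there are only eight lanes). Lanes whose bit in `k` is clear come back as zero rather than as rotated values:

```rust
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn maskz_rotate() {
    let a = _mm512_set1_epi64(i64::MIN); // only the sign bit set in each lane
    // Low four lanes are rotated; the upper four are zeroed by the zeromask.
    let r = _mm512_maskz_rol_epi64(0b0000_1111, a, 1);
    let lanes: [i64; 8] = core::mem::transmute(r);
    assert_eq!(lanes[0], 1); // the sign bit wrapped around to bit 0
    assert_eq!(lanes[7], 0); // masked-out lane
}
```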
-/// Broadcast 64-bit float `a` to all elements of `dst`.
+/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi64&expand=4729)
 #[inline]
 #[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_set1_pd(a: f64) -> __m512d {
-    transmute(f64x8::splat(a))
+#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
+    assert!(imm8 >= 0 && imm8 <= 255);
+    let ror = vprorq(a.as_i64x8(), imm8);
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, ror, zero))
 }

-/// Broadcast 32-bit float `a` to all elements of `dst`.
+/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi32&expand=5310)
 #[inline]
 #[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_set1_ps(a: f32) -> __m512 {
-    transmute(f32x16::splat(a))
+#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i {
+    assert!(imm8 <= 255);
+    transmute(vpsllid(a.as_i32x16(), imm8))
 }

-/// Sets packed 32-bit integers in `dst` with the supplied values.
+/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi32&expand=5308)
 #[inline]
 #[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_set_epi32(
-    e15: i32,
-    e14: i32,
-    e13: i32,
-    e12: i32,
-    e11: i32,
-    e10: i32,
-    e9: i32,
-    e8: i32,
-    e7: i32,
-    e6: i32,
-    e5: i32,
-    e4: i32,
-    e3: i32,
-    e2: i32,
-    e1: i32,
-    e0: i32,
-) -> __m512i {
-    _mm512_setr_epi32(
-        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
-    )
+#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
+#[rustc_args_required_const(3)]
+pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
+    assert!(imm8 <= 255);
+    let shf = vpsllid(a.as_i32x16(), imm8);
+    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
 }

-/// Broadcast 32-bit integer `a` to all elements of `dst`.
+/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi32&expand=5309)
 #[inline]
 #[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_set1_epi32(a: i32) -> __m512i {
-    transmute(i32x16::splat(a))
+#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
+    assert!(imm8 <= 255);
+    let shf = vpsllid(a.as_i32x16(), imm8);
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, shf, zero))
 }

-/// Broadcast 64-bit integer `a` to all elements of `dst`.
+/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi32&expand=5522)
 #[inline]
 #[target_feature(enable = "avx512f")]
-pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i {
-    transmute(i64x8::splat(a))
+#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm512_srli_epi32(a: __m512i, imm8: u32) -> __m512i {
+    assert!(imm8 <= 255);
+    transmute(vpsrlid(a.as_i32x16(), imm8))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi32&expand=5520)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
-    _mm512_cmp_ps_mask(a, b, _CMP_LT_OS)
+#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
+#[rustc_args_required_const(3)]
+pub unsafe fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
+    assert!(imm8 <= 255);
+    let shf = vpsrlid(a.as_i32x16(), imm8);
+    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi32&expand=5521)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmplt_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
-    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LT_OS)
+#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
+    assert!(imm8 <= 255);
+    let shf = vpsrlid(a.as_i32x16(), imm8);
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, shf, zero))
 }
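An illustrative check (my own, not from the patch): `slli`/`srli` are lanewise logical shifts, so they match `<<` and an unsigned `>>` on the lane values:

```rust
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn logical_shifts() {
    let a = _mm512_set1_epi32(-16); // 0xFFFF_FFF0 in each lane
    let left: [i32; 16] = core::mem::transmute(_mm512_slli_epi32(a, 4));
    let right: [i32; 16] = core::mem::transmute(_mm512_srli_epi32(a, 4));
    assert_eq!(left[0], -256); // -16 << 4
    assert_eq!(right[0], 0x0FFF_FFFF); // zeros shifted in from the left
}
```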
-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector.
+/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnlt_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi64&expand=5319)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
-    _mm512_cmp_ps_mask(a, b, _CMP_NLT_US)
+#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i {
+    assert!(imm8 <= 255);
+    transmute(vpslliq(a.as_i64x8(), imm8))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnlt_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi64&expand=5317)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpnlt_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
-    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLT_US)
+#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
+#[rustc_args_required_const(3)]
+pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
+    assert!(imm8 <= 255);
+    let shf = vpslliq(a.as_i64x8(), imm8);
+    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector.
+/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi64&expand=5318)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
-    _mm512_cmp_ps_mask(a, b, _CMP_LE_OS)
+#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
+    assert!(imm8 <= 255);
+    let shf = vpslliq(a.as_i64x8(), imm8);
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, shf, zero))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi64&expand=5531)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmple_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
-    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LE_OS)
+#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm512_srli_epi64(a: __m512i, imm8: u32) -> __m512i {
+    assert!(imm8 <= 255);
+    transmute(vpsrliq(a.as_i64x8(), imm8))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector.
+/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnle_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi64&expand=5529)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
-    _mm512_cmp_ps_mask(a, b, _CMP_NLE_US)
+#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
+#[rustc_args_required_const(3)]
+pub unsafe fn _mm512_mask_srli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
+    assert!(imm8 <= 255);
+    let shf = vpsrliq(a.as_i64x8(), imm8);
+    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnle_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi64&expand=5530)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpnle_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
-    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLE_US)
+#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm512_maskz_srli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
+    assert!(imm8 <= 255);
+    let shf = vpsrliq(a.as_i64x8(), imm8);
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, shf, zero))
 }
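A writemask sketch (illustrative, not from the patch): with a writemask, lanes whose bit in `k` is clear keep the value from `src` instead of receiving the shifted result:

```rust
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn writemask_shift() {
    let src = _mm512_set1_epi64(-1);
    let a = _mm512_set1_epi64(1);
    let r: [i64; 8] = core::mem::transmute(_mm512_mask_slli_epi64(src, 0b0000_0001, a, 8));
    assert_eq!(r[0], 256); // bit 0 of k set: shifted value
    assert_eq!(r[1], -1); // bit 1 clear: copied from src
}
```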
-/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sll_epi32&expand=5280)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
-    _mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ)
+#[cfg_attr(test, assert_instr(vpslld))]
+pub unsafe fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
+    transmute(vpslld(a.as_i32x16(), count.as_i32x4()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sll_epi32&expand=5278)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpeq_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
-    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OQ)
+#[cfg_attr(test, assert_instr(vpslld))]
+pub unsafe fn _mm512_mask_sll_epi32(
+    src: __m512i,
+    k: __mmask16,
+    a: __m512i,
+    count: __m128i,
+) -> __m512i {
+    let shf = _mm512_sll_epi32(a, count).as_i32x16();
+    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for inequality, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_ps)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sll_epi32&expand=5279)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
-    _mm512_cmp_ps_mask(a, b, _CMP_NEQ_UQ)
+#[cfg_attr(test, assert_instr(vpslld))]
+pub unsafe fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
+    let shf = _mm512_sll_epi32(a, count).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, shf, zero))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for inequality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srl_epi32&expand=5492)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpneq_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
-    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_UQ)
+#[cfg_attr(test, assert_instr(vpsrld))]
+pub unsafe fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
+    transmute(vpsrld(a.as_i32x16(), count.as_i32x4()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op.
+/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srl_epi32&expand=5490)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_cmp_ps_mask(a: __m512, b: __m512, op: i32) -> __mmask16 {
-    let neg_one = -1;
-    macro_rules! call {
-        ($imm5:expr) => {
-            vcmpps(
-                a.as_f32x16(),
-                b.as_f32x16(),
-                $imm5,
-                neg_one,
-                _MM_FROUND_CUR_DIRECTION,
-            )
-        };
-    }
-    let r = constify_imm5!(op, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpsrld))]
+pub unsafe fn _mm512_mask_srl_epi32(
+    src: __m512i,
+    k: __mmask16,
+    a: __m512i,
+    count: __m128i,
+) -> __m512i {
+    let shf = _mm512_srl_epi32(a, count).as_i32x16();
+    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op,
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srl_epi32&expand=5491)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_mask_cmp_ps_mask(m: __mmask16, a: __m512, b: __m512, op: i32) -> __mmask16 {
-    macro_rules! call {
-        ($imm5:expr) => {
-            vcmpps(
-                a.as_f32x16(),
-                b.as_f32x16(),
-                $imm5,
-                m as i16,
-                _MM_FROUND_CUR_DIRECTION,
-            )
-        };
-    }
-    let r = constify_imm5!(op, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpsrld))]
+pub unsafe fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
+    let shf = _mm512_srl_epi32(a, count).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, shf, zero))
 }
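A count-register sketch (illustrative, not from the patch): unlike `slli`, which takes a compile-time immediate, `sll`/`srl` read the shift amount from the low 64 bits of an `__m128i`, so it can be a runtime value:

```rust
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn runtime_shift(n: i32) -> i32 {
    debug_assert!(n >= 0 && n < 32); // counts >= 32 zero every lane
    let a = _mm512_set1_epi32(3);
    let count = _mm_cvtsi32_si128(n); // the low lane holds the shift amount
    let r: [i32; 16] = core::mem::transmute(_mm512_sll_epi32(a, count));
    r[0] // == 3 << n
}
```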
-/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op.
+/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sll_epi64&expand=5289)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2, 3)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
-pub unsafe fn _mm512_cmp_round_ps_mask(a: __m512, b: __m512, op: i32, sae: i32) -> __mmask16 {
-    let neg_one = -1;
-    macro_rules! call {
-        ($imm5:expr, $imm4:expr) => {
-            vcmpps(a.as_f32x16(), b.as_f32x16(), $imm5, neg_one, $imm4)
-        };
-    }
-    let r = constify_imm5_sae!(op, sae, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpsllq))]
+pub unsafe fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
+    transmute(vpsllq(a.as_i64x8(), count.as_i64x2()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op,
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sll_epi64&expand=5287)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3, 4)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
-pub unsafe fn _mm512_mask_cmp_round_ps_mask(
-    m: __mmask16,
-    a: __m512,
-    b: __m512,
-    op: i32,
-    sae: i32,
-) -> __mmask16 {
-    macro_rules! call {
-        ($imm5:expr, $imm4:expr) => {
-            vcmpps(a.as_f32x16(), b.as_f32x16(), $imm5, m as i16, $imm4)
-        };
-    }
-    let r = constify_imm5_sae!(op, sae, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpsllq))]
+pub unsafe fn _mm512_mask_sll_epi64(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512i,
+    count: __m128i,
+) -> __m512i {
+    let shf = _mm512_sll_epi64(a, count).as_i64x8();
+    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector.
+/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sll_epi64&expand=5288)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
-    _mm512_cmp_ps_mask(a, b, _CMP_ORD_Q)
+#[cfg_attr(test, assert_instr(vpsllq))]
+pub unsafe fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
+    let shf = _mm512_sll_epi64(a, count).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, shf, zero))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector.
+/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srl_epi64&expand=5501)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_mask_cmpord_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
-    _mm512_mask_cmp_ps_mask(m, a, b, _CMP_ORD_Q)
+#[cfg_attr(test, assert_instr(vpsrlq))]
+pub unsafe fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
+    transmute(vpsrlq(a.as_i64x8(), count.as_i64x2()))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector.
+/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_ps_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srl_epi64&expand=5499)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
-    _mm512_cmp_ps_mask(a, b, _CMP_UNORD_Q)
+#[cfg_attr(test, assert_instr(vpsrlq))]
+pub unsafe fn _mm512_mask_srl_epi64(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512i,
+    count: __m128i,
+) -> __m512i {
+    let shf = _mm512_srl_epi64(a, count).as_i64x8();
+    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
 }
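A call-site sketch (my own illustration, not from the patch): code using these `#[target_feature]` functions is typically gated on runtime feature detection first:

```rust
use std::arch::x86_64::*;

fn srl_if_supported() {
    if is_x86_feature_detected!("avx512f") {
        unsafe {
            let a = _mm512_set1_epi64(4);
            let r: [i64; 8] = core::mem::transmute(_mm512_srl_epi64(a, _mm_cvtsi32_si128(2)));
            assert_eq!(r[0], 1);
        }
    }
}
```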
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_ps_mask) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sll_epi64&expand=5288) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcmp, op = 0))] -pub unsafe fn _mm512_mask_cmpunord_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 { - _mm512_mask_cmp_ps_mask(m, a, b, _CMP_UNORD_Q) +#[cfg_attr(test, assert_instr(vpsrlq))] +pub unsafe fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { + let shf = _mm512_srl_epi64(a, count).as_i64x8(); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, shf, zero)) } -/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in a mask vector. +/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_pd) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sra_epi32&expand=5407) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcmp))] -pub unsafe fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { - _mm512_cmp_pd_mask(a, b, _CMP_LT_OS) +#[cfg_attr(test, assert_instr(vpsrad))] +pub unsafe fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i { + transmute(vpsrad(a.as_i32x16(), count.as_i32x4())) } -/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_pd) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sra_epi32&expand=5405) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcmp))] -pub unsafe fn _mm512_mask_cmplt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { - _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LT_OS) +#[cfg_attr(test, assert_instr(vpsrad))] +pub unsafe fn _mm512_mask_sra_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + count: __m128i, +) -> __m512i { + let shf = _mm512_sra_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) } -/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector. +/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnlt_pd) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sra_epi32&expand=5406) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcmp))] -pub unsafe fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { - _mm512_cmp_pd_mask(a, b, _CMP_NLT_US) +#[cfg_attr(test, assert_instr(vpsrad))] +pub unsafe fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { + let shf = _mm512_sra_epi32(a, count).as_i32x16(); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, shf, zero)) } -/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnlt_pd) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sra_epi64&expand=5416) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcmp))] -pub unsafe fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { - _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLT_US) +#[cfg_attr(test, assert_instr(vpsraq))] +pub unsafe fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i { + transmute(vpsraq(a.as_i64x8(), count.as_i64x2())) } -/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector. +/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_pd) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sra_epi64&expand=5414) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcmp))] -pub unsafe fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { - _mm512_cmp_pd_mask(a, b, _CMP_LE_OS) +#[cfg_attr(test, assert_instr(vpsraq))] +pub unsafe fn _mm512_mask_sra_epi64( + src: __m512i, + k: __mmask8, + a: __m512i, + count: __m128i, +) -> __m512i { + let shf = _mm512_sra_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } -/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
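A semantics sketch (my own, not from the patch): `sra` replicates the sign bit, while `srl` shifts in zeros, which matters for negative lanes:

```rust
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn arithmetic_vs_logical() {
    let a = _mm512_set1_epi32(-8); // 0xFFFF_FFF8
    let one = _mm_cvtsi32_si128(1);
    let sra: [i32; 16] = core::mem::transmute(_mm512_sra_epi32(a, one));
    let srl: [i32; 16] = core::mem::transmute(_mm512_srl_epi32(a, one));
    assert_eq!(sra[0], -4); // arithmetic: sign bit replicated
    assert_eq!(srl[0], 0x7FFF_FFFC); // logical: a zero shifted in, large positive
}
```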
-/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sra_epi64&expand=5415)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmple_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LE_OS)
+#[cfg_attr(test, assert_instr(vpsraq))]
+pub unsafe fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
+    let shf = _mm512_sra_epi64(a, count).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, shf, zero))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnle_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srai_epi32&expand=5436)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_cmp_pd_mask(a, b, _CMP_NLE_US)
+#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i {
+    assert!(imm8 <= 255);
+    transmute(vpsraid(a.as_i32x16(), imm8))
 }

-/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnle_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi32&expand=5434)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpnle_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLE_US)
+#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
+#[rustc_args_required_const(3)]
+pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
+    assert!(imm8 <= 255);
+    let shf = vpsraid(a.as_i32x16(), imm8);
+    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
 }

-/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi32&expand=5435)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_cmp_pd_mask(a, b, _CMP_EQ_OQ)
+#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
+    assert!(imm8 <= 255);
+    let shf = vpsraid(a.as_i32x16(), imm8);
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, shf, zero))
 }

-/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srai_epi64&expand=5445)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpeq_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OQ)
+#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i {
+    assert!(imm8 <= 255);
+    transmute(vpsraiq(a.as_i64x8(), imm8))
 }

-/// Compare packed double-precision (64-bit) floating-point elements in a and b for inequality, and store the results in a mask vector.
+/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_pd)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi64&expand=5443)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_cmp_pd_mask(a, b, _CMP_NEQ_UQ)
+#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
+#[rustc_args_required_const(3)]
+pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
+    assert!(imm8 <= 255);
+    let shf = vpsraiq(a.as_i64x8(), imm8);
+    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
 }
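Worth noting for reviewers (my own sketch, not from the patch): a packed 64-bit arithmetic right shift has no SSE2/AVX2 form; AVX512F adds it as vpsraq. The result mirrors `>>` on each `i64` lane:

```rust
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn srai64() {
    let a = _mm512_set1_epi64(-1024);
    let r: [i64; 8] = core::mem::transmute(_mm512_srai_epi64(a, 4));
    assert_eq!(r[0], -64); // -1024 >> 4, sign bits shifted in
}
```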
-/// Compare packed double-precision (64-bit) floating-point elements in a and b for inequality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_pd_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi64&expand=5444)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp))]
-pub unsafe fn _mm512_mask_cmpneq_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_UQ)
+#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
+    assert!(imm8 <= 255);
+    let shf = vpsraiq(a.as_i64x8(), imm8);
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, shf, zero))
 }

-/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op.
+/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_pd_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srav_epi32&expand=5465)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_cmp_pd_mask(a: __m512d, b: __m512d, op: i32) -> __mmask8 {
-    let neg_one = -1;
-    macro_rules! call {
-        ($imm5:expr) => {
-            vcmppd(
-                a.as_f64x8(),
-                b.as_f64x8(),
-                $imm5,
-                neg_one,
-                _MM_FROUND_CUR_DIRECTION,
-            )
-        };
-    }
-    let r = constify_imm5!(op, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpsravd))]
+pub unsafe fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
+    transmute(vpsravd(a.as_i32x16(), count.as_i32x16()))
 }

-/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op,
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_pd_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srav_epi32&expand=5463)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_mask_cmp_pd_mask(m: __mmask8, a: __m512d, b: __m512d, op: i32) -> __mmask8 {
-    macro_rules! call {
-        ($imm5:expr) => {
-            vcmppd(
-                a.as_f64x8(),
-                b.as_f64x8(),
-                $imm5,
-                m as i8,
-                _MM_FROUND_CUR_DIRECTION,
-            )
-        };
-    }
-    let r = constify_imm5!(op, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpsravd))]
+pub unsafe fn _mm512_mask_srav_epi32(
+    src: __m512i,
+    k: __mmask16,
+    a: __m512i,
+    count: __m512i,
+) -> __m512i {
+    let shf = _mm512_srav_epi32(a, count).as_i32x16();
+    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
 }

-/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op.
+/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_pd_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srav_epi32&expand=5464)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2, 3)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
-pub unsafe fn _mm512_cmp_round_pd_mask(a: __m512d, b: __m512d, op: i32, sae: i32) -> __mmask8 {
-    let neg_one = -1;
-    macro_rules! call {
-        ($imm5:expr, $imm4:expr) => {
-            vcmppd(a.as_f64x8(), b.as_f64x8(), $imm5, neg_one, $imm4)
-        };
-    }
-    let r = constify_imm5_sae!(op, sae, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpsravd))]
+pub unsafe fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
+    let shf = _mm512_srav_epi32(a, count).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, shf, zero))
 }

-/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op,
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_pd_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srav_epi64&expand=5474)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3, 4)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
-pub unsafe fn _mm512_mask_cmp_round_pd_mask(
-    m: __mmask8,
-    a: __m512d,
-    b: __m512d,
-    op: i32,
-    sae: i32,
-) -> __mmask8 {
-    macro_rules! call {
-        ($imm5:expr, $imm4:expr) => {
-            vcmppd(a.as_f64x8(), b.as_f64x8(), $imm5, m as i8, $imm4)
-        };
-    }
-    let r = constify_imm5_sae!(op, sae, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpsravq))]
+pub unsafe fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
+    transmute(vpsravq(a.as_i64x8(), count.as_i64x8()))
 }
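A per-lane sketch (illustrative, not from the patch): unlike `sra`, which applies one count to every lane, `srav` takes a separate shift amount per lane from `count`:

```rust
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn per_lane_shift() {
    let a = _mm512_set1_epi32(-256);
    let counts = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let r: [i32; 16] = core::mem::transmute(_mm512_srav_epi32(a, counts));
    assert_eq!(r[0], -256); // lane 0 shifted by 0
    assert_eq!(r[4], -16); // lane 4 shifted by 4
}
```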
+/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_pd_mask) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srav_epi64&expand=5472) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcmp, op = 0))] -pub unsafe fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { - _mm512_cmp_pd_mask(a, b, _CMP_ORD_Q) +#[cfg_attr(test, assert_instr(vpsravq))] +pub unsafe fn _mm512_mask_srav_epi64( + src: __m512i, + k: __mmask8, + a: __m512i, + count: __m512i, +) -> __m512i { + let shf = _mm512_srav_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) } -/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector. +/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_pd_mask) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srav_epi64&expand=5473) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcmp, op = 0))] -pub unsafe fn _mm512_mask_cmpord_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { - _mm512_mask_cmp_pd_mask(m, a, b, _CMP_ORD_Q) +#[cfg_attr(test, assert_instr(vpsravq))] +pub unsafe fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i { + let shf = _mm512_srav_epi64(a, count).as_i64x8(); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, shf, zero)) } -/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector. +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_pd_mask) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rolv_epi32&expand=4703) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vcmp, op = 0))] -pub unsafe fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { - _mm512_cmp_pd_mask(a, b, _CMP_UNORD_Q) +#[cfg_attr(test, assert_instr(vprolvd))] +pub unsafe fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i { + transmute(vprolvd(a.as_i32x16(), b.as_i32x16())) } -/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector. 
+/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_pd_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rolv_epi32&expand=4701)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vcmp, op = 0))]
-pub unsafe fn _mm512_mask_cmpunord_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
-    _mm512_mask_cmp_pd_mask(m, a, b, _CMP_UNORD_Q)
+#[cfg_attr(test, assert_instr(vprolvd))]
+pub unsafe fn _mm512_mask_rolv_epi32(
+    src: __m512i,
+    k: __mmask16,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
+    let rol = _mm512_rolv_epi32(a, b).as_i32x16();
+    transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
 }
 
-/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector.
+/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ss_mask&expand=5236,755,757)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rolv_epi32&expand=4702)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
-pub unsafe fn _mm_cmp_ss_mask(a: __m128, b: __m128, op: i32) -> __mmask8 {
-    let neg_one = -1;
-    macro_rules! call {
-        ($imm5:expr) => {
-            vcmpss(a, b, $imm5, neg_one, _MM_FROUND_CUR_DIRECTION)
-        };
-    }
-    let r = constify_imm5!(op, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vprolvd))]
+pub unsafe fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    let rol = _mm512_rolv_epi32(a, b).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, rol, zero))
 }
 
-/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set).
+/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_ss_mask&expand=5236,755,757)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rorv_epi32&expand=4739)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
-pub unsafe fn _mm_mask_cmp_ss_mask(m: __mmask8, a: __m128, b: __m128, op: i32) -> __mmask8 {
-    macro_rules! call {
-        ($imm5:expr) => {
-            vcmpss(a, b, $imm5, m as i8, _MM_FROUND_CUR_DIRECTION)
-        };
-    }
-    let r = constify_imm5!(op, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vprorvd))]
+pub unsafe fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i {
+    transmute(vprorvd(a.as_i32x16(), b.as_i32x16()))
 }
 
-/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector.
+/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_ss_mask&expand=5236,755,757)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rorv_epi32&expand=4737)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2, 3)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
-pub unsafe fn _mm_cmp_round_ss_mask(a: __m128, b: __m128, op: i32, sae: i32) -> __mmask8 {
-    let neg_one = -1;
-    macro_rules! call {
-        ($imm5:expr, $imm4:expr) => {
-            vcmpss(a, b, $imm5, neg_one, $imm4)
-        };
-    }
-    let r = constify_imm5_sae!(op, sae, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vprorvd))]
+pub unsafe fn _mm512_mask_rorv_epi32(
+    src: __m512i,
+    k: __mmask16,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
+    let ror = _mm512_rorv_epi32(a, b).as_i32x16();
+    transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
 }
 
-/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set).
+/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_ss_mask&expand=5236,755,757)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rorv_epi32&expand=4738)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3, 4)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
-pub unsafe fn _mm_mask_cmp_round_ss_mask(
-    m: __mmask8,
-    a: __m128,
-    b: __m128,
-    op: i32,
-    sae: i32,
-) -> __mmask8 {
-    macro_rules! call {
-        ($imm5:expr, $imm4:expr) => {
-            vcmpss(a, b, $imm5, m as i8, $imm4)
-        };
-    }
-    let r = constify_imm5_sae!(op, sae, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vprorvd))]
+pub unsafe fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    let ror = _mm512_rorv_epi32(a, b).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, ror, zero))
 }
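The rotate pair is easiest to sanity-check through the modular identity rol(x, n) == ror(x, w - n) for lane width w. A sketch under the same assumptions as above (hypothetical `rotate_demo`; `_mm512_xor_epi32` is added later in this patch):

```rust
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn rotate_demo() -> __m512i {
    // Rotation is modular in the 32-bit lane width: counts are taken mod 32,
    // and rotating left by 1 equals rotating right by 31.
    let x = _mm512_set1_epi32(0x8000_0001u32 as i32);
    let left = _mm512_rolv_epi32(x, _mm512_set1_epi32(1)); // each lane -> 0x0000_0003
    let right = _mm512_rorv_epi32(x, _mm512_set1_epi32(31)); // same result
    _mm512_xor_epi32(left, right) // all-zero iff the identity holds
}
```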
-/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector.
+/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sd_mask&expand=5236,755,757)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rolv_epi64&expand=4712)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
-pub unsafe fn _mm_cmp_sd_mask(a: __m128d, b: __m128d, op: i32) -> __mmask8 {
-    let neg_one = -1;
-    macro_rules! call {
-        ($imm5:expr) => {
-            vcmpsd(a, b, $imm5, neg_one, _MM_FROUND_CUR_DIRECTION)
-        };
-    }
-    let r = constify_imm5!(op, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vprolvq))]
+pub unsafe fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i {
+    transmute(vprolvq(a.as_i64x8(), b.as_i64x8()))
 }
 
-/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set).
+/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_sd_mask&expand=5236,755,757)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rolv_epi64&expand=4710)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
-pub unsafe fn _mm_mask_cmp_sd_mask(m: __mmask8, a: __m128d, b: __m128d, op: i32) -> __mmask8 {
-    macro_rules! call {
-        ($imm5:expr) => {
-            vcmpsd(a, b, $imm5, m as i8, _MM_FROUND_CUR_DIRECTION)
-        };
-    }
-    let r = constify_imm5!(op, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vprolvq))]
+pub unsafe fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let rol = _mm512_rolv_epi64(a, b).as_i64x8();
+    transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
 }
 
-/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector.
+/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_sd_mask&expand=5236,755,757)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rolv_epi64&expand=4711)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2, 3)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
-pub unsafe fn _mm_cmp_round_sd_mask(a: __m128d, b: __m128d, op: i32, sae: i32) -> __mmask8 {
-    let neg_one = -1;
-    macro_rules! call {
-        ($imm5:expr, $imm4:expr) => {
-            vcmpsd(a, b, $imm5, neg_one, $imm4)
-        };
-    }
-    let r = constify_imm5_sae!(op, sae, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vprolvq))]
+pub unsafe fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let rol = _mm512_rolv_epi64(a, b).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, rol, zero))
 }
 
-/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set).
+/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_sd_mask&expand=5236,755,757)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rorv_epi64&expand=4748)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3, 4)]
-#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))]
-pub unsafe fn _mm_mask_cmp_round_sd_mask(
-    m: __mmask8,
-    a: __m128d,
-    b: __m128d,
-    op: i32,
-    sae: i32,
-) -> __mmask8 {
-    macro_rules! call {
-        ($imm5:expr, $imm4:expr) => {
-            vcmpsd(a, b, $imm5, m as i8, $imm4)
-        };
-    }
-    let r = constify_imm5_sae!(op, sae, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vprorvq))]
+pub unsafe fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i {
+    transmute(vprorvq(a.as_i64x8(), b.as_i64x8()))
 }
 
-/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector.
+/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rorv_epi64&expand=4746)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
-    simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16()))
+#[cfg_attr(test, assert_instr(vprorvq))]
+pub unsafe fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let ror = _mm512_rorv_epi64(a, b).as_i64x8();
+    transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
 }
 
-/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epu32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rorv_epi64&expand=4747)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmplt_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
-    _mm512_cmplt_epu32_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vprorvq))]
+pub unsafe fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let ror = _mm512_rorv_epi64(a, b).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, ror, zero))
 }
 
-/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epu32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sllv_epi32&expand=5342)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
-    simd_bitmask::<u32x16, _>(simd_gt(a.as_u32x16(), b.as_u32x16()))
+#[cfg_attr(test, assert_instr(vpsllvd))]
+pub unsafe fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
+    transmute(vpsllvd(a.as_i32x16(), count.as_i32x16()))
 }
 
-/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epu32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sllv_epi32&expand=5340)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpgt_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
-    _mm512_cmpgt_epu32_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vpsllvd))]
+pub unsafe fn _mm512_mask_sllv_epi32(
+    src: __m512i,
+    k: __mmask16,
+    a: __m512i,
+    count: __m512i,
+) -> __m512i {
+    let shf = _mm512_sllv_epi32(a, count).as_i32x16();
+    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
 }
 
-/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epu32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sllv_epi32&expand=5341)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
-    simd_bitmask::<u32x16, _>(simd_le(a.as_u32x16(), b.as_u32x16()))
+#[cfg_attr(test, assert_instr(vpsllvd))]
+pub unsafe fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
+    let shf = _mm512_sllv_epi32(a, count).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, shf, zero))
 }
 
-/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epu32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srlv_epi32&expand=5554)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmple_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
-    _mm512_cmple_epu32_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vpsrlvd))]
+pub unsafe fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
+    transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16()))
 }
 
-/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
+/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epu32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srlv_epi32&expand=5552)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
-    simd_bitmask::<u32x16, _>(simd_ge(a.as_u32x16(), b.as_u32x16()))
+#[cfg_attr(test, assert_instr(vpsrlvd))]
+pub unsafe fn _mm512_mask_srlv_epi32(
+    src: __m512i,
+    k: __mmask16,
+    a: __m512i,
+    count: __m512i,
+) -> __m512i {
+    let shf = _mm512_srlv_epi32(a, count).as_i32x16();
+    transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
 }
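Worth spelling out the logical-vs-arithmetic distinction these docs imply, as a hypothetical sketch (`shift_demo`; illustrative values only, same nightly/`avx512f` assumptions as above):

```rust
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn shift_demo() {
    // srlv is a *logical* right shift: zeros come in from the left, so a
    // negative input becomes a large positive bit pattern, unlike srav,
    // which replicates the sign bit.
    let a = _mm512_set1_epi32(-1);
    let count = _mm512_set1_epi32(4);
    let _logical = _mm512_srlv_epi32(a, count); // every lane: 0x0FFF_FFFF
    let _arith = _mm512_srav_epi32(a, count); // every lane: -1
    // sllv shifts zeros in from the right; lanes whose count >= 32 become 0.
    let _left = _mm512_sllv_epi32(a, _mm512_set1_epi32(33));
}
```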
-/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epu32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srlv_epi32&expand=5553)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpge_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
-    _mm512_cmpge_epu32_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vpsrlvd))]
+pub unsafe fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
+    let shf = _mm512_srlv_epi32(a, count).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, shf, zero))
 }
 
-/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in a mask vector.
+/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epu32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sllv_epi64&expand=5351)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
-    simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16()))
+#[cfg_attr(test, assert_instr(vpsllvq))]
+pub unsafe fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
+    transmute(vpsllvq(a.as_i64x8(), count.as_i64x8()))
 }
 
-/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epu32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sllv_epi64&expand=5349)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpeq_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
-    _mm512_cmpeq_epu32_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vpsllvq))]
+pub unsafe fn _mm512_mask_sllv_epi64(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512i,
+    count: __m512i,
+) -> __m512i {
+    let shf = _mm512_sllv_epi64(a, count).as_i64x8();
+    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
 }
 
-/// Compare packed unsigned 32-bit integers in a and b for inequality, and store the results in a mask vector.
+/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epu32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sllv_epi64&expand=5350)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
-    simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16()))
+#[cfg_attr(test, assert_instr(vpsllvq))]
+pub unsafe fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
+    let shf = _mm512_sllv_epi64(a, count).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, shf, zero))
 }
 
-/// Compare packed unsigned 32-bit integers in a and b for inequality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epu32_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srlv_epi64&expand=5563)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpneq_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
-    _mm512_cmpneq_epu32_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vpsrlvq))]
+pub unsafe fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
+    transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8()))
 }
 
-/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by op.
+/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu32_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mask_srlv_epi64&expand=5561)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2)]
-#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
-pub unsafe fn _mm512_cmp_epu32_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask16 {
-    let neg_one = -1;
-    macro_rules! call {
-        ($imm3:expr) => {
-            vpcmpud(a.as_i32x16(), b.as_i32x16(), $imm3, neg_one)
-        };
-    }
-    let r = constify_imm3!(op, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpsrlvq))]
+pub unsafe fn _mm512_mask_srlv_epi64(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512i,
+    count: __m512i,
+) -> __m512i {
+    let shf = _mm512_srlv_epi64(a, count).as_i64x8();
+    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
 }
 
-/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by op,
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu32_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srlv_epi64&expand=5562)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3)]
-#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
-pub unsafe fn _mm512_mask_cmp_epu32_mask(
-    m: __mmask16,
-    a: __m512i,
-    b: __m512i,
-    op: _MM_CMPINT_ENUM,
-) -> __mmask16 {
-    macro_rules! call {
-        ($imm3:expr) => {
-            vpcmpud(a.as_i32x16(), b.as_i32x16(), $imm3, m as i16)
-        };
-    }
-    let r = constify_imm3!(op, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vpsrlvq))]
+pub unsafe fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
+    let shf = _mm512_srlv_epi64(a, count).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, shf, zero))
 }
 
-/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector.
+/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_epi32&expand=272)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
-    simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16()))
+#[cfg_attr(test, assert_instr(vpandq))]
+pub unsafe fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
+    transmute(simd_and(a.as_i32x16(), b.as_i32x16()))
 }
 
-/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_and_epi32&expand=273)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmplt_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
-    _mm512_cmplt_epi32_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vpandd))]
+pub unsafe fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    let and = _mm512_and_epi32(a, b).as_i32x16();
+    transmute(simd_select_bitmask(k, and, src.as_i32x16()))
 }
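A hedged usage sketch of the writemask variant (hypothetical `masked_and_demo`, values invented): only lanes selected by `k` receive the AND result, the rest pass `src` through unchanged:

```rust
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn masked_and_demo() -> __m512i {
    // Keep only the low byte of each lane, but just in the even lanes;
    // odd lanes are copied from `src` untouched.
    let src = _mm512_set1_epi32(-1);
    let a = _mm512_set1_epi32(0x1234);
    let byte_mask = _mm512_set1_epi32(0xFF);
    let k: __mmask16 = 0b0101_0101_0101_0101; // bit i == 1 selects the AND result for lane i
    _mm512_mask_and_epi32(src, k, a, byte_mask)
}
```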
-/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in a mask vector.
+/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_and_epi32&expand=274)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
-    simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16()))
+#[cfg_attr(test, assert_instr(vpandd))]
+pub unsafe fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    let and = _mm512_and_epi32(a, b).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, and, zero))
 }
 
-/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_epi64&expand=279)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpgt_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
-    _mm512_cmpgt_epi32_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vpandq))]
+pub unsafe fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i {
+    transmute(simd_and(a.as_i64x8(), b.as_i64x8()))
 }
 
-/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector.
+/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_and_epi64&expand=280)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
-    simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16()))
+#[cfg_attr(test, assert_instr(vpandq))]
+pub unsafe fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let and = _mm512_and_epi64(a, b).as_i64x8();
+    transmute(simd_select_bitmask(k, and, src.as_i64x8()))
 }
 
-/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_and_epi64&expand=274)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmple_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
-    _mm512_cmple_epi32_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vpandq))]
+pub unsafe fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let and = _mm512_and_epi64(a, b).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, and, zero))
 }
 
-/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
+/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_si512&expand=302)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
-    simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16()))
+#[cfg_attr(test, assert_instr(vpandq))]
+pub unsafe fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
+    transmute(simd_and(a.as_i32x16(), b.as_i32x16()))
 }
 
-/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_epi32&expand=4042)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpge_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
-    _mm512_cmpge_epi32_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vporq))]
+pub unsafe fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
+    transmute(simd_or(a.as_i32x16(), b.as_i32x16()))
 }
 
-/// Compare packed signed 32-bit integers in a and b for equality, and store the results in a mask vector.
+/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_or_epi32&expand=4040)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
-    simd_bitmask::<i32x16, _>(simd_eq(a.as_i32x16(), b.as_i32x16()))
+#[cfg_attr(test, assert_instr(vpord))]
+pub unsafe fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    let or = _mm512_or_epi32(a, b).as_i32x16();
+    transmute(simd_select_bitmask(k, or, src.as_i32x16()))
 }
 
-/// Compare packed signed 32-bit integers in a and b for equality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_or_epi32&expand=4041)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpeq_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
-    _mm512_cmpeq_epi32_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vpord))]
+pub unsafe fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    let or = _mm512_or_epi32(a, b).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, or, zero))
 }
 
-/// Compare packed signed 32-bit integers in a and b for inequality, and store the results in a mask vector.
+/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_epi64&expand=4051)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
-    simd_bitmask::<i32x16, _>(simd_ne(a.as_i32x16(), b.as_i32x16()))
+#[cfg_attr(test, assert_instr(vporq))]
+pub unsafe fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i {
+    transmute(simd_or(a.as_i64x8(), b.as_i64x8()))
 }
 
-/// Compare packed signed 32-bit integers in a and b for inequality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epi32)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_or_epi64&expand=4049)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpneq_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
-    _mm512_cmpneq_epi32_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vporq))]
+pub unsafe fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let or = _mm512_or_epi64(a, b).as_i64x8();
+    transmute(simd_select_bitmask(k, or, src.as_i64x8()))
 }
 
-/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by op.
+/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi32_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_or_epi64&expand=4050)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2)]
-#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
-pub unsafe fn _mm512_cmp_epi32_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask16 {
-    let neg_one = -1;
-    macro_rules! call {
-        ($imm3:expr) => {
-            vpcmpd(a.as_i32x16(), b.as_i32x16(), $imm3, neg_one)
-        };
-    }
-    let r = constify_imm3!(op, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vporq))]
+pub unsafe fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let or = _mm512_or_epi64(a, b).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, or, zero))
 }
 
-/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by op,
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise OR of 512 bits (representing integer data) in a and b, and store the result in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi32_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_si512&expand=4072)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3)]
-#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
-pub unsafe fn _mm512_mask_cmp_epi32_mask(
-    m: __mmask16,
-    a: __m512i,
-    b: __m512i,
-    op: _MM_CMPINT_ENUM,
-) -> __mmask16 {
-    macro_rules! call {
-        ($imm3:expr) => {
-            vpcmpd(a.as_i32x16(), b.as_i32x16(), $imm3, m as i16)
-        };
-    }
-    let r = constify_imm3!(op, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(vporq))]
+pub unsafe fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
+    transmute(simd_or(a.as_i32x16(), b.as_i32x16()))
 }
 
-/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector.
+/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_epi32&expand=6142)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
-    simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8()))
+#[cfg_attr(test, assert_instr(vpxorq))]
+pub unsafe fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
+    transmute(simd_xor(a.as_i32x16(), b.as_i32x16()))
 }
 
-/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_xor_epi32&expand=6140)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmplt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
-    _mm512_cmplt_epu64_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vpxord))]
+pub unsafe fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    let xor = _mm512_xor_epi32(a, b).as_i32x16();
+    transmute(simd_select_bitmask(k, xor, src.as_i32x16()))
 }
 
-/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in a mask vector.
+/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_xor_epi32&expand=6141)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
-    simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8()))
+#[cfg_attr(test, assert_instr(vpxord))]
+pub unsafe fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    let xor = _mm512_xor_epi32(a, b).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, xor, zero))
 }
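Two common XOR idioms the plain variant enables, as a hypothetical sketch (`xor_demo`, same assumptions as the earlier sketches):

```rust
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn xor_demo() -> __m512i {
    // x ^ x == 0: the classic register-zeroing idiom, and
    // x ^ !0 flips every bit (a bitwise NOT).
    let x = _mm512_set1_epi32(0x5A5A_5A5A);
    let _zero = _mm512_xor_epi32(x, x);
    _mm512_xor_si512(x, _mm512_set1_epi32(-1)) // !x
}
```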
-/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_epi64&expand=6151)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpgt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
-    _mm512_cmpgt_epu64_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vpxorq))]
+pub unsafe fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i {
+    transmute(simd_xor(a.as_i64x8(), b.as_i64x8()))
 }
 
-/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector.
+/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_xor_epi64&expand=6149)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
-    simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8()))
+#[cfg_attr(test, assert_instr(vpxorq))]
+pub unsafe fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let xor = _mm512_xor_epi64(a, b).as_i64x8();
+    transmute(simd_select_bitmask(k, xor, src.as_i64x8()))
 }
 
-/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_xor_epi64&expand=6150)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmple_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
-    _mm512_cmple_epu64_mask(a, b) & m
+#[cfg_attr(test, assert_instr(vpxorq))]
+pub unsafe fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let xor = _mm512_xor_epi64(a, b).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, xor, zero))
 }
 
-/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
+/// Compute the bitwise XOR of 512 bits (representing integer data) in a and b, and store the result in dst.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_si512&expand=6172)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
-    simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8()))
+#[cfg_attr(test, assert_instr(vpxorq))]
+pub unsafe fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
+    transmute(simd_xor(a.as_i32x16(), b.as_i32x16()))
 }
 
-/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kand_mask16&expand=3212)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpge_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
-    _mm512_cmpge_epu64_mask(b, a) & m
+#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
+pub unsafe fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
+    transmute(kandw(a, b))
 }
 
-/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in a mask vector.
+/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kand&expand=3210)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
-    simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8()))
+#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
+pub unsafe fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
+    transmute(kandw(a, b))
 }
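The mask-domain operations combine predicate masks without touching vector registers. A hypothetical sketch (`in_range_mask`) that builds a per-lane range test from two comparison intrinsics added later in this patch:

```rust
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn in_range_mask(a: __m512, lo: __m512, hi: __m512) -> __mmask16 {
    // Bit i of the result is set iff lo <= a_i && a_i < hi.
    // A NaN lane sets the NLT mask (unordered compares true under _CMP_NLT_US)
    // but not the LT mask (_CMP_LT_OS is false on unordered inputs),
    // so it drops out of the AND.
    let ge_lo = _mm512_cmpnlt_ps_mask(a, lo); // !(a < lo)
    let lt_hi = _mm512_cmplt_ps_mask(a, hi);
    _kand_mask16(ge_lo, lt_hi)
}
```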
-/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kor_mask16&expand=3239)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpeq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
-    _mm512_cmpeq_epu64_mask(a, b) & m
+#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
+pub unsafe fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
+    transmute(korw(a, b))
 }
 
-/// Compare packed unsigned 64-bit integers in a and b for inequality, and store the results in a mask vector.
+/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epu64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kor&expand=3237)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
-    simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8()))
+#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
+pub unsafe fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
+    transmute(korw(a, b))
 }
 
-/// Compare packed unsigned 64-bit integers in a and b for inequality, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epu64_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kxor_mask16&expand=3291)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmpneq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
-    _mm512_cmpneq_epu64_mask(a, b) & m
+#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
+pub unsafe fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
+    transmute(kxorw(a, b))
 }
 
-/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by op.
+/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu64_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kxor&expand=3289)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(2)]
-#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
-pub unsafe fn _mm512_cmp_epu64_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask8 {
-    let neg_one = -1;
-    macro_rules! call {
-        ($imm3:expr) => {
-            vpcmpuq(a.as_i64x8(), b.as_i64x8(), $imm3, neg_one)
-        };
-    }
-    let r = constify_imm3!(op, call);
-    transmute(r)
+#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
+pub unsafe fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
+    transmute(kxorw(a, b))
 }
 
-/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by op,
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied values.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu64_mask)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_ps)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[rustc_args_required_const(3)]
-#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
-pub unsafe fn _mm512_mask_cmp_epu64_mask(
-    m: __mmask8,
-    a: __m512i,
-    b: __m512i,
-    op: _MM_CMPINT_ENUM,
-) -> __mmask8 {
-    macro_rules! call {
-        ($imm3:expr) => {
-            vpcmpuq(a.as_i64x8(), b.as_i64x8(), $imm3, m as i8)
-        };
-    }
-    let r = constify_imm3!(op, call);
-    transmute(r)
+pub unsafe fn _mm512_set_ps(
+    e0: f32,
+    e1: f32,
+    e2: f32,
+    e3: f32,
+    e4: f32,
+    e5: f32,
+    e6: f32,
+    e7: f32,
+    e8: f32,
+    e9: f32,
+    e10: f32,
+    e11: f32,
+    e12: f32,
+    e13: f32,
+    e14: f32,
+    e15: f32,
+) -> __m512 {
+    _mm512_setr_ps(
+        e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
+    )
 }
 
-/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in a mask vector.
+/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied values in
+/// reverse order.
 ///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi64)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_ps)
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
-    simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8()))
+pub unsafe fn _mm512_setr_ps(
+    e0: f32,
+    e1: f32,
+    e2: f32,
+    e3: f32,
+    e4: f32,
+    e5: f32,
+    e6: f32,
+    e7: f32,
+    e8: f32,
+    e9: f32,
+    e10: f32,
+    e11: f32,
+    e12: f32,
+    e13: f32,
+    e14: f32,
+    e15: f32,
+) -> __m512 {
+    let r = f32x16::new(
+        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
+    );
+    transmute(r)
 }
 
-/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in a mask vector k
-/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
-///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epi64)
+/// Broadcast 64-bit float `a` to all elements of `dst`.
 #[inline]
 #[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(vpcmp))]
-pub unsafe fn _mm512_mask_cmplt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
-    _mm512_cmplt_epi64_mask(a, b) & m
+pub unsafe fn _mm512_set1_pd(a: f64) -> __m512d {
+    transmute(f64x8::splat(a))
 }
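The `e0`-first parameter naming above is easy to misread: behaviorally, `_mm512_set_ps` still stores its first argument into the highest element, while `_mm512_setr_ps` ("reversed") stores arguments in memory order, element 0 first. A hypothetical sketch (`set_order_demo`) where both calls build the identical vector:

```rust
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn set_order_demo() -> __m512 {
    // set_ps: first argument -> element 15 (highest), last -> element 0.
    let v1 = _mm512_set_ps(
        15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
    );
    // setr_ps: first argument -> element 0, last -> element 15.
    let v2 = _mm512_setr_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    let _ = v2; // v1 and v2 hold the same lanes
    v1
}
```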
-/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epi64) +/// Broadcast 32-bit float `a` to all elements of `dst`. #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { - simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8())) +pub unsafe fn _mm512_set1_ps(a: f32) -> __m512 { + transmute(f32x16::splat(a)) } -/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). -/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epi64) +/// Sets packed 32-bit integers in `dst` with the supplied values. #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpgt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { - _mm512_cmpgt_epi64_mask(a, b) & m +pub unsafe fn _mm512_set_epi32( + e15: i32, + e14: i32, + e13: i32, + e12: i32, + e11: i32, + e10: i32, + e9: i32, + e8: i32, + e7: i32, + e6: i32, + e5: i32, + e4: i32, + e3: i32, + e2: i32, + e1: i32, + e0: i32, +) -> __m512i { + _mm512_setr_epi32( + e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, + ) } -/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector. -/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epi64) +/// Broadcast 32-bit integer `a` to all elements of `dst`. #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { - simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8())) +pub unsafe fn _mm512_set1_epi32(a: i32) -> __m512i { + transmute(i32x16::splat(a)) } -/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). -/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epi64) +/// Broadcast 64-bit integer `a` to all elements of `dst`. #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmple_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { - _mm512_cmple_epi64_mask(a, b) & m +pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i { + transmute(i64x8::splat(a)) } -/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector. +/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in a mask vector. 
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epi64) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_ps) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { - simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8())) +#[cfg_attr(test, assert_instr(vcmp))] +pub unsafe fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 { + _mm512_cmp_ps_mask(a, b, _CMP_LT_OS) } -/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k +/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in a mask vector k /// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epi64) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_ps) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpge_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { - _mm512_cmpge_epi64_mask(b, a) & m +#[cfg_attr(test, assert_instr(vcmp))] +pub unsafe fn _mm512_mask_cmplt_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 { + _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LT_OS) } -/// Compare packed signed 64-bit integers in a and b for equality, and store the results in a mask vector. +/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in a mask vector. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epi64) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnlt_ps) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { - simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8())) +#[cfg_attr(test, assert_instr(vcmp))] +pub unsafe fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 { + _mm512_cmp_ps_mask(a, b, _CMP_NLT_US) } -/// Compare packed signed 64-bit integers in a and b for equality, and store the results in a mask vector k +/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in a mask vector k /// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
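The masked compare variants above only ever set bits that are already set in the incoming mask. A hedged sketch of that contract (the helper name is ours):

```rust
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn masked_lt(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    let full = _mm512_cmplt_ps_mask(a, b);
    let masked = _mm512_mask_cmplt_ps_mask(m, a, b);
    // Lanes outside `m` report 0, so the masked result is `full & m`.
    debug_assert_eq!(masked, full & m);
    masked
}
```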
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epi64) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnlt_ps) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpeq_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { - _mm512_cmpeq_epi64_mask(a, b) & m +#[cfg_attr(test, assert_instr(vcmp))] +pub unsafe fn _mm512_mask_cmpnlt_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 { + _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLT_US) } -/// Compare packed signed 64-bit integers in a and b for inequality, and store the results in a mask vector. +/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epi64) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_ps) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { - simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8())) +#[cfg_attr(test, assert_instr(vcmp))] +pub unsafe fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 { + _mm512_cmp_ps_mask(a, b, _CMP_LE_OS) } -/// Compare packed signed 64-bit integers in a and b for inequality, and store the results in a mask vector k +/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector k /// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epi64) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_ps) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vpcmp))] -pub unsafe fn _mm512_mask_cmpneq_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { - _mm512_cmpneq_epi64_mask(a, b) & m +#[cfg_attr(test, assert_instr(vcmp))] +pub unsafe fn _mm512_mask_cmple_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 { + _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LE_OS) } -/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by op. +/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in a mask vector. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi64_mask) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnle_ps) #[inline] #[target_feature(enable = "avx512f")] -#[rustc_args_required_const(2)] -#[cfg_attr(test, assert_instr(vpcmp, op = 0))] -pub unsafe fn _mm512_cmp_epi64_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask8 { - let neg_one = -1; - macro_rules!
call { - ($imm3:expr) => { - vpcmpq(a.as_i64x8(), b.as_i64x8(), $imm3, neg_one) - }; - } - let r = constify_imm3!(op, call); - transmute(r) +#[cfg_attr(test, assert_instr(vcmp))] +pub unsafe fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 { + _mm512_cmp_ps_mask(a, b, _CMP_NLE_US) } -/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by op, -/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi64_mask) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnle_ps) #[inline] #[target_feature(enable = "avx512f")] -#[rustc_args_required_const(3)] -#[cfg_attr(test, assert_instr(vpcmp, op = 0))] -pub unsafe fn _mm512_mask_cmp_epi64_mask( - m: __mmask8, - a: __m512i, - b: __m512i, - op: _MM_CMPINT_ENUM, -) -> __mmask8 { - macro_rules! call { - ($imm3:expr) => { - vpcmpq(a.as_i64x8(), b.as_i64x8(), $imm3, m as i8) - }; - } - let r = constify_imm3!(op, call); - transmute(r) +#[cfg_attr(test, assert_instr(vcmp))] +pub unsafe fn _mm512_mask_cmpnle_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 { + _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLE_US) } -/// Returns vector of type `__m512d` with undefined elements. +/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in a mask vector. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_pd) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_ps) #[inline] #[target_feature(enable = "avx512f")] -// This intrinsic has no corresponding instruction. -pub unsafe fn _mm512_undefined_pd() -> __m512d { - _mm512_set1_pd(0.0) +#[cfg_attr(test, assert_instr(vcmp))] +pub unsafe fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 { + _mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ) } -/// Returns vector of type `__m512` with undefined elements. +/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ps) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_ps) #[inline] #[target_feature(enable = "avx512f")] -// This intrinsic has no corresponding instruction. -pub unsafe fn _mm512_undefined_ps() -> __m512 { - _mm512_set1_ps(0.0) +#[cfg_attr(test, assert_instr(vcmp))] +pub unsafe fn _mm512_mask_cmpeq_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 { + _mm512_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OQ) } -/// Loads 512-bits (composed of 8 packed double-precision (64-bit) -/// floating-point elements) from memory into result. -/// `mem_addr` does not need to be aligned on any particular boundary.
+/// Compare packed single-precision (32-bit) floating-point elements in a and b for inequality, and store the results in a mask vector. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_pd) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_ps) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vmovups))] -pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d { - ptr::read_unaligned(mem_addr as *const __m512d) +#[cfg_attr(test, assert_instr(vcmp))] +pub unsafe fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 { + _mm512_cmp_ps_mask(a, b, _CMP_NEQ_UQ) } -/// Stores 512-bits (composed of 8 packed double-precision (64-bit) -/// floating-point elements) from `a` into memory. -/// `mem_addr` does not need to be aligned on any particular boundary. +/// Compare packed single-precision (32-bit) floating-point elements in a and b for inequality, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_pd) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_ps_mask) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vmovups))] -pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) { - ptr::write_unaligned(mem_addr as *mut __m512d, a); +#[cfg_attr(test, assert_instr(vcmp))] +pub unsafe fn _mm512_mask_cmpneq_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 { + _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_UQ) } -/// Loads 512-bits (composed of 16 packed single-precision (32-bit) -/// floating-point elements) from memory into result. -/// `mem_addr` does not need to be aligned on any particular boundary. +/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ps) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_ps_mask) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vmovups))] -pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 { - ptr::read_unaligned(mem_addr as *const __m512) +#[rustc_args_required_const(2)] +#[cfg_attr(test, assert_instr(vcmp, op = 0))] +pub unsafe fn _mm512_cmp_ps_mask(a: __m512, b: __m512, op: i32) -> __mmask16 { + let neg_one = -1; + macro_rules! call { + ($imm5:expr) => { + vcmpps( + a.as_f32x16(), + b.as_f32x16(), + $imm5, + neg_one, + _MM_FROUND_CUR_DIRECTION, + ) + }; + } + let r = constify_imm5!(op, call); + transmute(r) } -/// Stores 512-bits (composed of 16 packed single-precision (32-bit) -/// floating-point elements) from `a` into memory. -/// `mem_addr` does not need to be aligned on any particular boundary. +/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op, +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). 
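Because of `rustc_args_required_const`, the `op` predicate must be a compile-time constant at the call site; the `_CMP_*` constants from the AVX comparison set are the intended values. A sketch (the predicate choice is ours):

```rust
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn le_mask(a: __m512, b: __m512) -> __mmask16 {
    // `op` must be a literal constant; a runtime variable is rejected.
    _mm512_cmp_ps_mask(a, b, _CMP_LE_OQ)
}
```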
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ps) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_ps_mask) #[inline] #[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(vmovups))] -#[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) { - ptr::write_unaligned(mem_addr as *mut __m512, a); +#[rustc_args_required_const(3)] +#[cfg_attr(test, assert_instr(vcmp, op = 0))] +pub unsafe fn _mm512_mask_cmp_ps_mask(m: __mmask16, a: __m512, b: __m512, op: i32) -> __mmask16 { + macro_rules! call { + ($imm5:expr) => { + vcmpps( + a.as_f32x16(), + b.as_f32x16(), + $imm5, + m as i16, + _MM_FROUND_CUR_DIRECTION, + ) + }; + } + let r = constify_imm5!(op, call); + transmute(r) } -/// Sets packed 64-bit integers in `dst` with the supplied values in -/// reverse order. +/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op. /// -/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_ps_mask) #[inline] #[target_feature(enable = "avx512f")] -pub unsafe fn _mm512_setr_pd( - e0: f64, - e1: f64, - e2: f64, - e3: f64, - e4: f64, - e5: f64, - e6: f64, - e7: f64, -) -> __m512d { - let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7); +#[rustc_args_required_const(2, 3)] +#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))] +pub unsafe fn _mm512_cmp_round_ps_mask(a: __m512, b: __m512, op: i32, sae: i32) -> __mmask16 { + let neg_one = -1; + macro_rules! call { + ($imm5:expr, $imm4:expr) => { + vcmpps(a.as_f32x16(), b.as_f32x16(), $imm5, neg_one, $imm4) + }; + } + let r = constify_imm5_sae!(op, sae, call); transmute(r) } -/// Sets packed 64-bit integers in `dst` with the supplied values. +/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op, +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). /// -/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_ps_mask) #[inline] #[target_feature(enable = "avx512f")] -pub unsafe fn _mm512_set_pd( - e0: f64, - e1: f64, - e2: f64, - e3: f64, - e4: f64, - e5: f64, - e6: f64, - e7: f64, -) -> __m512d { - _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0) +#[rustc_args_required_const(3, 4)] +#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))] +pub unsafe fn _mm512_mask_cmp_round_ps_mask( + m: __mmask16, + a: __m512, + b: __m512, + op: i32, + sae: i32, +) -> __mmask16 { + macro_rules! 
call { + ($imm5:expr, $imm4:expr) => { + vcmpps(a.as_f32x16(), b.as_f32x16(), $imm5, m as i16, $imm4) + }; + } + let r = constify_imm5_sae!(op, sae, call); + transmute(r) } -/// Equal -pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00; -/// Less-than -pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01; -/// Less-than-or-equal -pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02; -/// False -pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03; -/// Not-equal -pub const _MM_CMPINT_NE: _MM_CMPINT_ENUM = 0x04; -/// Not less-than -pub const _MM_CMPINT_NLT: _MM_CMPINT_ENUM = 0x05; -/// Not less-than-or-equal -pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06; -/// True -pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07; +/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_ps_mask) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcmp, op = 0))] +pub unsafe fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 { + _mm512_cmp_ps_mask(a, b, _CMP_ORD_Q) +} -#[allow(improper_ctypes)] -extern "C" { - #[link_name = "llvm.x86.avx512.gather.dpd.512"] - fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8; - #[link_name = "llvm.x86.avx512.gather.dps.512"] - fn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16; - #[link_name = "llvm.x86.avx512.gather.qpd.512"] - fn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8; - #[link_name = "llvm.x86.avx512.gather.qps.512"] - fn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8; - #[link_name = "llvm.x86.avx512.gather.dpq.512"] - fn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8; - #[link_name = "llvm.x86.avx512.gather.dpi.512"] - fn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16; - #[link_name = "llvm.x86.avx512.gather.qpq.512"] - fn vpgatherqq(src: i64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8; - #[link_name = "llvm.x86.avx512.gather.qpi.512"] - fn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8; +/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_ps_mask) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcmp, op = 0))] +pub unsafe fn _mm512_mask_cmpord_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 { + _mm512_mask_cmp_ps_mask(m, a, b, _CMP_ORD_Q) +} - #[link_name = "llvm.x86.avx512.scatter.dpd.512"] - fn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32); - #[link_name = "llvm.x86.avx512.scatter.dps.512"] - fn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32); - #[link_name = "llvm.x86.avx512.scatter.qpd.512"] - fn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32); - #[link_name = "llvm.x86.avx512.scatter.qps.512"] - fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32); - #[link_name = "llvm.x86.avx512.scatter.dpq.512"] - fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32); - #[link_name = "llvm.x86.avx512.scatter.dpi.512"] - fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32); - #[link_name = "llvm.x86.avx512.scatter.qpq.512"] - fn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32); - #[link_name = "llvm.x86.avx512.scatter.qpi.512"] - fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32); +/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_ps_mask) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcmp, op = 0))] +pub unsafe fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 { + _mm512_cmp_ps_mask(a, b, _CMP_UNORD_Q) +} - #[link_name = "llvm.x86.avx512.mask.cmp.ss"] - fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8; - #[link_name = "llvm.x86.avx512.mask.cmp.sd"] - fn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8; - #[link_name = "llvm.x86.avx512.mask.cmp.ps.512"] - fn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16; - #[link_name = "llvm.x86.avx512.mask.cmp.pd.512"] - fn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8; - #[link_name = "llvm.x86.avx512.mask.ucmp.q.512"] - fn vpcmpuq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8; - #[link_name = "llvm.x86.avx512.mask.cmp.q.512"] - fn vpcmpq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8; - #[link_name = "llvm.x86.avx512.mask.ucmp.d.512"] - fn vpcmpud(a: i32x16, b: i32x16, op: i32, m: i16) -> i16; - #[link_name = "llvm.x86.avx512.mask.cmp.d.512"] - fn vpcmpd(a: i32x16, b: i32x16, op: i32, m: i16) -> i16; +/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector. 
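A plain-Rust model of the `_CMP_ORD_Q`/`_CMP_UNORD_Q` predicates used by the `cmpord`/`cmpunord` intrinsics: ORD sets a lane's bit when neither input is NaN, UNORD when at least one is.

```rust
fn ord_unord_model(a: &[f32; 16], b: &[f32; 16]) -> (u16, u16) {
    let (mut ord, mut unord) = (0u16, 0u16);
    for i in 0..16 {
        if a[i].is_nan() || b[i].is_nan() {
            unord |= 1 << i; // at least one NaN: unordered
        } else {
            ord |= 1 << i; // no NaN: ordered
        }
    }
    (ord, unord) // the two masks are bitwise complements
}
```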
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_ps_mask) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcmp, op = 0))] +pub unsafe fn _mm512_mask_cmpunord_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 { + _mm512_mask_cmp_ps_mask(m, a, b, _CMP_UNORD_Q) +} - #[link_name = "llvm.x86.avx512.mask.prol.d.512"] - fn vprold(a: i32x16, i8: i32) -> i32x16; - #[link_name = "llvm.x86.avx512.mask.pror.d.512"] - fn vprord(a: i32x16, i8: i32) -> i32x16; - #[link_name = "llvm.x86.avx512.mask.prol.q.512"] - fn vprolq(a: i64x8, i8: i32) -> i64x8; - #[link_name = "llvm.x86.avx512.mask.pror.q.512"] - fn vprorq(a: i64x8, i8: i32) -> i64x8; +/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_pd) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcmp))] +pub unsafe fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { + _mm512_cmp_pd_mask(a, b, _CMP_LT_OS) +} - #[link_name = "llvm.x86.avx512.mask.prolv.d.512"] - fn vprolvd(a: i32x16, b: i32x16) -> i32x16; - #[link_name = "llvm.x86.avx512.mask.prorv.d.512"] - fn vprorvd(a: i32x16, b: i32x16) -> i32x16; - #[link_name = "llvm.x86.avx512.mask.prolv.q.512"] - fn vprolvq(a: i64x8, b: i64x8) -> i64x8; - #[link_name = "llvm.x86.avx512.mask.prorv.q.512"] - fn vprorvq(a: i64x8, b: i64x8) -> i64x8; +/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_pd) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcmp))] +pub unsafe fn _mm512_mask_cmplt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { + _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LT_OS) +} - #[link_name = "llvm.x86.avx512.psllv.d.512"] - fn vpsllvd(a: i32x16, b: i32x16) -> i32x16; - #[link_name = "llvm.x86.avx512.psrlv.d.512"] - fn vpsrlvd(a: i32x16, b: i32x16) -> i32x16; - #[link_name = "llvm.x86.avx512.psllv.q.512"] - fn vpsllvq(a: i64x8, b: i64x8) -> i64x8; - #[link_name = "llvm.x86.avx512.psrlv.q.512"] - fn vpsrlvq(a: i64x8, b: i64x8) -> i64x8; +/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in a mask vector.
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnlt_pd) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcmp))] +pub unsafe fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { + _mm512_cmp_pd_mask(a, b, _CMP_NLT_US) +} - #[link_name = "llvm.x86.avx512.pslli.d.512"] - fn vpsllid(a: i32x16, imm8: u32) -> i32x16; - #[link_name = "llvm.x86.avx512.psrli.d.512"] - fn vpsrlid(a: i32x16, imm8: u32) -> i32x16; - #[link_name = "llvm.x86.avx512.pslli.q.512"] - fn vpslliq(a: i64x8, imm8: u32) -> i64x8; - #[link_name = "llvm.x86.avx512.psrli.q.512"] - fn vpsrliq(a: i64x8, imm8: u32) -> i64x8; +/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnlt_pd) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcmp))] +pub unsafe fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { + _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLT_US) +} - #[link_name = "llvm.x86.avx512.psll.d.512"] - fn vpslld(a: i32x16, count: i32x4) -> i32x16; - #[link_name = "llvm.x86.avx512.psrl.d.512"] - fn vpsrld(a: i32x16, count: i32x4) -> i32x16; - #[link_name = "llvm.x86.avx512.psll.q.512"] - fn vpsllq(a: i64x8, count: i64x2) -> i64x8; - #[link_name = "llvm.x86.avx512.psrl.q.512"] - fn vpsrlq(a: i64x8, count: i64x2) -> i64x8; +/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_pd) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcmp))] +pub unsafe fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { + _mm512_cmp_pd_mask(a, b, _CMP_LE_OS) +} - #[link_name = "llvm.x86.avx512.psra.d.512"] - fn vpsrad(a: i32x16, count: i32x4) -> i32x16; - #[link_name = "llvm.x86.avx512.psra.q.512"] - fn vpsraq(a: i64x8, count: i64x2) -> i64x8; +/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_pd) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcmp))] +pub unsafe fn _mm512_mask_cmple_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { + _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LE_OS) +} - #[link_name = "llvm.x86.avx512.psrai.d.512"] - fn vpsraid(a: i32x16, imm8: u32) -> i32x16; - #[link_name = "llvm.x86.avx512.psrai.q.512"] - fn vpsraiq(a: i64x8, imm8: u32) -> i64x8; +/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in a mask vector.
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpnle_pd) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcmp))] +pub unsafe fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { + _mm512_cmp_pd_mask(a, b, _CMP_NLE_US) +} - #[link_name = "llvm.x86.avx512.psrav.d.512"] - fn vpsravd(a: i32x16, count: i32x16) -> i32x16; - #[link_name = "llvm.x86.avx512.psrav.q.512"] - fn vpsravq(a: i64x8, count: i64x8) -> i64x8; +/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpnle_pd) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcmp))] +pub unsafe fn _mm512_mask_cmpnle_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { + _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLE_US) +} - #[link_name = "llvm.x86.avx512.kand.w"] - fn kandw(ma: u16, mb: u16) -> u16; - #[link_name = "llvm.x86.avx512.kor.w"] - fn korw(ma: u16, mb: u16) -> u16; - #[link_name = "llvm.x86.avx512.kxor.w"] - fn kxorw(ma: u16, mb: u16) -> u16; +/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_pd) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcmp))] +pub unsafe fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { + _mm512_cmp_pd_mask(a, b, _CMP_EQ_OQ) } -#[cfg(test)] -mod tests { - use std; - use stdarch_test::simd_test; +/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_pd) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcmp))] +pub unsafe fn _mm512_mask_cmpeq_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { + _mm512_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OQ) +} - use crate::core_arch::x86::*; - use crate::hint::black_box; +/// Compare packed double-precision (64-bit) floating-point elements in a and b for inequality, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_pd) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcmp))] +pub unsafe fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { + _mm512_cmp_pd_mask(a, b, _CMP_NEQ_UQ) +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b for inequality, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
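The `pd` compares mirror the `ps` family, but eight 64-bit lanes yield a `__mmask8` instead of a `__mmask16`; a small sketch (the helper name is ours):

```rust
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn count_unequal_lanes(a: __m512d, b: __m512d) -> u32 {
    // One bit per f64 lane, so at most 8 bits can be set.
    _mm512_cmpneq_pd_mask(a, b).count_ones()
}
```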
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_pd_mask) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcmp))] +pub unsafe fn _mm512_mask_cmpneq_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { + _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_UQ) +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_pd_mask) +#[inline] +#[target_feature(enable = "avx512f")] +#[rustc_args_required_const(2)] +#[cfg_attr(test, assert_instr(vcmp, op = 0))] +pub unsafe fn _mm512_cmp_pd_mask(a: __m512d, b: __m512d, op: i32) -> __mmask8 { + let neg_one = -1; + macro_rules! call { + ($imm5:expr) => { + vcmppd( + a.as_f64x8(), + b.as_f64x8(), + $imm5, + neg_one, + _MM_FROUND_CUR_DIRECTION, + ) + }; + } + let r = constify_imm5!(op, call); + transmute(r) +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op, +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_pd_mask) +#[inline] +#[target_feature(enable = "avx512f")] +#[rustc_args_required_const(3)] +#[cfg_attr(test, assert_instr(vcmp, op = 0))] +pub unsafe fn _mm512_mask_cmp_pd_mask(m: __mmask8, a: __m512d, b: __m512d, op: i32) -> __mmask8 { + macro_rules! call { + ($imm5:expr) => { + vcmppd( + a.as_f64x8(), + b.as_f64x8(), + $imm5, + m as i8, + _MM_FROUND_CUR_DIRECTION, + ) + }; + } + let r = constify_imm5!(op, call); + transmute(r) +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_pd_mask) +#[inline] +#[target_feature(enable = "avx512f")] +#[rustc_args_required_const(2, 3)] +#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))] +pub unsafe fn _mm512_cmp_round_pd_mask(a: __m512d, b: __m512d, op: i32, sae: i32) -> __mmask8 { + let neg_one = -1; + macro_rules! call { + ($imm5:expr, $imm4:expr) => { + vcmppd(a.as_f64x8(), b.as_f64x8(), $imm5, neg_one, $imm4) + }; + } + let r = constify_imm5_sae!(op, sae, call); + transmute(r) +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by op, +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_round_pd_mask) +#[inline] +#[target_feature(enable = "avx512f")] +#[rustc_args_required_const(3, 4)] +#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))] +pub unsafe fn _mm512_mask_cmp_round_pd_mask( + m: __mmask8, + a: __m512d, + b: __m512d, + op: i32, + sae: i32, +) -> __mmask8 { + macro_rules! 
call { + ($imm5:expr, $imm4:expr) => { + vcmppd(a.as_f64x8(), b.as_f64x8(), $imm5, m as i8, $imm4) + }; + } + let r = constify_imm5_sae!(op, sae, call); + transmute(r) +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_pd_mask) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcmp, op = 0))] +pub unsafe fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { + _mm512_cmp_pd_mask(a, b, _CMP_ORD_Q) +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpord_pd_mask) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcmp, op = 0))] +pub unsafe fn _mm512_mask_cmpord_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { + _mm512_mask_cmp_pd_mask(m, a, b, _CMP_ORD_Q) +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_pd_mask) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcmp, op = 0))] +pub unsafe fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 { + _mm512_cmp_pd_mask(a, b, _CMP_UNORD_Q) +} + +/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpunord_pd_mask) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcmp, op = 0))] +pub unsafe fn _mm512_mask_cmpunord_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 { + _mm512_mask_cmp_pd_mask(m, a, b, _CMP_UNORD_Q) +} + +/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ss_mask&expand=5236,755,757) +#[inline] +#[target_feature(enable = "avx512f")] +#[rustc_args_required_const(2)] +#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))] +pub unsafe fn _mm_cmp_ss_mask(a: __m128, b: __m128, op: i32) -> __mmask8 { + let neg_one = -1; + macro_rules! call { + ($imm5:expr) => { + vcmpss(a, b, $imm5, neg_one, _MM_FROUND_CUR_DIRECTION) + }; + } + let r = constify_imm5!(op, call); + transmute(r) +} + +/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set). 
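The scalar `ss`/`sd` forms compare only lane 0, so at most bit 0 of the returned mask is meaningful. A sketch (the predicate is an example of ours):

```rust
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn low_lane_lt(a: __m128, b: __m128) -> bool {
    // Upper lanes are ignored; only the lowest f32 is compared.
    _mm_cmp_ss_mask(a, b, _CMP_LT_OS) & 1 != 0
}
```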
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_ss_mask&expand=5236,755,757) +#[inline] +#[target_feature(enable = "avx512f")] +#[rustc_args_required_const(3)] +#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))] +pub unsafe fn _mm_mask_cmp_ss_mask(m: __mmask8, a: __m128, b: __m128, op: i32) -> __mmask8 { + macro_rules! call { + ($imm5:expr) => { + vcmpss(a, b, $imm5, m as i8, _MM_FROUND_CUR_DIRECTION) + }; + } + let r = constify_imm5!(op, call); + transmute(r) +} + +/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_ss_mask&expand=5236,755,757) +#[inline] +#[target_feature(enable = "avx512f")] +#[rustc_args_required_const(2, 3)] +#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))] +pub unsafe fn _mm_cmp_round_ss_mask(a: __m128, b: __m128, op: i32, sae: i32) -> __mmask8 { + let neg_one = -1; + macro_rules! call { + ($imm5:expr, $imm4:expr) => { + vcmpss(a, b, $imm5, neg_one, $imm4) + }; + } + let r = constify_imm5_sae!(op, sae, call); + transmute(r) +} + +/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_ss_mask&expand=5236,755,757) +#[inline] +#[target_feature(enable = "avx512f")] +#[rustc_args_required_const(3, 4)] +#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))] +pub unsafe fn _mm_mask_cmp_round_ss_mask( + m: __mmask8, + a: __m128, + b: __m128, + op: i32, + sae: i32, +) -> __mmask8 { + macro_rules! call { + ($imm5:expr, $imm4:expr) => { + vcmpss(a, b, $imm5, m as i8, $imm4) + }; + } + let r = constify_imm5_sae!(op, sae, call); + transmute(r) +} + +/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sd_mask&expand=5236,755,757) +#[inline] +#[target_feature(enable = "avx512f")] +#[rustc_args_required_const(2)] +#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))] +pub unsafe fn _mm_cmp_sd_mask(a: __m128d, b: __m128d, op: i32) -> __mmask8 { + let neg_one = -1; + macro_rules! call { + ($imm5:expr) => { + vcmpsd(a, b, $imm5, neg_one, _MM_FROUND_CUR_DIRECTION) + }; + } + let r = constify_imm5!(op, call); + transmute(r) +} + +/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set).
call { + ($imm5:expr) => { + vcmpsd(a, b, $imm5, m as i8, _MM_FROUND_CUR_DIRECTION) + }; + } + let r = constify_imm5!(op, call); + transmute(r) +} + +/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_round_sd_mask&expand=5236,755,757) +#[inline] +#[target_feature(enable = "avx512f")] +#[rustc_args_required_const(2, 3)] +#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))] +pub unsafe fn _mm_cmp_round_sd_mask(a: __m128d, b: __m128d, op: i32, sae: i32) -> __mmask8 { + let neg_one = -1; + macro_rules! call { + ($imm5:expr, $imm4:expr) => { + vcmpsd(a, b, $imm5, neg_one, $imm4) + }; + } + let r = constify_imm5_sae!(op, sae, call); + transmute(r) +} + +/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in a mask vector using zeromask m (the element is zeroed out when mask bit 0 is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cmp_round_sd_mask&expand=5236,755,757) +#[inline] +#[target_feature(enable = "avx512f")] +#[rustc_args_required_const(3, 4)] +#[cfg_attr(test, assert_instr(vcmp, op = 0, sae = 4))] +pub unsafe fn _mm_mask_cmp_round_sd_mask( + m: __mmask8, + a: __m128d, + b: __m128d, + op: i32, + sae: i32, +) -> __mmask8 { + macro_rules! call { + ($imm5:expr, $imm4:expr) => { + vcmpsd(a, b, $imm5, m as i8, $imm4) + }; + } + let r = constify_imm5_sae!(op, sae, call); + transmute(r) +} + +/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { + simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16())) +} + +/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epu32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmplt_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { + _mm512_cmplt_epu32_mask(a, b) & m +} + +/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epu32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { + simd_bitmask::<u32x16, _>(simd_gt(a.as_u32x16(), b.as_u32x16())) +} + +/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
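Separate `epu32` and `epi32` compare families exist because the same bit pattern orders differently as unsigned versus signed; a plain-Rust illustration:

```rust
fn main() {
    let (x, y) = (0xFFFF_FFFF_u32, 1_u32);
    // Unsigned view: x is u32::MAX, so x < y is false...
    assert!(!(x < y));
    // ...signed view: the same bits are -1, so the ordering flips.
    assert!((x as i32) < (y as i32));
}
```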
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epu32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmpgt_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { + _mm512_cmpgt_epu32_mask(a, b) & m +} + +/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epu32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { + simd_bitmask::<u32x16, _>(simd_le(a.as_u32x16(), b.as_u32x16())) +} + +/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epu32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmple_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { + _mm512_cmple_epu32_mask(a, b) & m +} + +/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epu32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { + simd_bitmask::<u32x16, _>(simd_ge(a.as_u32x16(), b.as_u32x16())) +} + +/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epu32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmpge_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { + _mm512_cmpge_epu32_mask(a, b) & m +} + +/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epu32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { + simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16())) +} + +/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epu32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmpeq_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { + _mm512_cmpeq_epu32_mask(a, b) & m +} + +/// Compare packed unsigned 32-bit integers in a and b for inequality, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epu32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 { + simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16())) +} + +/// Compare packed unsigned 32-bit integers in a and b for inequality, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epu32_mask) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmpneq_epu32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { + _mm512_cmpneq_epu32_mask(a, b) & m +} + +/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by op. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu32_mask) +#[inline] +#[target_feature(enable = "avx512f")] +#[rustc_args_required_const(2)] +#[cfg_attr(test, assert_instr(vpcmp, op = 0))] +pub unsafe fn _mm512_cmp_epu32_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask16 { + let neg_one = -1; + macro_rules! call { + ($imm3:expr) => { + vpcmpud(a.as_i32x16(), b.as_i32x16(), $imm3, neg_one) + }; + } + let r = constify_imm3!(op, call); + transmute(r) +} + +/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by op, +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu32_mask) +#[inline] +#[target_feature(enable = "avx512f")] +#[rustc_args_required_const(3)] +#[cfg_attr(test, assert_instr(vpcmp, op = 0))] +pub unsafe fn _mm512_mask_cmp_epu32_mask( + m: __mmask16, + a: __m512i, + b: __m512i, + op: _MM_CMPINT_ENUM, +) -> __mmask16 { + macro_rules! call { + ($imm3:expr) => { + vpcmpud(a.as_i32x16(), b.as_i32x16(), $imm3, m as i16) + }; + } + let r = constify_imm3!(op, call); + transmute(r) +} + +/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in a mask vector.
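The `op`-driven form subsumes the named compares. Assuming the `_MM_CMPINT_*` constants (a 3-bit encoding, with `_MM_CMPINT_LT` = 0x01) remain in scope after this patch, the two calls below should agree:

```rust
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn lt_two_ways(a: __m512i, b: __m512i) -> bool {
    // The named intrinsic and the generic one with `_MM_CMPINT_LT`
    // select the same less-than predicate.
    _mm512_cmplt_epu32_mask(a, b) == _mm512_cmp_epu32_mask(a, b, _MM_CMPINT_LT)
}
```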
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { + simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16())) +} + +/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epi32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmplt_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { + _mm512_cmplt_epi32_mask(a, b) & m +} + +/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epi32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { + simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16())) +} + +/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epi32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmpgt_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { + _mm512_cmpgt_epi32_mask(a, b) & m +} + +/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epi32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 { + simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16())) +} + +/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epi32) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmple_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 { + _mm512_cmple_epi32_mask(a, b) & m +} + +/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16()))
+}
+
+/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpge_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmpge_epi32_mask(a, b) & m
+}
+
+/// Compare packed signed 32-bit integers in a and b for equality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    simd_bitmask::<i32x16, _>(simd_eq(a.as_i32x16(), b.as_i32x16()))
+}
+
+/// Compare packed signed 32-bit integers in a and b for equality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpeq_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmpeq_epi32_mask(a, b) & m
+}
+
+/// Compare packed signed 32-bit integers in a and b for inequality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+    simd_bitmask::<i32x16, _>(simd_ne(a.as_i32x16(), b.as_i32x16()))
+}
+
+/// Compare packed signed 32-bit integers in a and b for inequality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epi32)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpneq_epi32_mask(m: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+    _mm512_cmpneq_epi32_mask(a, b) & m
+}
+
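Each predicate returns a plain integer mask (`__mmask16` is a `u16`), so ordinary bit tricks apply directly to the result. A sketch under the same assumptions as above, with a hypothetical helper that walks the indices of matching lanes:

```rust
use std::arch::x86_64::*;

// Print the index of every lane where a < b (signed comparison).
#[target_feature(enable = "avx512f")]
unsafe fn report_less_than(a: __m512i, b: __m512i) {
    let mut m: __mmask16 = _mm512_cmplt_epi32_mask(a, b);
    while m != 0 {
        let lane = m.trailing_zeros(); // lowest matching lane
        println!("lane {} satisfies a < b", lane);
        m &= m - 1; // clear the lowest set bit
    }
}
```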
+/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by op.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_cmp_epi32_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_cmp_epi32_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask16 {
+    let neg_one = -1;
+    macro_rules! call {
+        ($imm3:expr) => {
+            vpcmpd(a.as_i32x16(), b.as_i32x16(), $imm3, neg_one)
+        };
+    }
+    let r = constify_imm3!(op, call);
+    transmute(r)
+}
+
+/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by op,
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi32_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmp_epi32_mask(
+    m: __mmask16,
+    a: __m512i,
+    b: __m512i,
+    op: _MM_CMPINT_ENUM,
+) -> __mmask16 {
+    macro_rules! call {
+        ($imm3:expr) => {
+            vpcmpd(a.as_i32x16(), b.as_i32x16(), $imm3, m as i16)
+        };
+    }
+    let r = constify_imm3!(op, call);
+    transmute(r)
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8()))
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmplt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmplt_epu64_mask(a, b) & m
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8()))
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpgt_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmpgt_epu64_mask(a, b) & m
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8()))
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmple_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmple_epu64_mask(a, b) & m
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8()))
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpge_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmpge_epu64_mask(a, b) & m
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8()))
+}
+
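The 64-bit unsigned predicates behave the same way, only with eight lanes and a `__mmask8` result. A small sketch (hypothetical helper, same assumptions as above):

```rust
use std::arch::x86_64::*;

// Does any unsigned 64-bit lane of `v` exceed `limit`?
#[target_feature(enable = "avx512f")]
unsafe fn any_lane_above(v: __m512i, limit: u64) -> bool {
    let limit_v = _mm512_set1_epi64(limit as i64);
    // Eight lanes, so the result is a `__mmask8` (a u8); nonzero means a hit.
    _mm512_cmpgt_epu64_mask(v, limit_v) != 0
}
```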
+/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpeq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmpeq_epu64_mask(a, b) & m
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for inequality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epu64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8()))
+}
+
+/// Compare packed unsigned 64-bit integers in a and b for inequality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epu64_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpneq_epu64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmpneq_epu64_mask(a, b) & m
+}
+
+/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by op.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_cmp_epu64_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_cmp_epu64_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask8 {
+    let neg_one = -1;
+    macro_rules! call {
+        ($imm3:expr) => {
+            vpcmpuq(a.as_i64x8(), b.as_i64x8(), $imm3, neg_one)
+        };
+    }
+    let r = constify_imm3!(op, call);
+    transmute(r)
+}
+
+/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by op,
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epu64_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmp_epu64_mask(
+    m: __mmask8,
+    a: __m512i,
+    b: __m512i,
+    op: _MM_CMPINT_ENUM,
+) -> __mmask8 {
+    macro_rules! call {
+        ($imm3:expr) => {
+            vpcmpuq(a.as_i64x8(), b.as_i64x8(), $imm3, m as i8)
+        };
+    }
+    let r = constify_imm3!(op, call);
+    transmute(r)
+}
+
+/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in a mask vector.
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { + simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8())) +} + +/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmplt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + _mm512_cmplt_epi64_mask(a, b) & m +} + +/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { + simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8())) +} + +/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmpgt_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + _mm512_cmpgt_epi64_mask(a, b) & m +} + +/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 { + simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8())) +} + +/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in a mask vector k +/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_epi64) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpcmp))] +pub unsafe fn _mm512_mask_cmple_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 { + _mm512_cmple_epi64_mask(a, b) & m +} + +/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector. 
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8()))
+}
+
+/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpge_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmpge_epi64_mask(a, b) & m
+}
+
+/// Compare packed signed 64-bit integers in a and b for equality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8()))
+}
+
+/// Compare packed signed 64-bit integers in a and b for equality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpeq_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmpeq_epi64_mask(a, b) & m
+}
+
+/// Compare packed signed 64-bit integers in a and b for inequality, and store the results in a mask vector.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+    simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8()))
+}
+
+/// Compare packed signed 64-bit integers in a and b for inequality, and store the results in a mask vector k
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_epi64)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpcmp))]
+pub unsafe fn _mm512_mask_cmpneq_epi64_mask(m: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+    _mm512_cmpneq_epi64_mask(a, b) & m
+}
+
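Since every lane contributes exactly one bit to the result, complementary predicates produce bitwise-complementary masks over the valid bits; for the eight-lane signed 64-bit family, `cmpge` is the exact complement of `cmplt`. A sketch of the identity (hypothetical helper, same assumptions as above):

```rust
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn masks_are_complements(a: __m512i, b: __m512i) {
    let ge = _mm512_cmpge_epi64_mask(a, b);
    let lt = _mm512_cmplt_epi64_mask(a, b);
    // `__mmask8` is a u8, so all eight bits are lane bits and `!` is exact.
    assert_eq!(ge, !lt);
}
```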
+/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by op.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_cmp_epi64_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(2)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_cmp_epi64_mask(a: __m512i, b: __m512i, op: _MM_CMPINT_ENUM) -> __mmask8 {
+    let neg_one = -1;
+    macro_rules! call {
+        ($imm3:expr) => {
+            vpcmpq(a.as_i64x8(), b.as_i64x8(), $imm3, neg_one)
+        };
+    }
+    let r = constify_imm3!(op, call);
+    transmute(r)
+}
+
+/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by op,
+/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_epi64_mask)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[rustc_args_required_const(3)]
+#[cfg_attr(test, assert_instr(vpcmp, op = 0))]
+pub unsafe fn _mm512_mask_cmp_epi64_mask(
+    m: __mmask8,
+    a: __m512i,
+    b: __m512i,
+    op: _MM_CMPINT_ENUM,
+) -> __mmask8 {
+    macro_rules! call {
+        ($imm3:expr) => {
+            vpcmpq(a.as_i64x8(), b.as_i64x8(), $imm3, m as i8)
+        };
+    }
+    let r = constify_imm3!(op, call);
+    transmute(r)
+}
+
+/// Returns vector of type `__m512d` with undefined elements.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+// This intrinsic has no corresponding instruction.
+pub unsafe fn _mm512_undefined_pd() -> __m512d {
+    _mm512_set1_pd(0.0)
+}
+
+/// Returns vector of type `__m512` with undefined elements.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ps)
+#[inline]
+#[target_feature(enable = "avx512f")]
+// This intrinsic has no corresponding instruction.
+pub unsafe fn _mm512_undefined_ps() -> __m512 {
+    _mm512_set1_ps(0.0)
+}
+
+/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
+/// floating-point elements) from memory into result.
+/// `mem_addr` does not need to be aligned on any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovups))]
+pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
+    ptr::read_unaligned(mem_addr as *const __m512d)
+}
+
+/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
+/// floating-point elements) from `a` into memory.
+/// `mem_addr` does not need to be aligned on any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovups))]
+pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
+    ptr::write_unaligned(mem_addr as *mut __m512d, a);
+}
+
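A round-trip sketch for the unaligned load/store pair (hypothetical helper; it also leans on `_mm512_set1_pd` and `_mm512_add_pd` from this module). The buffer only has `f64`'s natural 8-byte alignment, which is exactly what the `u` variants permit:

```rust
use std::arch::x86_64::*;

// Add `offset` to eight f64 values in place.
#[target_feature(enable = "avx512f")]
unsafe fn offset_in_place(data: &mut [f64; 8], offset: f64) {
    let v = _mm512_loadu_pd(data.as_ptr());
    let shifted = _mm512_add_pd(v, _mm512_set1_pd(offset));
    _mm512_storeu_pd(data.as_mut_ptr(), shifted);
}
```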
+/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
+/// floating-point elements) from memory into result.
+/// `mem_addr` does not need to be aligned on any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ps)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovups))]
+pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
+    ptr::read_unaligned(mem_addr as *const __m512)
+}
+
+/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
+/// floating-point elements) from `a` into memory.
+/// `mem_addr` does not need to be aligned on any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ps)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovups))]
+pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
+    ptr::write_unaligned(mem_addr as *mut __m512, a);
+}
+
+/// Sets packed double-precision (64-bit) floating-point elements in `dst` with
+/// the supplied values in reverse order.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_setr_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+pub unsafe fn _mm512_setr_pd(
+    e0: f64,
+    e1: f64,
+    e2: f64,
+    e3: f64,
+    e4: f64,
+    e5: f64,
+    e6: f64,
+    e7: f64,
+) -> __m512d {
+    let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
+    transmute(r)
+}
+
+/// Sets packed double-precision (64-bit) floating-point elements in `dst` with
+/// the supplied values.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
+#[inline]
+#[target_feature(enable = "avx512f")]
+pub unsafe fn _mm512_set_pd(
+    e0: f64,
+    e1: f64,
+    e2: f64,
+    e3: f64,
+    e4: f64,
+    e5: f64,
+    e6: f64,
+    e7: f64,
+) -> __m512d {
+    _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
+}
+
+/// Equal
+pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
+/// Less-than
+pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01;
+/// Less-than-or-equal
+pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02;
+/// False
+pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03;
+/// Not-equal
+pub const _MM_CMPINT_NE: _MM_CMPINT_ENUM = 0x04;
+/// Not less-than
+pub const _MM_CMPINT_NLT: _MM_CMPINT_ENUM = 0x05;
+/// Not less-than-or-equal
+pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06;
+/// True
+pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07;
+
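These constants are the immediates accepted by the `_mm512_cmp_*_mask` family above, and the named predicates are fixed choices of that immediate; the operand must be a compile-time constant because of `rustc_args_required_const`. A sketch of the correspondence (hypothetical helper, same assumptions as above):

```rust
use std::arch::x86_64::*;

// _MM_CMPINT_NLT ("not less-than") is the same predicate as cmpge.
#[target_feature(enable = "avx512f")]
unsafe fn cmpge_two_ways(a: __m512i, b: __m512i) -> __mmask16 {
    let via_op = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_NLT);
    let named = _mm512_cmpge_epi32_mask(a, b);
    assert_eq!(via_op, named);
    named
}
```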
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.x86.avx512.pmul.dq.512"]
+    fn vpmuldq(a: i32x16, b: i32x16) -> i64x8;
+    #[link_name = "llvm.x86.avx512.pmulu.dq.512"]
+    fn vpmuludq(a: u32x16, b: u32x16) -> u64x8;
+
+    #[link_name = "llvm.x86.avx512.mask.pmaxs.d.512"]
+    fn vpmaxsd(a: i32x16, b: i32x16) -> i32x16;
+    #[link_name = "llvm.x86.avx512.mask.pmaxs.q.512"]
+    fn vpmaxsq(a: i64x8, b: i64x8) -> i64x8;
+    #[link_name = "llvm.x86.avx512.mask.pmins.d.512"]
+    fn vpminsd(a: i32x16, b: i32x16) -> i32x16;
+    #[link_name = "llvm.x86.avx512.mask.pmins.q.512"]
+    fn vpminsq(a: i64x8, b: i64x8) -> i64x8;
+
+    #[link_name = "llvm.x86.avx512.mask.pmaxu.d.512"]
+    fn vpmaxud(a: u32x16, b: u32x16) -> u32x16;
+    #[link_name = "llvm.x86.avx512.mask.pmaxu.q.512"]
+    fn vpmaxuq(a: u64x8, b: u64x8) -> u64x8;
+    #[link_name = "llvm.x86.avx512.mask.pminu.d.512"]
+    fn vpminud(a: u32x16, b: u32x16) -> u32x16;
+    #[link_name = "llvm.x86.avx512.mask.pminu.q.512"]
+    fn vpminuq(a: u64x8, b: u64x8) -> u64x8;
+
+    #[link_name = "llvm.sqrt.v16f32"]
+    fn vsqrtps(a: f32x16) -> f32x16;
+    #[link_name = "llvm.sqrt.v8f64"]
+    fn vsqrtpd(a: f64x8) -> f64x8;
+
+    #[link_name = "llvm.x86.avx512.gather.dpd.512"]
+    fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8;
+    #[link_name = "llvm.x86.avx512.gather.dps.512"]
+    fn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16;
+    #[link_name = "llvm.x86.avx512.gather.qpd.512"]
+    fn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8;
+    #[link_name = "llvm.x86.avx512.gather.qps.512"]
+    fn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8;
+    #[link_name = "llvm.x86.avx512.gather.dpq.512"]
+    fn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8;
+    #[link_name = "llvm.x86.avx512.gather.dpi.512"]
+    fn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16;
+    #[link_name = "llvm.x86.avx512.gather.qpq.512"]
+    fn vpgatherqq(src: i64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8;
+    #[link_name = "llvm.x86.avx512.gather.qpi.512"]
+    fn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8;
+
+    #[link_name = "llvm.x86.avx512.scatter.dpd.512"]
+    fn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32);
+    #[link_name = "llvm.x86.avx512.scatter.dps.512"]
+    fn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32);
+    #[link_name = "llvm.x86.avx512.scatter.qpd.512"]
+    fn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32);
+    #[link_name = "llvm.x86.avx512.scatter.qps.512"]
+    fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
+    #[link_name = "llvm.x86.avx512.scatter.dpq.512"]
+    fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);
+    #[link_name = "llvm.x86.avx512.scatter.dpi.512"]
+    fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);
+    #[link_name = "llvm.x86.avx512.scatter.qpq.512"]
+    fn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32);
+    #[link_name = "llvm.x86.avx512.scatter.qpi.512"]
+    fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);
+
+    #[link_name = "llvm.x86.avx512.mask.cmp.ss"]
+    fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
+    #[link_name = "llvm.x86.avx512.mask.cmp.sd"]
+    fn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8;
+    #[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
+    fn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16;
+    #[link_name = "llvm.x86.avx512.mask.cmp.pd.512"]
+    fn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8;
+    #[link_name = "llvm.x86.avx512.mask.ucmp.q.512"]
+    fn vpcmpuq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8;
+    #[link_name = "llvm.x86.avx512.mask.cmp.q.512"]
+    fn vpcmpq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8;
+    #[link_name = "llvm.x86.avx512.mask.ucmp.d.512"]
+    fn vpcmpud(a: i32x16, b: i32x16, op: i32, m: i16) -> i16;
+    #[link_name = "llvm.x86.avx512.mask.cmp.d.512"]
+    fn vpcmpd(a: i32x16, b: i32x16, op: i32, m: i16) -> i16;
+
+    #[link_name = "llvm.x86.avx512.mask.prol.d.512"]
+    fn vprold(a: i32x16, imm8: i32) -> i32x16;
+    #[link_name = "llvm.x86.avx512.mask.pror.d.512"]
+    fn vprord(a: i32x16, imm8: i32) -> i32x16;
+    #[link_name = "llvm.x86.avx512.mask.prol.q.512"]
+    fn vprolq(a: i64x8, imm8: i32) -> i64x8;
+    #[link_name = "llvm.x86.avx512.mask.pror.q.512"]
+    fn vprorq(a: i64x8, imm8: i32) -> i64x8;
+
+    #[link_name =
"llvm.x86.avx512.mask.prolv.d.512"] + fn vprolvd(a: i32x16, b: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.mask.prorv.d.512"] + fn vprorvd(a: i32x16, b: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.mask.prolv.q.512"] + fn vprolvq(a: i64x8, b: i64x8) -> i64x8; + #[link_name = "llvm.x86.avx512.mask.prorv.q.512"] + fn vprorvq(a: i64x8, b: i64x8) -> i64x8; + + #[link_name = "llvm.x86.avx512.psllv.d.512"] + fn vpsllvd(a: i32x16, b: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.psrlv.d.512"] + fn vpsrlvd(a: i32x16, b: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.psllv.q.512"] + fn vpsllvq(a: i64x8, b: i64x8) -> i64x8; + #[link_name = "llvm.x86.avx512.psrlv.q.512"] + fn vpsrlvq(a: i64x8, b: i64x8) -> i64x8; + + #[link_name = "llvm.x86.avx512.pslli.d.512"] + fn vpsllid(a: i32x16, imm8: u32) -> i32x16; + #[link_name = "llvm.x86.avx512.psrli.d.512"] + fn vpsrlid(a: i32x16, imm8: u32) -> i32x16; + #[link_name = "llvm.x86.avx512.pslli.q.512"] + fn vpslliq(a: i64x8, imm8: u32) -> i64x8; + #[link_name = "llvm.x86.avx512.psrli.q.512"] + fn vpsrliq(a: i64x8, imm8: u32) -> i64x8; + + #[link_name = "llvm.x86.avx512.psll.d.512"] + fn vpslld(a: i32x16, count: i32x4) -> i32x16; + #[link_name = "llvm.x86.avx512.psrl.d.512"] + fn vpsrld(a: i32x16, count: i32x4) -> i32x16; + #[link_name = "llvm.x86.avx512.psll.q.512"] + fn vpsllq(a: i64x8, count: i64x2) -> i64x8; + #[link_name = "llvm.x86.avx512.psrl.q.512"] + fn vpsrlq(a: i64x8, count: i64x2) -> i64x8; + + #[link_name = "llvm.x86.avx512.psra.d.512"] + fn vpsrad(a: i32x16, count: i32x4) -> i32x16; + #[link_name = "llvm.x86.avx512.psra.q.512"] + fn vpsraq(a: i64x8, count: i64x2) -> i64x8; + + #[link_name = "llvm.x86.avx512.psrai.d.512"] + fn vpsraid(a: i32x16, imm8: u32) -> i32x16; + #[link_name = "llvm.x86.avx512.psrai.q.512"] + fn vpsraiq(a: i64x8, imm8: u32) -> i64x8; + + #[link_name = "llvm.x86.avx512.psrav.d.512"] + fn vpsravd(a: i32x16, count: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.psrav.q.512"] + fn vpsravq(a: i64x8, count: i64x8) -> i64x8; + + #[link_name = "llvm.x86.avx512.kand.w"] + fn kandw(ma: u16, mb: u16) -> u16; + #[link_name = "llvm.x86.avx512.kor.w"] + fn korw(ma: u16, mb: u16) -> u16; + #[link_name = "llvm.x86.avx512.kxor.w"] + fn kxorw(ma: u16, mb: u16) -> u16; +} + +#[cfg(test)] +mod tests { + use std; + use stdarch_test::simd_test; + + use crate::core_arch::x86::*; + use crate::hint::black_box; + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_abs_epi32() { + #[rustfmt::skip] + let a = _mm512_setr_epi32( + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + let r = _mm512_abs_epi32(a); + let e = _mm512_setr_epi32( + 0, + 1, + 1, + i32::MAX, + i32::MAX.wrapping_add(1), + 100, + 100, + 32, + 0, + 1, + 1, + i32::MAX, + i32::MAX.wrapping_add(1), + 100, + 100, + 32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_abs_epi32() { + #[rustfmt::skip] + let a = _mm512_setr_epi32( + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + let r = _mm512_mask_abs_epi32(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_abs_epi32(a, 0b00000000_11111111, a); + let e = _mm512_setr_epi32( + 0, + 1, + 1, + i32::MAX, + i32::MAX.wrapping_add(1), + 100, + 100, + 32, + 0, + 1, + -1, + i32::MAX, + i32::MIN, + 100, + -100, + -32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_abs_epi32() { + #[rustfmt::skip] + 
let a = _mm512_setr_epi32( + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + let r = _mm512_maskz_abs_epi32(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_abs_epi32(0b00000000_11111111, a); + let e = _mm512_setr_epi32( + 0, + 1, + 1, + i32::MAX, + i32::MAX.wrapping_add(1), + 100, + 100, + 32, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_abs_ps() { + #[rustfmt::skip] + let a = _mm512_setr_ps( + 0., 1., -1., f32::MAX, + f32::MIN, 100., -100., -32., + 0., 1., -1., f32::MAX, + f32::MIN, 100., -100., -32., + ); + let r = _mm512_abs_ps(a); + let e = _mm512_setr_ps( + 0., + 1., + 1., + f32::MAX, + f32::MAX, + 100., + 100., + 32., + 0., + 1., + 1., + f32::MAX, + f32::MAX, + 100., + 100., + 32., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_abs_ps() { + let a = _mm512_setr_ps( + 0., + 1., + -1., + f32::MAX, + f32::MIN, + 100., + -100., + -32., + 0., + 1., + -1., + f32::MAX, + f32::MIN, + 100., + -100., + -32., + ); + let r = _mm512_mask_abs_ps(a, 0, a); + assert_eq_m512(r, a); + let r = _mm512_mask_abs_ps(a, 0b00000000_11111111, a); + let e = _mm512_setr_ps( + 0., + 1., + 1., + f32::MAX, + f32::MAX, + 100., + 100., + 32., + 0., + 1., + -1., + f32::MAX, + f32::MIN, + 100., + -100., + -32., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_add_epi32() { + let a = _mm512_setr_epi32( + 0, + 1, + -1, + i32::MAX, + i32::MIN, + 100, + -100, + -32, + 0, + 1, + -1, + i32::MAX, + i32::MIN, + 100, + -100, + -32, + ); + let b = _mm512_set1_epi32(1); + let r = _mm512_add_epi32(a, b); + let e = _mm512_setr_epi32( + 1, + 2, + 0, + i32::MIN, + i32::MIN + 1, + 101, + -99, + -31, + 1, + 2, + 0, + i32::MIN, + i32::MIN + 1, + 101, + -99, + -31, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_add_epi32() { + #[rustfmt::skip] + let a = _mm512_setr_epi32( + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + let b = _mm512_set1_epi32(1); + let r = _mm512_mask_add_epi32(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_add_epi32(a, 0b00000000_11111111, a, b); + let e = _mm512_setr_epi32( + 1, + 2, + 0, + i32::MIN, + i32::MIN + 1, + 101, + -99, + -31, + 0, + 1, + -1, + i32::MAX, + i32::MIN, + 100, + -100, + -32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_add_epi32() { + #[rustfmt::skip] + let a = _mm512_setr_epi32( + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + 0, 1, -1, i32::MAX, + i32::MIN, 100, -100, -32, + ); + let b = _mm512_set1_epi32(1); + let r = _mm512_maskz_add_epi32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_add_epi32(0b00000000_11111111, a, b); + let e = _mm512_setr_epi32( + 1, + 2, + 0, + i32::MIN, + i32::MIN + 1, + 101, + -99, + -31, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_add_ps() { + let a = _mm512_setr_ps( + 0., + 1., + -1., + f32::MAX, + f32::MIN, + 100., + -100., + -32., + 0., + 1., + -1., + f32::MAX, + f32::MIN, + 100., + -100., + -32., + ); + let b = _mm512_set1_ps(1.); + let r = _mm512_add_ps(a, b); + let e = _mm512_setr_ps( + 1., + 2., + 0., + f32::MAX, + f32::MIN + 1., + 101., + -99., + -31., + 1., + 2., + 0., + f32::MAX, + f32::MIN + 
1., + 101., + -99., + -31., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_add_ps() { + let a = _mm512_setr_ps( + 0., + 1., + -1., + f32::MAX, + f32::MIN, + 100., + -100., + -32., + 0., + 1., + -1., + f32::MAX, + f32::MIN, + 100., + -100., + -32., + ); + let b = _mm512_set1_ps(1.); + let r = _mm512_mask_add_ps(a, 0, a, b); + assert_eq_m512(r, a); + let r = _mm512_mask_add_ps(a, 0b00000000_11111111, a, b); + let e = _mm512_setr_ps( + 1., + 2., + 0., + f32::MAX, + f32::MIN + 1., + 101., + -99., + -31., + 0., + 1., + -1., + f32::MAX, + f32::MIN, + 100., + -100., + -32., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_add_ps() { + let a = _mm512_setr_ps( + 0., + 1., + -1., + f32::MAX, + f32::MIN, + 100., + -100., + -32., + 0., + 1., + -1., + f32::MAX, + f32::MIN, + 100., + -100., + -32., + ); + let b = _mm512_set1_ps(1.); + let r = _mm512_maskz_add_ps(0, a, b); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_add_ps(0b00000000_11111111, a, b); + let e = _mm512_setr_ps( + 1., + 2., + 0., + f32::MAX, + f32::MIN + 1., + 101., + -99., + -31., + 0., + 0., + 0., + 0., + 0., + 0., + 0., + 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sub_epi32() { + let a = _mm512_setr_epi32( + 0, + 1, + -1, + i32::MAX, + i32::MIN, + 100, + -100, + -32, + 0, + 1, + -1, + i32::MAX, + i32::MIN, + 100, + -100, + -32, + ); + let b = _mm512_set1_epi32(1); + let r = _mm512_sub_epi32(a, b); + let e = _mm512_setr_epi32( + -1, + 0, + -2, + i32::MAX - 1, + i32::MAX, + 99, + -101, + -33, + -1, + 0, + -2, + i32::MAX - 1, + i32::MAX, + 99, + -101, + -33, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sub_epi32() { + let a = _mm512_setr_epi32( + 0, + 1, + -1, + i32::MAX, + i32::MIN, + 100, + -100, + -32, + 0, + 1, + -1, + i32::MAX, + i32::MIN, + 100, + -100, + -32, + ); + let b = _mm512_set1_epi32(1); + let r = _mm512_mask_sub_epi32(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_sub_epi32(a, 0b00000000_11111111, a, b); + let e = _mm512_setr_epi32( + -1, + 0, + -2, + i32::MAX - 1, + i32::MAX, + 99, + -101, + -33, + 0, + 1, + -1, + i32::MAX, + i32::MIN, + 100, + -100, + -32, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sub_epi32() { + let a = _mm512_setr_epi32( + 0, + 1, + -1, + i32::MAX, + i32::MIN, + 100, + -100, + -32, + 0, + 1, + -1, + i32::MAX, + i32::MIN, + 100, + -100, + -32, + ); + let b = _mm512_set1_epi32(1); + let r = _mm512_maskz_sub_epi32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_sub_epi32(0b00000000_11111111, a, b); + let e = _mm512_setr_epi32( + -1, + 0, + -2, + i32::MAX - 1, + i32::MAX, + 99, + -101, + -33, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sub_ps() { + let a = _mm512_setr_ps( + 0., + 1., + -1., + f32::MAX, + f32::MIN, + 100., + -100., + -32., + 0., + 1., + -1., + f32::MAX, + f32::MIN, + 100., + -100., + -32., + ); + let b = _mm512_set1_ps(1.); + let r = _mm512_sub_ps(a, b); + let e = _mm512_setr_ps( + -1., + 0., + -2., + f32::MAX - 1., + f32::MIN, + 99., + -101., + -33., + -1., + 0., + -2., + f32::MAX - 1., + f32::MIN, + 99., + -101., + -33., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sub_ps() { + let a = _mm512_setr_ps( + 0., + 1., + -1., + 
f32::MAX, + f32::MIN, + 100., + -100., + -32., + 0., + 1., + -1., + f32::MAX, + f32::MIN, + 100., + -100., + -32., + ); + let b = _mm512_set1_ps(1.); + let r = _mm512_mask_sub_ps(a, 0, a, b); + assert_eq_m512(r, a); + let r = _mm512_mask_sub_ps(a, 0b00000000_11111111, a, b); + let e = _mm512_setr_ps( + -1., + 0., + -2., + f32::MAX - 1., + f32::MIN, + 99., + -101., + -33., + 0., + 1., + -1., + f32::MAX, + f32::MIN, + 100., + -100., + -32., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sub_ps() { + let a = _mm512_setr_ps( + 0., + 1., + -1., + f32::MAX, + f32::MIN, + 100., + -100., + -32., + 0., + 1., + -1., + f32::MAX, + f32::MIN, + 100., + -100., + -32., + ); + let b = _mm512_set1_ps(1.); + let r = _mm512_maskz_sub_ps(0, a, b); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_sub_ps(0b00000000_11111111, a, b); + let e = _mm512_setr_ps( + -1., + 0., + -2., + f32::MAX - 1., + f32::MIN, + 99., + -101., + -33., + 0., + 0., + 0., + 0., + 0., + 0., + 0., + 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mullo_epi32() { + let a = _mm512_setr_epi32( + 0, + 1, + -1, + i32::MAX, + i32::MIN, + 100, + -100, + -32, + 0, + 1, + -1, + i32::MAX, + i32::MIN, + 100, + -100, + -32, + ); + let b = _mm512_set1_epi32(2); + let r = _mm512_mullo_epi32(a, b); + let e = _mm512_setr_epi32( + 0, 2, -2, -2, 0, 200, -200, -64, 0, 2, -2, -2, 0, 200, -200, -64, + ); + assert_eq_m512i(r, e); + } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_abs_epi32() { - #[rustfmt::skip] + unsafe fn test_mm512_mask_mullo_epi32() { let a = _mm512_setr_epi32( - 0, 1, -1, i32::MAX, - i32::MIN, 100, -100, -32, - 0, 1, -1, i32::MAX, - i32::MIN, 100, -100, -32, - ); - let r = _mm512_abs_epi32(a); - let e = _mm512_setr_epi32( 0, 1, - 1, + -1, i32::MAX, - i32::MAX.wrapping_add(1), - 100, + i32::MIN, 100, - 32, + -100, + -32, 0, 1, - 1, + -1, i32::MAX, - i32::MAX.wrapping_add(1), + i32::MIN, 100, + -100, + -32, + ); + let b = _mm512_set1_epi32(2); + let r = _mm512_mask_mullo_epi32(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_mullo_epi32(a, 0b00000000_11111111, a, b); + let e = _mm512_setr_epi32( + 0, + 2, + -2, + -2, + 0, + 200, + -200, + -64, + 0, + 1, + -1, + i32::MAX, + i32::MIN, 100, - 32, + -100, + -32, ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_abs_epi32() { - #[rustfmt::skip] + unsafe fn test_mm512_maskz_mullo_epi32() { let a = _mm512_setr_epi32( - 0, 1, -1, i32::MAX, - i32::MIN, 100, -100, -32, - 0, 1, -1, i32::MAX, - i32::MIN, 100, -100, -32, - ); - let r = _mm512_mask_abs_epi32(a, 0, a); - assert_eq_m512i(r, a); - let r = _mm512_mask_abs_epi32(a, 0b11111111, a); - let e = _mm512_setr_epi32( 0, 1, - 1, + -1, i32::MAX, - i32::MAX.wrapping_add(1), - 100, + i32::MIN, 100, - 32, + -100, + -32, 0, 1, -1, @@ -3926,42 +5706,503 @@ mod tests { -100, -32, ); + let b = _mm512_set1_epi32(2); + let r = _mm512_maskz_mullo_epi32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_mullo_epi32(0b00000000_11111111, a, b); + let e = _mm512_setr_epi32(0, 2, -2, -2, 0, 200, -200, -64, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_maskz_abs_epi32() { - #[rustfmt::skip] - let a = _mm512_setr_epi32( - 0, 1, -1, i32::MAX, - i32::MIN, 100, -100, -32, - 0, 1, -1, i32::MAX, - i32::MIN, 100, -100, -32, + unsafe fn test_mm512_mul_ps() { + let a = _mm512_setr_ps( + 0., + 1., + -1., + f32::MAX, + 
f32::MIN, + 100., + -100., + -32., + 0., + 1., + -1., + f32::MAX, + f32::MIN, + 100., + -100., + -32., ); - let r = _mm512_maskz_abs_epi32(0, a); + let b = _mm512_set1_ps(2.); + let r = _mm512_mul_ps(a, b); + let e = _mm512_setr_ps( + 0., + 2., + -2., + f32::INFINITY, + f32::NEG_INFINITY, + 200., + -200., + -64., + 0., + 2., + -2., + f32::INFINITY, + f32::NEG_INFINITY, + 200., + -200., + -64., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_mul_ps() { + let a = _mm512_setr_ps( + 0., + 1., + -1., + f32::MAX, + f32::MIN, + 100., + -100., + -32., + 0., + 1., + -1., + f32::MAX, + f32::MIN, + 100., + -100., + -32., + ); + let b = _mm512_set1_ps(2.); + let r = _mm512_mask_mul_ps(a, 0, a, b); + assert_eq_m512(r, a); + let r = _mm512_mask_mul_ps(a, 0b00000000_11111111, a, b); + let e = _mm512_setr_ps( + 0., + 2., + -2., + f32::INFINITY, + f32::NEG_INFINITY, + 200., + -200., + -64., + 0., + 1., + -1., + f32::MAX, + f32::MIN, + 100., + -100., + -32., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_mul_ps() { + let a = _mm512_setr_ps( + 0., + 1., + -1., + f32::MAX, + f32::MIN, + 100., + -100., + -32., + 0., + 1., + -1., + f32::MAX, + f32::MIN, + 100., + -100., + -32., + ); + let b = _mm512_set1_ps(2.); + let r = _mm512_maskz_mul_ps(0, a, b); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_mul_ps(0b00000000_11111111, a, b); + let e = _mm512_setr_ps( + 0., + 2., + -2., + f32::INFINITY, + f32::NEG_INFINITY, + 200., + -200., + -64., + 0., + 0., + 0., + 0., + 0., + 0., + 0., + 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_div_ps() { + let a = _mm512_setr_ps( + 0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32., + ); + let b = _mm512_setr_ps( + 2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2., + ); + let r = _mm512_div_ps(a, b); + let e = _mm512_setr_ps( + 0., + 0.5, + -0.5, + -1., + 50., + f32::INFINITY, + -50., + -16., + 0., + 0.5, + -0.5, + 500., + f32::NEG_INFINITY, + 50., + -50., + -16., + ); + assert_eq_m512(r, e); // 0/0 = NAN + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_div_ps() { + let a = _mm512_setr_ps( + 0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32., + ); + let b = _mm512_setr_ps( + 2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2., + ); + let r = _mm512_mask_div_ps(a, 0, a, b); + assert_eq_m512(r, a); + let r = _mm512_mask_div_ps(a, 0b00000000_11111111, a, b); + let e = _mm512_setr_ps( + 0., + 0.5, + -0.5, + -1., + 50., + f32::INFINITY, + -50., + -16., + 0., + 1., + -1., + 1000., + -131., + 100., + -100., + -32., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_div_ps() { + let a = _mm512_setr_ps( + 0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32., + ); + let b = _mm512_setr_ps( + 2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2., + ); + let r = _mm512_maskz_div_ps(0, a, b); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_div_ps(0b00000000_11111111, a, b); + let e = _mm512_setr_ps( + 0., + 0.5, + -0.5, + -1., + 50., + f32::INFINITY, + -50., + -16., + 0., + 0., + 0., + 0., + 0., + 0., + 0., + 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_max_epi32() { + let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + 
let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_max_epi32(a, b); + let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_max_epi32() { + let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_mask_max_epi32(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_max_epi32(a, 0b00000000_11111111, a, b); + let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_max_epi32() { + let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_maskz_max_epi32(0, a, b); assert_eq_m512i(r, _mm512_setzero_si512()); - let r = _mm512_maskz_abs_epi32(0b11111111, a); - let e = _mm512_setr_epi32( - 0, - 1, - 1, - i32::MAX, - i32::MAX.wrapping_add(1), - 100, - 100, - 32, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, + let r = _mm512_maskz_max_epi32(0b00000000_11111111, a, b); + let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_max_ps() { + let a = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let b = _mm512_setr_ps( + 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0., + ); + let r = _mm512_max_ps(a, b); + let e = _mm512_setr_ps( + 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_max_ps() { + let a = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let b = _mm512_setr_ps( + 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0., + ); + let r = _mm512_mask_max_ps(a, 0, a, b); + assert_eq_m512(r, a); + let r = _mm512_mask_max_ps(a, 0b00000000_11111111, a, b); + let e = _mm512_setr_ps( + 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_max_ps() { + let a = _mm512_setr_ps( + 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., + ); + let b = _mm512_setr_ps( + 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0., + ); + let r = _mm512_maskz_max_ps(0, a, b); + assert_eq_m512(r, _mm512_setzero_ps()); + let r = _mm512_maskz_max_ps(0b00000000_11111111, a, b); + let e = _mm512_setr_ps( + 15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_max_epu32() { + let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_max_epu32(a, b); + let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_max_epu32() { + let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = 
_mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+        let r = _mm512_mask_max_epu32(a, 0, a, b);
+        assert_eq_m512i(r, a);
+        let r = _mm512_mask_max_epu32(a, 0b00000000_11111111, a, b);
+        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_maskz_max_epu32() {
+        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+        let r = _mm512_maskz_max_epu32(0, a, b);
+        assert_eq_m512i(r, _mm512_setzero_si512());
+        let r = _mm512_maskz_max_epu32(0b00000000_11111111, a, b);
+        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_min_epi32() {
+        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+        let r = _mm512_min_epi32(a, b);
+        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_min_epi32() {
+        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+        let r = _mm512_mask_min_epi32(a, 0, a, b);
+        assert_eq_m512i(r, a);
+        let r = _mm512_mask_min_epi32(a, 0b00000000_11111111, a, b);
+        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_maskz_min_epi32() {
+        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+        let r = _mm512_maskz_min_epi32(0, a, b);
+        assert_eq_m512i(r, _mm512_setzero_si512());
+        let r = _mm512_maskz_min_epi32(0b00000000_11111111, a, b);
+        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_min_ps() {
+        let a = _mm512_setr_ps(
+            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
+        );
+        let b = _mm512_setr_ps(
+            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
+        );
+        let r = _mm512_min_ps(a, b);
+        let e = _mm512_setr_ps(
+            0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
+        );
+        assert_eq_m512(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_min_ps() {
+        let a = _mm512_setr_ps(
+            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
+        );
+        let b = _mm512_setr_ps(
+            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
+        );
+        let r = _mm512_mask_min_ps(a, 0, a, b);
+        assert_eq_m512(r, a);
+        let r = _mm512_mask_min_ps(a, 0b00000000_11111111, a, b);
+        let e = _mm512_setr_ps(
+            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
+        );
+        assert_eq_m512(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_maskz_min_ps() {
+        let a = _mm512_setr_ps(
+            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
+        );
+        let b = _mm512_setr_ps(
+            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
+        );
+        let r = _mm512_maskz_min_ps(0, a, b);
+        assert_eq_m512(r, _mm512_setzero_ps());
+        let r = _mm512_maskz_min_ps(0b00000000_11111111, a, b);
+        let e = _mm512_setr_ps(
+            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
+        );
+        assert_eq_m512(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_min_epu32() {
+        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+        let r = _mm512_min_epu32(a, b);
+        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_min_epu32() {
+        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+        let r = _mm512_mask_min_epu32(a, 0, a, b);
+        assert_eq_m512i(r, a);
+        let r = _mm512_mask_min_epu32(a, 0b00000000_11111111, a, b);
+        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_maskz_min_epu32() {
+        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+        let r = _mm512_maskz_min_epu32(0, a, b);
+        assert_eq_m512i(r, _mm512_setzero_si512());
+        let r = _mm512_maskz_min_epu32(0b00000000_11111111, a, b);
+        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
         assert_eq_m512i(r, e);
     }
 
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_sqrt_ps() {
+        let a = _mm512_setr_ps(
+            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
+        );
+        let r = _mm512_sqrt_ps(a);
+        let e = _mm512_setr_ps(
+            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
+        );
+        assert_eq_m512(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_sqrt_ps() {
+        let a = _mm512_setr_ps(
+            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
+        );
+        let r = _mm512_mask_sqrt_ps(a, 0, a);
+        assert_eq_m512(r, a);
+        let r = _mm512_mask_sqrt_ps(a, 0b00000000_11111111, a);
+        let e = _mm512_setr_ps(
+            0., 1., 2., 3., 4., 5., 6., 7., 64., 81., 100., 121., 144., 169., 196., 225.,
+        );
+        assert_eq_m512(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_maskz_sqrt_ps() {
+        let a = _mm512_setr_ps(
+            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
+        );
+        let r = _mm512_maskz_sqrt_ps(0, a);
+        assert_eq_m512(r, _mm512_setzero_ps());
+        let r = _mm512_maskz_sqrt_ps(0b00000000_11111111, a);
+        let e = _mm512_setr_ps(
+            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
+        );
+        assert_eq_m512(r, e);
+    }
+
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_i32gather_ps() {
         let mut arr = [0f32; 256];
diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs
index aa6857ecce..faa444d608 100644
--- a/crates/core_arch/src/x86_64/avx512f.rs
+++ b/crates/core_arch/src/x86_64/avx512f.rs
@@ -59,32 +59,583 @@ mod tests {
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_mask_abs_epi64() {
-        #[rustfmt::skip]
-        let a = _mm512_setr_epi64(
-            0, 1, -1, i64::MAX,
-            i64::MIN, 100, -100, -32
-        );
+        let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
         let r = _mm512_mask_abs_epi64(a, 0, a);
         assert_eq_m512i(r, a);
-        let r = _mm512_mask_abs_epi64(a, 0b11111111, a);
-        let e = _mm512_setr_epi64(0, 1, 1, i64::MAX, i64::MAX.wrapping_add(1), 100, 100, 32);
+        let r = _mm512_mask_abs_epi64(a, 0b00001111, a);
+        let e = _mm512_setr_epi64(0, 1, 1, i64::MAX, i64::MIN, 100, -100, -32);
         assert_eq_m512i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_maskz_abs_epi64() {
+        #[rustfmt::skip]
+        let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
+        let r = _mm512_maskz_abs_epi64(0, a);
+        assert_eq_m512i(r, _mm512_setzero_si512());
+        let r = _mm512_maskz_abs_epi64(0b00001111, a);
+        let e = _mm512_setr_epi64(0, 1, 1, i64::MAX, 0, 0, 0, 0);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_abs_pd() {
+        let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.);
+        let r = _mm512_abs_pd(a);
+        let e = _mm512_setr_pd(0., 1., 1., f64::MAX, f64::MAX, 100., 100., 32.);
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_abs_pd() {
+        let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.);
+        let r = _mm512_mask_abs_pd(a, 0, a);
+        assert_eq_m512d(r, a);
+        let r = _mm512_mask_abs_pd(a, 0b00001111, a);
+        let e = _mm512_setr_pd(0., 1., 1., f64::MAX, f64::MIN, 100., -100., -32.);
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_add_epi64() {
+        let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
+        let b = _mm512_set1_epi64(1);
+        let r = _mm512_add_epi64(a, b);
+        let e = _mm512_setr_epi64(1, 2, 0, i64::MIN, i64::MIN + 1, 101, -99, -31);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_add_epi64() {
+        let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
+        let b = _mm512_set1_epi64(1);
+        let r = _mm512_mask_add_epi64(a, 0, a, b);
+        assert_eq_m512i(r, a);
+        let r = _mm512_mask_add_epi64(a, 0b00001111, a, b);
+        let e = _mm512_setr_epi64(1, 2, 0, i64::MIN, i64::MIN, 100, -100, -32);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_maskz_add_epi64() {
         #[rustfmt::skip]
         let a = _mm512_setr_epi64(
             0, 1, -1, i64::MAX,
             i64::MIN, 100, -100, -32
         );
-        let r = _mm512_maskz_abs_epi64(0, a);
+        let b = _mm512_set1_epi64(1);
+        let r = _mm512_maskz_add_epi64(0, a, b);
+        assert_eq_m512i(r, _mm512_setzero_si512());
+        let r = _mm512_maskz_add_epi64(0b00001111, a, b);
+        let e = _mm512_setr_epi64(1, 2, 0, i64::MIN, 0, 0, 0, 0);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_add_pd() {
+        let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.);
+        let b = _mm512_set1_pd(1.);
+        let r = _mm512_add_pd(a, b);
+        let e = _mm512_setr_pd(1., 2., 0., f64::MAX, f64::MIN + 1., 101., -99., -31.);
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_add_pd() {
+        let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.);
+        let b = _mm512_set1_pd(1.);
+        let r = _mm512_mask_add_pd(a, 0, a, b);
+        assert_eq_m512d(r, a);
+        let r = _mm512_mask_add_pd(a, 0b00001111, a, b);
+        let e = _mm512_setr_pd(1., 2., 0., f64::MAX, f64::MIN, 100., -100., -32.);
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_maskz_add_pd() {
+        let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.);
+        let b = _mm512_set1_pd(1.);
+        let r = _mm512_maskz_add_pd(0, a, b);
+        assert_eq_m512d(r, _mm512_setzero_pd());
+        let r = _mm512_maskz_add_pd(0b00001111, a, b);
+        let e = _mm512_setr_pd(1., 2., 0., f64::MAX, 0., 0., 0., 0.);
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_sub_epi64() {
+        let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
+        let b = _mm512_set1_epi64(1);
+        let r = _mm512_sub_epi64(a, b);
+        let e = _mm512_setr_epi64(-1, 0, -2, i64::MAX - 1, i64::MAX, 99, -101, -33);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_sub_epi64() {
+        let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
+        let b = _mm512_set1_epi64(1);
+        let r = _mm512_mask_sub_epi64(a, 0, a, b);
+        assert_eq_m512i(r, a);
+        let r = _mm512_mask_sub_epi64(a, 0b00001111, a, b);
+        let e = _mm512_setr_epi64(-1, 0, -2, i64::MAX - 1, i64::MIN, 100, -100, -32);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_maskz_sub_epi64() {
+        let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
+        let b = _mm512_set1_epi64(1);
+        let r = _mm512_maskz_sub_epi64(0, a, b);
+        assert_eq_m512i(r, _mm512_setzero_si512());
+        let r = _mm512_maskz_sub_epi64(0b00001111, a, b);
+        let e = _mm512_setr_epi64(-1, 0, -2, i64::MAX - 1, 0, 0, 0, 0);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_sub_pd() {
+        let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.);
+        let b = _mm512_set1_pd(1.);
+        let r = _mm512_sub_pd(a, b);
+        let e = _mm512_setr_pd(-1., 0., -2., f64::MAX - 1., f64::MIN, 99., -101., -33.);
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_sub_pd() {
+        let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.);
+        let b = _mm512_set1_pd(1.);
+        let r = _mm512_mask_sub_pd(a, 0, a, b);
+        assert_eq_m512d(r, a);
+        let r = _mm512_mask_sub_pd(a, 0b00001111, a, b);
+        let e = _mm512_setr_pd(-1., 0., -2., f64::MAX - 1., f64::MIN, 100., -100., -32.);
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_maskz_sub_pd() {
+        let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.);
+        let b = _mm512_set1_pd(1.);
+        let r = _mm512_maskz_sub_pd(0, a, b);
+        assert_eq_m512d(r, _mm512_setzero_pd());
+        let r = _mm512_maskz_sub_pd(0b00001111, a, b);
+        let e = _mm512_setr_pd(-1., 0., -2., f64::MAX - 1., 0., 0., 0., 0.);
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mul_epi32() {
+        let a = _mm512_setr_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+        let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let r = _mm512_mul_epi32(a, b);
+        let e = _mm512_set_epi64(15, 13, 11, 9, 7, 5, 3, 1);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_mul_epi32() {
+        let a = _mm512_setr_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+        let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let r = _mm512_mask_mul_epi32(a, 0, a, b);
+        assert_eq_m512i(r, a);
+        let r = _mm512_mask_mul_epi32(a, 0b00001111, a, b);
+        let e = _mm512_set_epi64(
+            1 | 1 << 32,
+            1 | 1 << 32,
+            1 | 1 << 32,
+            1 | 1 << 32,
+            7,
+            5,
+            3,
+            1,
+        );
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_maskz_mul_epi32() {
+        let a = _mm512_setr_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+        let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let r = _mm512_maskz_mul_epi32(0, a, b);
+        assert_eq_m512i(r, _mm512_setzero_si512());
+        let r = _mm512_maskz_mul_epi32(0b00001111, a, b);
+        let e = _mm512_set_epi64(0, 0, 0, 0, 7, 5, 3, 1);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mul_epu32() {
+        let a = _mm512_setr_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+        let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let r = _mm512_mul_epu32(a, b);
+        let e = _mm512_set_epi64(15, 13, 11, 9, 7, 5, 3, 1);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_mul_epu32() {
+        let a = _mm512_setr_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+        let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let r = _mm512_mask_mul_epu32(a, 0, a, b);
+        assert_eq_m512i(r, a);
+        let r = _mm512_mask_mul_epu32(a, 0b00001111, a, b);
+        let e = _mm512_set_epi64(
+            1 | 1 << 32,
+            1 | 1 << 32,
+            1 | 1 << 32,
+            1 | 1 << 32,
+            7,
+            5,
+            3,
+            1,
+        );
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_maskz_mul_epu32() {
+        let a = _mm512_setr_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+        let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let r = _mm512_maskz_mul_epu32(0, a, b);
+        assert_eq_m512i(r, _mm512_setzero_si512());
+        let r = _mm512_maskz_mul_epu32(0b00001111, a, b);
+        let e = _mm512_set_epi64(0, 0, 0, 0, 7, 5, 3, 1);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mullox_epi64() {
+        let a = _mm512_setr_epi64(0, 1, i64::MAX, i64::MIN, i64::MAX, 100, -100, -32);
+        let b = _mm512_set1_epi64(2);
+        let r = _mm512_mullox_epi64(a, b);
+        let e = _mm512_setr_epi64(0, 2, -2, 0, -2, 200, -200, -64);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_mullox_epi64() {
+        let a = _mm512_setr_epi64(0, 1, i64::MAX, i64::MIN, i64::MAX, 100, -100, -32);
+        let b = _mm512_set1_epi64(2);
+        let r = _mm512_mask_mullox_epi64(a, 0, a, b);
+        assert_eq_m512i(r, a);
+        let r = _mm512_mask_mullox_epi64(a, 0b00001111, a, b);
+        let e = _mm512_setr_epi64(0, 2, -2, 0, i64::MAX, 100, -100, -32);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mul_pd() {
+        let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.);
+        let b = _mm512_set1_pd(2.);
+        let r = _mm512_mul_pd(a, b);
+        let e = _mm512_setr_pd(
+            0.,
+            2.,
+            f64::INFINITY,
+            f64::NEG_INFINITY,
+            f64::INFINITY,
+            f64::NEG_INFINITY,
+            -200.,
+            -64.,
+        );
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_mul_pd() {
+        let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.);
+        let b = _mm512_set1_pd(2.);
+        let r = _mm512_mask_mul_pd(a, 0, a, b);
+        assert_eq_m512d(r, a);
+        let r = _mm512_mask_mul_pd(a, 0b00001111, a, b);
+        let e = _mm512_setr_pd(
+            0.,
+            2.,
+            f64::INFINITY,
+            f64::NEG_INFINITY,
+            f64::MAX,
+            f64::MIN,
+            -100.,
+            -32.,
+        );
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_maskz_mul_pd() {
+        let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.);
+        let b = _mm512_set1_pd(2.);
+        let r = _mm512_maskz_mul_pd(0, a, b);
+        assert_eq_m512d(r, _mm512_setzero_pd());
+        let r = _mm512_maskz_mul_pd(0b00001111, a, b);
+        let e = _mm512_setr_pd(0., 2., f64::INFINITY, f64::NEG_INFINITY, 0., 0., 0., 0.);
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_div_pd() {
+        let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.);
+        let b = _mm512_setr_pd(2., 2., 0., 0., 0., 0., 2., 2.);
+        let r = _mm512_div_pd(a, b);
+        let e = _mm512_setr_pd(
+            0.,
+            0.5,
+            f64::INFINITY,
+            f64::NEG_INFINITY,
+            f64::INFINITY,
+            f64::NEG_INFINITY,
+            -50.,
+            -16.,
+        );
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_div_pd() {
+        let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.);
+        let b = _mm512_setr_pd(2., 2., 0., 0., 0., 0., 2., 2.);
+        let r = _mm512_mask_div_pd(a, 0, a, b);
+        assert_eq_m512d(r, a);
+        let r = _mm512_mask_div_pd(a, 0b00001111, a, b);
+        let e = _mm512_setr_pd(
+            0.,
+            0.5,
+            f64::INFINITY,
+            f64::NEG_INFINITY,
+            f64::MAX,
+            f64::MIN,
+            -100.,
+            -32.,
+        );
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_maskz_div_pd() {
+        let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.);
+        let b = _mm512_setr_pd(2., 2., 0., 0., 0., 0., 2., 2.);
+        let r = _mm512_maskz_div_pd(0, a, b);
+        assert_eq_m512d(r, _mm512_setzero_pd());
+        let r = _mm512_maskz_div_pd(0b00001111, a, b);
+        let e = _mm512_setr_pd(0., 0.5, f64::INFINITY, f64::NEG_INFINITY, 0., 0., 0., 0.);
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_max_epi64() {
+        let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+        let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
+        let r = _mm512_max_epi64(a, b);
+        let e = _mm512_setr_epi64(7, 6, 5, 4, 4, 5, 6, 7);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_max_epi64() {
+        let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+        let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
+        let r = _mm512_mask_max_epi64(a, 0, a, b);
+        assert_eq_m512i(r, a);
+        let r = _mm512_mask_max_epi64(a, 0b00001111, a, b);
+        let e = _mm512_setr_epi64(7, 6, 5, 4, 4, 5, 6, 7);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_maskz_max_epi64() {
+        let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+        let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
+        let r = _mm512_maskz_max_epi64(0, a, b);
         assert_eq_m512i(r, _mm512_setzero_si512());
-        let r = _mm512_maskz_abs_epi64(0b01111111, a);
-        let e = _mm512_setr_epi64(0, 1, 1, i64::MAX, i64::MAX.wrapping_add(1), 100, 100, 0);
+        let r = _mm512_maskz_max_epi64(0b00001111, a, b);
+        let e = _mm512_setr_epi64(7, 6, 5, 4, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
     }
 
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_max_pd() {
+        let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+        let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
+        let r = _mm512_max_pd(a, b);
+        let e = _mm512_setr_pd(7., 6., 5., 4., 4., 5., 6., 7.);
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_max_pd() {
+        let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+        let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
+        let r = _mm512_mask_max_pd(a, 0, a, b);
+        assert_eq_m512d(r, a);
+        let r = _mm512_mask_max_pd(a, 0b00001111, a, b);
+        let e = _mm512_setr_pd(7., 6., 5., 4., 4., 5., 6., 7.);
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_maskz_max_pd() {
+        let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+        let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
+        let r = _mm512_maskz_max_pd(0, a, b);
+        assert_eq_m512d(r, _mm512_setzero_pd());
+        let r = _mm512_maskz_max_pd(0b00001111, a, b);
+        let e = _mm512_setr_pd(7., 6., 5., 4., 0., 0., 0., 0.);
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_max_epu64() {
+        let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+        let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
+        let r = _mm512_max_epu64(a, b);
+        let e = _mm512_setr_epi64(7, 6, 5, 4, 4, 5, 6, 7);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_max_epu64() {
+        let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+        let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
+        let r = _mm512_mask_max_epu64(a, 0, a, b);
+        assert_eq_m512i(r, a);
+        let r = _mm512_mask_max_epu64(a, 0b00001111, a, b);
+        let e = _mm512_setr_epi64(7, 6, 5, 4, 4, 5, 6, 7);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_maskz_max_epu64() {
+        let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+        let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
+        let r = _mm512_maskz_max_epu64(0, a, b);
+        assert_eq_m512i(r, _mm512_setzero_si512());
+        let r = _mm512_maskz_max_epu64(0b00001111, a, b);
+        let e = _mm512_setr_epi64(7, 6, 5, 4, 0, 0, 0, 0);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_min_epi64() {
+        let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+        let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
+        let r = _mm512_min_epi64(a, b);
+        let e = _mm512_setr_epi64(0, 1, 2, 3, 3, 2, 1, 0);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_min_epi64() {
+        let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+        let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
+        let r = _mm512_mask_min_epi64(a, 0, a, b);
+        assert_eq_m512i(r, a);
+        let r = _mm512_mask_min_epi64(a, 0b00001111, a, b);
+        let e = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_maskz_min_epi64() {
+        let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+        let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
+        let r = _mm512_maskz_min_epi64(0, a, b);
+        assert_eq_m512i(r, _mm512_setzero_si512());
+        let r = _mm512_maskz_min_epi64(0b00001111, a, b);
+        let e = _mm512_setr_epi64(0, 1, 2, 3, 0, 0, 0, 0);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_min_pd() {
+        let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+        let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
+        let r = _mm512_min_pd(a, b);
+        let e = _mm512_setr_pd(0., 1., 2., 3., 3., 2., 1., 0.);
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_min_pd() {
+        let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+        let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
+        let r = _mm512_mask_min_pd(a, 0, a, b);
+        assert_eq_m512d(r, a);
+        let r = _mm512_mask_min_pd(a, 0b00001111, a, b);
+        let e = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_maskz_min_pd() {
+        let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+        let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
+        let r = _mm512_maskz_min_pd(0, a, b);
+        assert_eq_m512d(r, _mm512_setzero_pd());
+        let r = _mm512_maskz_min_pd(0b00001111, a, b);
+        let e = _mm512_setr_pd(0., 1., 2., 3., 0., 0., 0., 0.);
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_min_epu64() {
+        let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+        let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
+        let r = _mm512_min_epu64(a, b);
+        let e = _mm512_setr_epi64(0, 1, 2, 3, 3, 2, 1, 0);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_min_epu64() {
+        let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+        let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
+        let r = _mm512_mask_min_epu64(a, 0, a, b);
+        assert_eq_m512i(r, a);
+        let r = _mm512_mask_min_epu64(a, 0b00001111, a, b);
+        let e = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_maskz_min_epu64() {
+        let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+        let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
+        let r = _mm512_maskz_min_epu64(0, a, b);
+        assert_eq_m512i(r, _mm512_setzero_si512());
+        let r = _mm512_maskz_min_epu64(0b00001111, a, b);
+        let e = _mm512_setr_epi64(0, 1, 2, 3, 0, 0, 0, 0);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_sqrt_pd() {
+        let a = _mm512_setr_pd(0., 1., 4., 9., 16., 25., 36., 49.);
+        let r = _mm512_sqrt_pd(a);
+        let e = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_sqrt_pd() {
+        let a = _mm512_setr_pd(0., 1., 4., 9., 16., 25., 36., 49.);
+        let r = _mm512_mask_sqrt_pd(a, 0, a);
+        assert_eq_m512d(r, a);
+        let r = _mm512_mask_sqrt_pd(a, 0b00001111, a);
+        let e = _mm512_setr_pd(0., 1., 2., 3., 16., 25., 36., 49.);
+        assert_eq_m512d(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_maskz_sqrt_pd() {
+        let a = _mm512_setr_pd(0., 1., 4., 9., 16., 25., 36., 49.);
+        let r = _mm512_maskz_sqrt_pd(0, a);
+        assert_eq_m512d(r, _mm512_setzero_pd());
+        let r = _mm512_maskz_sqrt_pd(0b00001111, a);
+        let e = _mm512_setr_pd(0., 1., 2., 3., 0., 0., 0., 0.);
+        assert_eq_m512d(r, e);
+    }
+
     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_setzero_pd() {
         assert_eq_m512d(_mm512_setzero_pd(), _mm512_set1_pd(0.));