@@ -596,24 +596,24 @@ NK_PUBLIC void nk_sparse_intersect_u32_ice( //
596596 */
597597NK_INTERNAL nk_u8_t nk_intersect_u64x8_ice_ (__m512i a, __m512i b) {
598598 __m512i a1 = _mm512_alignr_epi64 (a, a, 2 );
599- __m512i b1 = _mm512_shuffle_i64x2 (b, b, _MM_SHUFFLE ( 2 , 1 , 0 , 3 ) );
599+ __m512i b1 = _mm512_permutex_epi64 (b, _MM_PERM_ADCB );
600600 __mmask8 nm00 = _mm512_cmpneq_epi64_mask (a, b);
601601
602602 __m512i a2 = _mm512_alignr_epi64 (a, a, 4 );
603603 __m512i a3 = _mm512_alignr_epi64 (a, a, 6 );
604604 __mmask8 nm01 = _mm512_cmpneq_epi64_mask (a1, b);
605605 __mmask8 nm02 = _mm512_cmpneq_epi64_mask (a2, b);
606606
607+ __m512i b2 = _mm512_permutex_epi64 (b, _MM_PERM_BADC);
607608 __mmask8 nm03 = _mm512_cmpneq_epi64_mask (a3, b);
608609 __mmask8 nm10 = _mm512_mask_cmpneq_epi64_mask (nm00, a, b1);
609610 __mmask8 nm11 = _mm512_mask_cmpneq_epi64_mask (nm01, a1, b1);
610611
611- __m512i b2 = _mm512_shuffle_i64x2 (b, b, _MM_SHUFFLE ( 1 , 0 , 3 , 2 ) );
612+ __m512i b3 = _mm512_permutex_epi64 (b, _MM_PERM_CBAD );
612613 __mmask8 nm12 = _mm512_mask_cmpneq_epi64_mask (nm02, a2, b1);
613614 __mmask8 nm13 = _mm512_mask_cmpneq_epi64_mask (nm03, a3, b1);
614615 __mmask8 nm20 = _mm512_mask_cmpneq_epi64_mask (nm10, a, b2);
615616
616- __m512i b3 = _mm512_shuffle_i64x2 (b, b, _MM_SHUFFLE (0 , 3 , 2 , 1 ));
617617 __mmask8 nm21 = _mm512_mask_cmpneq_epi64_mask (nm11, a1, b2);
618618 __mmask8 nm22 = _mm512_mask_cmpneq_epi64_mask (nm12, a2, b2);
619619 __mmask8 nm23 = _mm512_mask_cmpneq_epi64_mask (nm13, a3, b2);
0 commit comments