Skip to content

Commit 378daae

Browse files
committed
Fix: Emulating _mm512_2intersect_epi64 (permute `b` with _mm512_permutex_epi64 instead of _mm512_shuffle_i64x2)
1 parent ad73343 commit 378daae

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

include/numkong/sparse.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -596,24 +596,24 @@ NK_PUBLIC void nk_sparse_intersect_u32_ice( //
596596
*/
597597
NK_INTERNAL nk_u8_t nk_intersect_u64x8_ice_(__m512i a, __m512i b) {
598598
__m512i a1 = _mm512_alignr_epi64(a, a, 2);
599-
__m512i b1 = _mm512_shuffle_i64x2(b, b, _MM_SHUFFLE(2, 1, 0, 3));
599+
__m512i b1 = _mm512_permutex_epi64(b, _MM_PERM_ADCB);
600600
__mmask8 nm00 = _mm512_cmpneq_epi64_mask(a, b);
601601

602602
__m512i a2 = _mm512_alignr_epi64(a, a, 4);
603603
__m512i a3 = _mm512_alignr_epi64(a, a, 6);
604604
__mmask8 nm01 = _mm512_cmpneq_epi64_mask(a1, b);
605605
__mmask8 nm02 = _mm512_cmpneq_epi64_mask(a2, b);
606606

607+
__m512i b2 = _mm512_permutex_epi64(b, _MM_PERM_BADC);
607608
__mmask8 nm03 = _mm512_cmpneq_epi64_mask(a3, b);
608609
__mmask8 nm10 = _mm512_mask_cmpneq_epi64_mask(nm00, a, b1);
609610
__mmask8 nm11 = _mm512_mask_cmpneq_epi64_mask(nm01, a1, b1);
610611

611-
__m512i b2 = _mm512_shuffle_i64x2(b, b, _MM_SHUFFLE(1, 0, 3, 2));
612+
__m512i b3 = _mm512_permutex_epi64(b, _MM_PERM_CBAD);
612613
__mmask8 nm12 = _mm512_mask_cmpneq_epi64_mask(nm02, a2, b1);
613614
__mmask8 nm13 = _mm512_mask_cmpneq_epi64_mask(nm03, a3, b1);
614615
__mmask8 nm20 = _mm512_mask_cmpneq_epi64_mask(nm10, a, b2);
615616

616-
__m512i b3 = _mm512_shuffle_i64x2(b, b, _MM_SHUFFLE(0, 3, 2, 1));
617617
__mmask8 nm21 = _mm512_mask_cmpneq_epi64_mask(nm11, a1, b2);
618618
__mmask8 nm22 = _mm512_mask_cmpneq_epi64_mask(nm12, a2, b2);
619619
__mmask8 nm23 = _mm512_mask_cmpneq_epi64_mask(nm13, a3, b2);

0 commit comments

Comments
 (0)