@@ -37,11 +37,17 @@ unsigned rhash_ctz(unsigned x)
3737# else /* _MSC_VER >= 1300... */
3838
3939/**
40- * Returns index of the trailing bit of a 32-bit number.
41- * This is a plain C equivalent for GCC __builtin_ctz() bit scan .
40+ * Returns index of the least significant set bit in a 32-bit number.
41+ * This operation is also known as Count Trailing Zeros (CTZ) .
4242 *
43- * @param x the number to process
44- * @return zero-based index of the trailing bit
43+ * The function is a portable, branch-free equivalent of GCC's __builtin_ctz(),
44+ * using a De Bruijn sequence for constant-time lookup.
45+ *
46+ * @param x 32-bit unsigned integer to analyze (must not be zero)
47+ * @return zero-based index of the least significant set bit (0 to 31)
48+ *
49+ * @note Undefined behavior when `x == 0`. The current implementation
50+ * returns 0, but this value must not be relied upon.
4551 */
4652unsigned rhash_ctz (unsigned x )
4753{
@@ -64,23 +70,40 @@ unsigned rhash_ctz(unsigned x)
6470
6571#ifndef rhash_ctz64
6672/**
67- * Returns index of the trailing bit of a 64-bit number.
68- * This is a plain C equivalent for GCC __builtin_ctzll() bit scan.
69- * Original author: Matt Taylor (2003).
73+ * Returns the zero-based index of the least significant set bit in a 64-bit number.
74+ * This operation is also known as Count Trailing Zeros (CTZ).
7075 *
71- * @param x the number to process
72- * @return zero-based index of the trailing bit
76+ * The function is a portable, branch-free equivalent of GCC's __builtin_ctzll().
77+ * Uses a 32-bit optimized implementation with magic constant `0x78291ACF`,
78+ * based on Matt Taylor's original algorithm (2003).
79+ *
80+ * @param x 64-bit unsigned integer to analyze (must not be zero)
81+ * @return zero-based index of the least significant set bit (0 to 63)
82+ *
83+ * @note Undefined behavior when `x == 0`. The current implementation
84+ * returns 63, but this value must not be relied upon.
85+ * @see rhash_ctz() for 32-bit version.
7386 */
7487unsigned rhash_ctz64 (uint64_t x )
7588{
76- /* array for conversion to bit position */
89+ /* lookup table mapping hash values to bit position */
7790 static unsigned char bit_pos [64 ] = {
7891 63 , 30 , 3 , 32 , 59 , 14 , 11 , 33 , 60 , 24 , 50 , 9 , 55 , 19 , 21 , 34 ,
7992 61 , 29 , 2 , 53 , 51 , 23 , 41 , 18 , 56 , 28 , 1 , 43 , 46 , 27 , 0 , 35 ,
8093 62 , 31 , 58 , 4 , 5 , 49 , 54 , 6 , 15 , 52 , 12 , 40 , 7 , 42 , 45 , 16 ,
8194 25 , 57 , 48 , 13 , 10 , 39 , 8 , 44 , 20 , 47 , 38 , 22 , 17 , 37 , 36 , 26
8295 };
83- uint32_t folded = (uint32_t )(((x - 1 ) >> 32 ) ^ (x - 1 ));
96+ /* transform 0b01000 -> 0b01111 (mask of the least significant set bit and all bits below it) */
97+ x ^= x - 1 ;
98+ /* fold 64-bit value to 32-bit to be efficient on 32-bit systems */
99+ uint32_t folded = (uint32_t )((x >> 32 ) ^ x );
100+ /* Use Matt Taylor's multiplication trick (2003):
101+ * - multiply by (specially chosen) magic constant 0x78291ACF
102+ * - use top 6 bits of result (>>26) as table index
103+ * Original discussion:
104+ * https://groups.google.com/g/comp.lang.asm.x86/c/3pVGzQGb1ys/m/fPpKBKNi848J
105+ * https://groups.google.com/g/comp.lang.asm.x86/c/3pVGzQGb1ys/m/230qffQJYvQJ
106+ */
84107 return bit_pos [folded * 0x78291ACF >> 26 ];
85108}
86109#endif /* rhash_ctz64 */
@@ -94,10 +117,10 @@ unsigned rhash_ctz64(uint64_t x)
94117 */
95118unsigned rhash_popcount (unsigned x )
96119{
97- x -= (x >>1 ) & 0x55555555 ;
98- x = ((x >> 2 ) & 0x33333333 ) + (x & 0x33333333 );
99- x = ((x >> 4 ) + x ) & 0x0f0f0f0f ;
100- return (x * 0x01010101 ) >> 24 ;
120+ x -= (x >>1 ) & 0x55555555 ;
121+ x = ((x >> 2 ) & 0x33333333 ) + (x & 0x33333333 );
122+ x = ((x >> 4 ) + x ) & 0x0f0f0f0f ;
123+ return (x * 0x01010101 ) >> 24 ;
101124}
102125#endif /* rhash_popcount */
103126
@@ -216,10 +239,10 @@ void rhash_u32_mem_swap(unsigned* arr, int length)
216239# if defined(HAS_GCC_INTEL_CPUID )
217240# include <cpuid.h>
218241# define RHASH_CPUID (id , regs ) \
219- __get_cpuid(id, &(regs[0]), &(regs[1]), &(regs[2]), &(regs[3]));
242+ __get_cpuid(id, &(regs[0]), &(regs[1]), &(regs[2]), &(regs[3]));
220243# if HAS_GNUC (6 , 3 )
221244# define RHASH_CPUIDEX (id , sub_id , regs ) \
222- __get_cpuid_count(id, sub_id, &regs[0], &regs[1], &regs[2], &regs[3]);
245+ __get_cpuid_count(id, sub_id, &regs[0], &regs[1], &regs[2], &regs[3]);
223246# endif
224247# elif defined(HAS_MSVC_INTEL_CPUID )
225248# define RHASH_CPUID (id , regs ) __cpuid((int*)regs, id)
@@ -245,7 +268,7 @@ static uint64_t get_cpuid_features(void)
245268 if (cpu_info [0 ] >= 7 )
246269 {
247270 /* Request CPUID AX=7 CX=0 to get SHANI bit */
248- RHASH_CPUIDEX (7 , 0 , cpu_info );
271+ RHASH_CPUIDEX (7 , 0 , cpu_info );
249272 result |= (cpu_info [1 ] & (1 << 29 ));
250273 }
251274#endif
0 commit comments