Skip to content

Commit 3689f5f

Browse files
committed
Add default minimal factory for xcr0
1 parent 8688967 commit 3689f5f

File tree

3 files changed

+49
-44
lines changed

3 files changed

+49
-44
lines changed

include/xsimd/config/xsimd_cpuid.hpp

Lines changed: 23 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -115,52 +115,37 @@ namespace xsimd
115115
rvv = bool(getauxval(AT_HWCAP) & HWCAP_V);
116116
#endif
117117
#endif
118+
// Safe on all platforms; the values will simply be false
118119
const auto cpuid = xsimd::x86_cpu_id::read();
119-
auto xcr0 = xsimd::x86_xcr0::make_false();
120-
121-
bool sse_enabled = true;
122-
// AVX and AVX512 strictly require OSXSAVE to be enabled by the OS.
123-
// If OSXSAVE is disabled (e.g., via bcdedit /set xsavedisable 1),
124-
// AVX state won't be preserved across context switches, so AVX cannot be used.
125-
bool avx_enabled = false;
126-
bool avx512_enabled = false;
127-
128-
if (cpuid.osxsave())
129-
{
130-
xcr0 = xsimd::x86_xcr0::read();
131-
132-
sse_enabled = xcr0.sse_enabled();
133-
avx_enabled = xcr0.avx_enabled();
134-
avx512_enabled = xcr0.avx512_enabled();
135-
}
136-
137-
sse2 = cpuid.sse2() && sse_enabled;
138-
sse3 = cpuid.sse3() && sse_enabled;
139-
ssse3 = cpuid.ssse3() && sse_enabled;
140-
sse4_1 = cpuid.sse4_1() && sse_enabled;
141-
sse4_2 = cpuid.sse4_2() && sse_enabled;
142-
fma3_sse42 = cpuid.fma3() && sse_enabled;
120+
const auto xcr0 = cpuid.osxsave() ? x86_xcr0::read() : x86_xcr0::safe_default();
121+
122+
sse2 = cpuid.sse2() && xcr0.sse_enabled();
123+
sse3 = cpuid.sse3() && xcr0.sse_enabled();
124+
ssse3 = cpuid.ssse3() && xcr0.sse_enabled();
125+
sse4_1 = cpuid.sse4_1() && xcr0.sse_enabled();
126+
sse4_2 = cpuid.sse4_2() && xcr0.sse_enabled();
127+
fma3_sse42 = cpuid.fma3() && xcr0.sse_enabled();
143128

144129
// sse4a not implemented in cpu_id yet
145130
// xop not implemented in cpu_id yet
146131

147-
avx = cpuid.avx() && avx_enabled;
132+
avx = cpuid.avx() && xcr0.avx_enabled();
148133
fma3_avx = avx && fma3_sse42;
149-
fma4 = cpuid.fma4() && avx_enabled;
150-
avx2 = cpuid.avx2() && avx_enabled;
151-
avxvnni = cpuid.avxvnni() && avx_enabled;
134+
fma4 = cpuid.fma4() && xcr0.avx_enabled();
135+
avx2 = cpuid.avx2() && xcr0.avx_enabled();
136+
avxvnni = cpuid.avxvnni() && xcr0.avx_enabled();
152137
fma3_avx2 = avx2 && fma3_sse42;
153138

154-
avx512f = cpuid.avx512f() && avx512_enabled;
155-
avx512cd = cpuid.avx512cd() && avx512_enabled;
156-
avx512dq = cpuid.avx512dq() && avx512_enabled;
157-
avx512bw = cpuid.avx512bw() && avx512_enabled;
158-
avx512er = cpuid.avx512er() && avx512_enabled;
159-
avx512pf = cpuid.avx512pf() && avx512_enabled;
160-
avx512ifma = cpuid.avx512ifma() && avx512_enabled;
161-
avx512vbmi = cpuid.avx512vbmi() && avx512_enabled;
162-
avx512vbmi2 = cpuid.avx512vbmi2() && avx512_enabled;
163-
avx512vnni_bw = cpuid.avx512vnni_bw() && avx512_enabled;
139+
avx512f = cpuid.avx512f() && xcr0.avx512_enabled();
140+
avx512cd = cpuid.avx512cd() && xcr0.avx512_enabled();
141+
avx512dq = cpuid.avx512dq() && xcr0.avx512_enabled();
142+
avx512bw = cpuid.avx512bw() && xcr0.avx512_enabled();
143+
avx512er = cpuid.avx512er() && xcr0.avx512_enabled();
144+
avx512pf = cpuid.avx512pf() && xcr0.avx512_enabled();
145+
avx512ifma = cpuid.avx512ifma() && xcr0.avx512_enabled();
146+
avx512vbmi = cpuid.avx512vbmi() && xcr0.avx512_enabled();
147+
avx512vbmi2 = cpuid.avx512vbmi2() && xcr0.avx512_enabled();
148+
avx512vnni_bw = cpuid.avx512vnni_bw() && xcr0.avx512_enabled();
164149
avx512vnni_vbmi2 = avx512vbmi2 && avx512vnni_bw;
165150
}
166151
};

include/xsimd/utils/bits.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,13 @@ namespace xsimd
3535
constexpr I mask = make_bit_mask<I>(static_cast<I>(Bits)...);
3636
return (value & mask) == mask;
3737
}
38+
39+
/**
 * Return @p value OR-ed with the mask that make_bit_mask<I> builds for @p Bit.
 *
 * Bits already set in @p value are preserved, since the result is `value | mask`.
 *
 * @tparam Bit Bit designator, cast to I and forwarded to make_bit_mask.
 * @tparam I Type of both the input and the returned value.
 * @param value Input value to combine with the mask.
 * @return `value | mask`.
 *
 * NOTE(review): make_bit_mask is defined outside this hunk; presumably it maps
 * a bit index to `1 << index` -- confirm before relying on that.
 */
template <int Bit, typename I>
40+
constexpr I set_bit(I value)
41+
{
42+
constexpr I mask = make_bit_mask<I>(static_cast<I>(Bit));
43+
return value | mask;
44+
}
3845
}
3946
}
4047

include/xsimd/xsimd_cpu_features_x86.hpp

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,16 +47,29 @@ namespace xsimd
4747
public:
4848
using reg_t = detail::xcr0_reg_t;
4949

50+
static constexpr reg_t sse_bit = 1;
51+
static constexpr reg_t avx_bit = 2;
52+
static constexpr reg_t avx512_bit = 6;
53+
5054
/** Parse a XCR0 value into individual components. */
5155
constexpr explicit x86_xcr0(reg_t low) noexcept
5256
: m_low(low)
5357
{
5458
}
5559

56-
/** Create an object that has all features set to false. */
57-
static constexpr x86_xcr0 make_false()
60+
/**
61+
* Create a default value with only SSE enabled.
62+
*
63+
* AVX and AVX512 strictly require OSXSAVE to be enabled by the OS.
64+
* If OSXSAVE is disabled (e.g., via bcdedit /set xsavedisable 1), AVX state won't
65+
* be preserved across context switches, so AVX cannot be used.
66+
* SSE is therefore the only value safe to assume.
67+
*/
68+
constexpr static x86_xcr0 safe_default() noexcept
5869
{
59-
return x86_xcr0(0);
70+
reg_t low = {};
71+
low = utils::set_bit<sse_bit>(low);
72+
return x86_xcr0(low);
6073
}
6174

6275
/** Read the XCR0 register from the CPU if on the correct architecture. */
@@ -67,20 +80,20 @@ namespace xsimd
6780

6881
constexpr bool sse_enabled() const noexcept
6982
{
70-
return utils::bit_is_set<1>(m_low);
83+
return utils::bit_is_set<sse_bit>(m_low);
7184
}
7285

7386
constexpr bool avx_enabled() const noexcept
7487
{
7588
// Check both SSE and AVX bits even though AVX must imply SSE
76-
return utils::bit_is_set<1, 2>(m_low);
89+
return utils::bit_is_set<sse_bit, avx_bit>(m_low);
7790
}
7891

7992
constexpr bool avx512_enabled() const noexcept
8093
{
8194
// Check all SSE, AVX, and AVX512 bits even though AVX512 must
8295
// imply AVX and SSE
83-
return utils::bit_is_set<1, 2, 6>(m_low);
96+
return utils::bit_is_set<sse_bit, avx_bit, avx512_bit>(m_low);
8497
}
8598

8699
private:

0 commit comments

Comments
 (0)