
Commit b78d414 (1 parent: 07acf41)

Optimize non-relaxed load/store on pre-v6 ARM Linux/Android

6 files changed: 159 additions & 12 deletions
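What changed, in short: on pre-v6 ARM Linux/Android, Acquire/SeqCst loads are now a plain `ldr` followed by a call to the kernel's `__kuser_memory_barrier` helper, and Release/SeqCst stores wrap a plain `str` in that barrier call (SeqCst adds a trailing barrier); Relaxed operations still delegate to the standard library atomics. The sketch below is an illustrative use of the affected public API on such a target, assuming the crate is consumed as published (`portable_atomic`); it is not part of the diff.

```rust
use portable_atomic::{AtomicU32, Ordering};

static READY: AtomicU32 = AtomicU32::new(0);

fn publish() {
    // On pre-v6 ARM Linux this Release store now compiles to
    // a __kuser_memory_barrier call followed by a plain `str`.
    READY.store(1, Ordering::Release);
}

fn wait_until_ready() {
    // Acquire load: plain `ldr` followed by __kuser_memory_barrier.
    while READY.load(Ordering::Acquire) == 0 {
        core::hint::spin_loop();
    }
}
```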

bench/benches/imp/spinlock_fallback.rs
Lines changed: 2 additions & 4 deletions

@@ -9,12 +9,10 @@
 //
 // This module is currently only enabled on benchmark.
 
-use core::{
-    cell::UnsafeCell,
-    sync::atomic::{AtomicUsize, Ordering},
-};
+use core::{cell::UnsafeCell, sync::atomic::Ordering};
 
 use super::fallback::utils::{Backoff, CachePadded};
+use crate::imp::AtomicUsize;
 
 struct Spinlock {
     state: AtomicUsize,
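The import swap matters because the benchmark spinlock's state must now be the crate's own `AtomicUsize`, whose non-relaxed load/store issue the kernel memory barrier on pre-v6 ARM, rather than `core`'s. A simplified sketch of the idea using the public API (a stand-in for the internal `crate::imp::AtomicUsize`; the benchmark's actual locking logic is unchanged by this commit and assumes a target where `swap` is available):

```rust
use portable_atomic::{AtomicUsize, Ordering};

struct Spinlock {
    state: AtomicUsize,
}

impl Spinlock {
    fn lock(&self) {
        // The Acquire ordering on the winning swap pairs with the Release
        // store in unlock(); on pre-v6 ARM both sides now go through
        // __kuser_memory_barrier.
        while self.state.swap(1, Ordering::Acquire) != 0 {
            core::hint::spin_loop();
        }
    }
    fn unlock(&self) {
        self.state.store(0, Ordering::Release);
    }
}
```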

build.rs
Lines changed: 3 additions & 1 deletion

@@ -262,12 +262,14 @@ fn main() {
                 }
             }
             target_feature_if("mclass", is_mclass, &version, Nightly);
+            let mut v5te = known && subarch.starts_with("v5te");
             let v6 = known
                 && (subarch.starts_with("v6")
                     || subarch.starts_with("v7")
                     || subarch.starts_with("v8")
                     || subarch.starts_with("v9"));
-            target_feature_if("v6", v6, &version, Nightly);
+            v5te |= target_feature_if("v6", v6, &version, Nightly);
+            target_feature_if("v5te", v5te, &version, Nightly);
         }
         "powerpc64" => {
             // For Miri and ThreadSanitizer.
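The new `v5te` flag encodes a feature implication: any core with v6 (or v7/v8/v9) also has the ARMv5TE register-form `blx` instruction, so detecting v6 should enable v5te as well, which is what `v5te |= target_feature_if("v6", ...)` does. A hypothetical minimal sketch of such a helper (the real one in this build.rs takes extra version/channel parameters and handles stable vs. nightly detection; only the returned flag matters for the implication):

```rust
// Hypothetical sketch: emit a cfg for a target feature and report whether it
// was enabled, so the caller can propagate implied features (v6 => v5te).
fn target_feature_if(name: &str, enabled: bool) -> bool {
    if enabled {
        println!("cargo:rustc-cfg=portable_atomic_target_feature=\"{name}\"");
    }
    enabled
}
```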

src/imp/arm_linux.rs
Lines changed: 122 additions & 2 deletions

@@ -13,17 +13,27 @@
 // be possible to omit the dynamic kernel version check if the std feature is enabled on Rust 1.64+.
 // https://blog.rust-lang.org/2022/08/01/Increasing-glibc-kernel-requirements.html
 
+#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
 #[path = "fallback/outline_atomics.rs"]
 mod fallback;
 
-use core::{arch::asm, cell::UnsafeCell, mem, sync::atomic::Ordering};
+use core::{arch::asm, sync::atomic::Ordering};
+#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
+use core::{cell::UnsafeCell, mem};
 
-use crate::utils::{Pair, U64};
+use super::core_atomic::{
+    AtomicI16, AtomicI32, AtomicI8, AtomicIsize, AtomicPtr, AtomicU16, AtomicU32, AtomicU8,
+    AtomicUsize,
+};
 
 // https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt
 const KUSER_HELPER_VERSION: usize = 0xFFFF0FFC;
+// __kuser_helper_version >= 3 (kernel version 2.6.15+)
+const KUSER_MEMORY_BARRIER: usize = 0xFFFF0FA0;
 // __kuser_helper_version >= 5 (kernel version 3.1+)
+#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
 const KUSER_CMPXCHG64: usize = 0xFFFF0F60;
+
 #[inline]
 fn __kuser_helper_version() -> i32 {
     use core::sync::atomic::AtomicI32;
@@ -39,6 +49,7 @@ fn __kuser_helper_version() -> i32 {
     CACHE.store(v, Ordering::Relaxed);
     v
 }
+#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
 #[inline]
 fn has_kuser_cmpxchg64() -> bool {
     // Note: detect_false cfg is intended to make it easy for portable-atomic developers to
@@ -49,6 +60,7 @@ fn has_kuser_cmpxchg64() -> bool {
     }
     __kuser_helper_version() >= 5
 }
+#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
 #[inline]
 unsafe fn __kuser_cmpxchg64(old_val: *const u64, new_val: *const u64, ptr: *mut u64) -> bool {
     // SAFETY: the caller must uphold the safety contract.
@@ -59,7 +71,107 @@ unsafe fn __kuser_cmpxchg64(old_val: *const u64, new_val: *const u64, ptr: *mut
     }
 }
 
+#[cfg(any(target_feature = "v5te", portable_atomic_target_feature = "v5te"))]
+macro_rules! blx {
+    ($addr:tt) => {
+        concat!("blx ", $addr)
+    };
+}
+#[cfg(not(any(target_feature = "v5te", portable_atomic_target_feature = "v5te")))]
+macro_rules! blx {
+    ($addr:tt) => {
+        concat!("mov lr, pc", "\n", "bx ", $addr)
+    };
+}
+
+macro_rules! atomic_load_store {
+    ($([$($generics:tt)*])? $atomic_type:ident, $value_type:ty, $asm_suffix:tt) => {
+        impl $(<$($generics)*>)? $atomic_type $(<$($generics)*>)? {
+            #[cfg_attr(
+                any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                track_caller
+            )]
+            #[inline]
+            pub(crate) fn load(&self, order: Ordering) -> $value_type {
+                crate::utils::assert_load_ordering(order);
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe {
+                    match order {
+                        Ordering::Relaxed => self.inner.load(Ordering::Relaxed),
+                        // Acquire and SeqCst loads are equivalent.
+                        Ordering::Acquire | Ordering::SeqCst => {
+                            debug_assert!(__kuser_helper_version() >= 3);
+                            let src = self.as_ptr();
+                            let out;
+                            asm!(
+                                concat!("ldr", $asm_suffix, " {out}, [{src}]"),
+                                blx!("{kuser_memory_barrier}"),
+                                src = in(reg) src,
+                                out = lateout(reg) out,
+                                kuser_memory_barrier = inout(reg) KUSER_MEMORY_BARRIER => _,
+                                out("lr") _,
+                                options(nostack, preserves_flags),
+                            );
+                            out
+                        }
+                        _ => unreachable!("{:?}", order),
+                    }
+                }
+            }
+            #[inline]
+            #[cfg_attr(
+                any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
+                track_caller
+            )]
+            pub(crate) fn store(&self, val: $value_type, order: Ordering) {
+                crate::utils::assert_store_ordering(order);
+                let dst = self.as_ptr();
+                // SAFETY: any data races are prevented by atomic intrinsics and the raw
+                // pointer passed in is valid because we got it from a reference.
+                unsafe {
+                    macro_rules! atomic_store_release {
+                        ($acquire:expr) => {{
+                            debug_assert!(__kuser_helper_version() >= 3);
+                            asm!(
+                                blx!("{kuser_memory_barrier}"),
+                                concat!("str", $asm_suffix, " {val}, [{dst}]"),
+                                $acquire,
+                                dst = in(reg) dst,
+                                val = in(reg) val,
+                                kuser_memory_barrier = inout(reg) KUSER_MEMORY_BARRIER => _,
+                                out("lr") _,
+                                options(nostack, preserves_flags),
+                            )
+                        }};
+                    }
+                    match order {
+                        Ordering::Relaxed => self.inner.store(val, Ordering::Relaxed),
+                        Ordering::Release => atomic_store_release!(""),
+                        Ordering::SeqCst => atomic_store_release!(blx!("{kuser_memory_barrier}")),
+                        _ => unreachable!("{:?}", order),
+                    }
+                }
+            }
+        }
+    };
+}
+
+atomic_load_store!(AtomicI8, i8, "b");
+atomic_load_store!(AtomicU8, u8, "b");
+atomic_load_store!(AtomicI16, i16, "h");
+atomic_load_store!(AtomicU16, u16, "h");
+atomic_load_store!(AtomicI32, i32, "");
+atomic_load_store!(AtomicU32, u32, "");
+atomic_load_store!(AtomicIsize, isize, "");
+atomic_load_store!(AtomicUsize, usize, "");
+atomic_load_store!([T] AtomicPtr, *mut T, "");
+
+#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
+use crate::utils::{Pair, U64};
+
 // 64-bit atomic load by two 32-bit atomic loads.
+#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
 #[inline]
 unsafe fn byte_wise_atomic_load(src: *const u64) -> u64 {
     // SAFETY: the caller must uphold the safety contract.
@@ -77,6 +189,7 @@ unsafe fn byte_wise_atomic_load(src: *const u64) -> u64 {
     }
 }
 
+#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
 #[inline(always)]
 unsafe fn atomic_update_kuser_cmpxchg64<F>(dst: *mut u64, mut f: F) -> u64
 where
@@ -108,6 +221,7 @@ macro_rules! atomic_with_ifunc {
        unsafe fn $name:ident($($arg:tt)*) $(-> $ret_ty:ty)? { $($kuser_cmpxchg64_fn_body:tt)* }
        fallback = $seqcst_fallback_fn:ident
    ) => {
+       #[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
        #[inline]
        unsafe fn $name($($arg)*) $(-> $ret_ty)? {
            unsafe fn kuser_cmpxchg64_fn($($arg)*) $(-> $ret_ty)? {
@@ -252,6 +366,7 @@ atomic_with_ifunc! {
    fallback = atomic_neg_seqcst
 }
 
+#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
 macro_rules! atomic64 {
    ($atomic_type:ident, $int_type:ident, $atomic_max:ident, $atomic_min:ident) => {
        #[repr(C, align(8))]
@@ -441,7 +556,9 @@ macro_rules! atomic64 {
    };
 }
 
+#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
 atomic64!(AtomicI64, i64, atomic_max, atomic_min);
+#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
 atomic64!(AtomicU64, u64, atomic_umax, atomic_umin);
 
 #[allow(
@@ -462,10 +579,13 @@ mod tests {
        assert_eq!(version, unsafe { (KUSER_HELPER_VERSION as *const i32).read() });
    }
 
+   #[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
    test_atomic_int!(i64);
+   #[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
    test_atomic_int!(u64);
 
    // load/store/swap implementation is not affected by signedness, so it is
    // enough to test only unsigned types.
+   #[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
    stress_test!(u64);
 }
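Why two `blx!` expansions: the register-form `blx` instruction exists only on ARMv5T and later, so on older cores the call into the kuser helper is emulated with `mov lr, pc` followed by `bx`. Reading `pc` in ARM mode yields the address of the current instruction plus 8, which is exactly the instruction after the `bx`, so the helper's return lands in the right place. Below is a self-contained sketch of the barrier call on such a core, mirroring the operands used in the diff (assumes ARM Linux and `__kuser_helper_version() >= 3`, the same precondition the diff's `debug_assert!` checks):

```rust
#[cfg(all(target_arch = "arm", target_os = "linux"))]
#[inline]
fn kuser_memory_barrier() {
    use core::arch::asm;
    // Kernel-provided helper, available when __kuser_helper_version >= 3.
    const KUSER_MEMORY_BARRIER: usize = 0xFFFF_0FA0;
    // SAFETY: the kernel guarantees this helper on 2.6.15+; real callers
    // must verify the helper version first, as the diff does.
    unsafe {
        asm!(
            "mov lr, pc", // lr = this instruction + 8 = just past the `bx`
            "bx {helper}", // call the helper; it returns via `bx lr`
            helper = inout(reg) KUSER_MEMORY_BARRIER => _,
            out("lr") _,
            options(nostack, preserves_flags),
        );
    }
}
```

On v5te and later targets the two instructions collapse into a single `blx {helper}`, which is what the first `blx!` arm emits.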

src/imp/core_atomic.rs
Lines changed: 30 additions & 2 deletions

@@ -22,7 +22,7 @@ unsafe impl Sync for NotRefUnwindSafe {}
 
 #[repr(transparent)]
 pub(crate) struct AtomicPtr<T> {
-    inner: core::sync::atomic::AtomicPtr<T>,
+    pub(crate) inner: core::sync::atomic::AtomicPtr<T>,
     // Prevent RefUnwindSafe from being propagated from the std atomic type. See NotRefUnwindSafe for more.
     _not_ref_unwind_safe: PhantomData<NotRefUnwindSafe>,
 }
@@ -47,6 +47,13 @@ impl<T> AtomicPtr<T> {
     pub(crate) fn into_inner(self) -> *mut T {
         self.inner.into_inner()
     }
+    #[cfg(not(all(
+        not(any(miri, portable_atomic_sanitize_thread)),
+        not(portable_atomic_no_asm),
+        target_arch = "arm",
+        any(target_os = "linux", target_os = "android"),
+        not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
+    )))]
     #[inline]
     #[cfg_attr(
         any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
@@ -56,6 +63,13 @@ impl<T> AtomicPtr<T> {
         crate::utils::assert_load_ordering(order); // for track_caller (compiler can omit double check)
         self.inner.load(order)
     }
+    #[cfg(not(all(
+        not(any(miri, portable_atomic_sanitize_thread)),
+        not(portable_atomic_no_asm),
+        target_arch = "arm",
+        any(target_os = "linux", target_os = "android"),
+        not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
+    )))]
     #[inline]
     #[cfg_attr(
         any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
@@ -127,7 +141,7 @@ macro_rules! atomic_int {
    ($atomic_type:ident, $int_type:ident) => {
        #[repr(transparent)]
        pub(crate) struct $atomic_type {
-           inner: core::sync::atomic::$atomic_type,
+           pub(crate) inner: core::sync::atomic::$atomic_type,
            // Prevent RefUnwindSafe from being propagated from the std atomic type. See NotRefUnwindSafe for more.
            _not_ref_unwind_safe: PhantomData<NotRefUnwindSafe>,
        }
@@ -177,6 +191,13 @@ macro_rules! atomic_int {
        pub(crate) fn into_inner(self) -> $int_type {
            self.inner.into_inner()
        }
+       #[cfg(not(all(
+           not(any(miri, portable_atomic_sanitize_thread)),
+           not(portable_atomic_no_asm),
+           target_arch = "arm",
+           any(target_os = "linux", target_os = "android"),
+           not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
+       )))]
        #[inline]
        #[cfg_attr(
            any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
@@ -186,6 +207,13 @@ macro_rules! atomic_int {
            crate::utils::assert_load_ordering(order); // for track_caller (compiler can omit double check)
            self.inner.load(order)
        }
+       #[cfg(not(all(
+           not(any(miri, portable_atomic_sanitize_thread)),
+           not(portable_atomic_no_asm),
+           target_arch = "arm",
+           any(target_os = "linux", target_os = "android"),
+           not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
+       )))]
        #[inline]
        #[cfg_attr(
            any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
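Making `inner` `pub(crate)` and cfg-ing load/store out of core_atomic.rs is the other half of the mechanism: on exactly the targets where arm_linux.rs defines its own load/store (via the `atomic_load_store!` macro above), the delegating definitions here disappear, and because Rust permits multiple inherent impl blocks on one type, each build sees exactly one definition of each method. A minimal sketch of the pattern, with the long arm/linux/pre-v6 predicate abbreviated to a hypothetical `special_target` cfg flag:

```rust
use core::sync::atomic::Ordering;

pub(crate) struct AtomicU32 {
    // pub(crate) so the cfg'd-in impl in another module can reach it.
    pub(crate) inner: core::sync::atomic::AtomicU32,
}

// Ordinary targets: delegate straight to the standard library.
#[cfg(not(special_target))]
impl AtomicU32 {
    #[inline]
    pub(crate) fn load(&self, order: Ordering) -> u32 {
        self.inner.load(order)
    }
}

// Special target: another module supplies the asm-based definition.
#[cfg(special_target)]
impl AtomicU32 {
    #[inline]
    pub(crate) fn load(&self, order: Ordering) -> u32 {
        // the barrier-aware implementation (see arm_linux.rs) lives here
        unimplemented!("asm-based load for {:?}", order)
    }
}
```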

src/imp/fallback/seq_lock_wide.rs
Lines changed: 2 additions & 1 deletion

@@ -4,10 +4,11 @@
 
 use core::{
     mem::ManuallyDrop,
-    sync::atomic::{self, AtomicUsize, Ordering},
+    sync::atomic::{self, Ordering},
 };
 
 use super::utils::Backoff;
+use crate::imp::AtomicUsize;
 
 // See mod.rs for details.
 pub(super) type AtomicChunk = AtomicUsize;

src/imp/mod.rs
Lines changed: 0 additions & 2 deletions

@@ -113,15 +113,13 @@ mod powerpc64;
 mod s390x;
 
 // pre-v6 ARM Linux 64-bit atomics
-#[cfg(feature = "fallback")]
 // Miri and Sanitizer do not support inline assembly.
 #[cfg(all(
     target_arch = "arm",
     not(any(miri, portable_atomic_sanitize_thread)),
     not(portable_atomic_no_asm),
     any(target_os = "linux", target_os = "android"),
     not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
-    not(portable_atomic_no_outline_atomics),
 ))]
 #[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_64))]
 #[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "64")))]
