Skip to content

Commit 3876a8e

Browse files
committed
Optimize non-relaxed load/store on pre-v6 ARM Linux/Android
1 parent bf8aa1d commit 3876a8e

File tree

9 files changed

+168
-17
lines changed

9 files changed

+168
-17
lines changed

bench/benches/bench.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ use criterion::{criterion_group, criterion_main, Criterion};
1717
#[allow(dead_code, unused_macros)]
1818
#[path = "../../src/utils.rs"]
1919
mod utils;
20+
2021
#[allow(dead_code, unused_macros)]
2122
#[macro_use]
2223
#[path = "../../src/tests"]
@@ -45,6 +46,9 @@ mod arch;
4546
#[allow(dead_code, unused_imports)]
4647
#[path = "../../src/imp/arm_linux.rs"]
4748
mod arch;
49+
#[allow(dead_code, unused_imports)]
50+
#[path = "../../src/imp/mod.rs"]
51+
mod imp;
4852
#[cfg(any(target_arch = "x86_64", all(target_arch = "aarch64", target_endian = "little")))]
4953
#[allow(dead_code, unused_imports)]
5054
#[path = "../../src/imp/atomic128/intrinsics.rs"]

bench/benches/imp/spinlock_fallback.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,10 @@
99
//
1010
// This module is currently only enabled in benchmarks.
1111

12-
use core::{
13-
cell::UnsafeCell,
14-
sync::atomic::{AtomicUsize, Ordering},
15-
};
12+
use core::{cell::UnsafeCell, sync::atomic::Ordering};
1613

1714
use super::fallback::utils::{Backoff, CachePadded};
15+
use crate::imp::AtomicUsize;
1816

1917
struct Spinlock {
2018
state: AtomicUsize,

build.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -277,12 +277,14 @@ fn main() {
277277
}
278278
}
279279
target_feature_if("mclass", is_mclass, &version, None, true);
280+
let mut v5te = known && subarch.starts_with("v5te");
280281
let v6 = known
281282
&& (subarch.starts_with("v6")
282283
|| subarch.starts_with("v7")
283284
|| subarch.starts_with("v8")
284285
|| subarch.starts_with("v9"));
285-
target_feature_if("v6", v6, &version, None, true);
286+
v5te |= target_feature_if("v6", v6, &version, None, true);
287+
target_feature_if("v5te", v5te, &version, None, true);
286288
}
287289
"powerpc64" => {
288290
let target_endian =

src/imp/arm_linux.rs

Lines changed: 122 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,19 +13,29 @@
1313
// be possible to omit the dynamic kernel version check if the std feature is enabled on Rust 1.64+.
1414
// https://blog.rust-lang.org/2022/08/01/Increasing-glibc-kernel-requirements.html
1515

16+
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
1617
#[path = "fallback/outline_atomics.rs"]
1718
mod fallback;
1819

1920
#[cfg(not(portable_atomic_no_asm))]
2021
use core::arch::asm;
21-
use core::{cell::UnsafeCell, mem, sync::atomic::Ordering};
22+
use core::sync::atomic::Ordering;
23+
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
24+
use core::{cell::UnsafeCell, mem};
2225

23-
use crate::utils::{Pair, U64};
26+
use super::core_atomic::{
27+
AtomicI16, AtomicI32, AtomicI8, AtomicIsize, AtomicPtr, AtomicU16, AtomicU32, AtomicU8,
28+
AtomicUsize,
29+
};
2430

2531
// https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt
2632
const KUSER_HELPER_VERSION: usize = 0xFFFF0FFC;
33+
// __kuser_helper_version >= 3 (kernel version 2.6.15+)
34+
const KUSER_MEMORY_BARRIER: usize = 0xFFFF0FA0;
2735
// __kuser_helper_version >= 5 (kernel version 3.1+)
36+
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
2837
const KUSER_CMPXCHG64: usize = 0xFFFF0F60;
38+
2939
#[inline]
3040
fn __kuser_helper_version() -> i32 {
3141
use core::sync::atomic::AtomicI32;
@@ -41,6 +51,7 @@ fn __kuser_helper_version() -> i32 {
4151
CACHE.store(v, Ordering::Relaxed);
4252
v
4353
}
54+
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
4455
#[inline]
4556
fn has_kuser_cmpxchg64() -> bool {
4657
// Note: detect_false cfg is intended to make it easy for portable-atomic developers to
@@ -51,6 +62,7 @@ fn has_kuser_cmpxchg64() -> bool {
5162
}
5263
__kuser_helper_version() >= 5
5364
}
65+
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
5466
#[inline]
5567
unsafe fn __kuser_cmpxchg64(old_val: *const u64, new_val: *const u64, ptr: *mut u64) -> bool {
5668
// SAFETY: the caller must uphold the safety contract.
@@ -61,7 +73,107 @@ unsafe fn __kuser_cmpxchg64(old_val: *const u64, new_val: *const u64, ptr: *mut
6173
}
6274
}
6375

76+
#[cfg(any(target_feature = "v5te", portable_atomic_target_feature = "v5te"))]
77+
macro_rules! blx {
78+
($addr:tt) => {
79+
concat!("blx ", $addr)
80+
};
81+
}
82+
#[cfg(not(any(target_feature = "v5te", portable_atomic_target_feature = "v5te")))]
83+
macro_rules! blx {
84+
($addr:tt) => {
85+
concat!("mov lr, pc", "\n", "bx ", $addr)
86+
};
87+
}
88+
89+
macro_rules! atomic_load_store {
90+
($([$($generics:tt)*])? $atomic_type:ident, $value_type:ty, $asm_suffix:tt) => {
91+
impl $(<$($generics)*>)? $atomic_type $(<$($generics)*>)? {
92+
#[cfg_attr(
93+
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
94+
track_caller
95+
)]
96+
#[inline]
97+
pub(crate) fn load(&self, order: Ordering) -> $value_type {
98+
crate::utils::assert_load_ordering(order);
99+
// SAFETY: any data races are prevented by atomic intrinsics and the raw
100+
// pointer passed in is valid because we got it from a reference.
101+
unsafe {
102+
match order {
103+
Ordering::Relaxed => self.inner.load(Ordering::Relaxed),
104+
// Acquire and SeqCst loads are equivalent.
105+
Ordering::Acquire | Ordering::SeqCst => {
106+
debug_assert!(__kuser_helper_version() >= 3);
107+
let src = self.as_ptr();
108+
let out;
109+
asm!(
110+
concat!("ldr", $asm_suffix, " {out}, [{src}]"),
111+
blx!("{kuser_memory_barrier}"),
112+
src = in(reg) src,
113+
out = lateout(reg) out,
114+
kuser_memory_barrier = inout(reg) KUSER_MEMORY_BARRIER => _,
115+
out("lr") _,
116+
options(nostack, preserves_flags),
117+
);
118+
out
119+
}
120+
_ => unreachable!("{:?}", order),
121+
}
122+
}
123+
}
124+
#[inline]
125+
#[cfg_attr(
126+
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
127+
track_caller
128+
)]
129+
pub(crate) fn store(&self, val: $value_type, order: Ordering) {
130+
crate::utils::assert_store_ordering(order);
131+
let dst = self.as_ptr();
132+
// SAFETY: any data races are prevented by atomic intrinsics and the raw
133+
// pointer passed in is valid because we got it from a reference.
134+
unsafe {
135+
macro_rules! atomic_store_release {
136+
($acquire:expr) => {{
137+
debug_assert!(__kuser_helper_version() >= 3);
138+
asm!(
139+
blx!("{kuser_memory_barrier}"),
140+
concat!("str", $asm_suffix, " {val}, [{dst}]"),
141+
$acquire,
142+
dst = in(reg) dst,
143+
val = in(reg) val,
144+
kuser_memory_barrier = inout(reg) KUSER_MEMORY_BARRIER => _,
145+
out("lr") _,
146+
options(nostack, preserves_flags),
147+
)
148+
}};
149+
}
150+
match order {
151+
Ordering::Relaxed => self.inner.store(val, Ordering::Relaxed),
152+
Ordering::Release => atomic_store_release!(""),
153+
Ordering::SeqCst => atomic_store_release!(blx!("{kuser_memory_barrier}")),
154+
_ => unreachable!("{:?}", order),
155+
}
156+
}
157+
}
158+
}
159+
};
160+
}
161+
162+
atomic_load_store!(AtomicI8, i8, "b");
163+
atomic_load_store!(AtomicU8, u8, "b");
164+
atomic_load_store!(AtomicI16, i16, "h");
165+
atomic_load_store!(AtomicU16, u16, "h");
166+
atomic_load_store!(AtomicI32, i32, "");
167+
atomic_load_store!(AtomicU32, u32, "");
168+
atomic_load_store!(AtomicIsize, isize, "");
169+
atomic_load_store!(AtomicUsize, usize, "");
170+
atomic_load_store!([T] AtomicPtr, *mut T, "");
171+
172+
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
173+
use crate::utils::{Pair, U64};
174+
64175
// 64-bit atomic load implemented as two 32-bit atomic loads.
176+
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
65177
#[inline]
66178
unsafe fn byte_wise_atomic_load(src: *const u64) -> u64 {
67179
// SAFETY: the caller must uphold the safety contract.
@@ -79,6 +191,7 @@ unsafe fn byte_wise_atomic_load(src: *const u64) -> u64 {
79191
}
80192
}
81193

194+
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
82195
#[inline(always)]
83196
unsafe fn atomic_update_kuser_cmpxchg64<F>(dst: *mut u64, mut f: F) -> u64
84197
where
@@ -110,6 +223,7 @@ macro_rules! atomic_with_ifunc {
110223
unsafe fn $name:ident($($arg:tt)*) $(-> $ret_ty:ty)? { $($kuser_cmpxchg64_fn_body:tt)* }
111224
fallback = $seqcst_fallback_fn:ident
112225
) => {
226+
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
113227
#[inline]
114228
unsafe fn $name($($arg)*) $(-> $ret_ty)? {
115229
unsafe fn kuser_cmpxchg64_fn($($arg)*) $(-> $ret_ty)? {
@@ -254,6 +368,7 @@ atomic_with_ifunc! {
254368
fallback = atomic_neg_seqcst
255369
}
256370

371+
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
257372
macro_rules! atomic64 {
258373
($atomic_type:ident, $int_type:ident, $atomic_max:ident, $atomic_min:ident) => {
259374
#[repr(C, align(8))]
@@ -443,7 +558,9 @@ macro_rules! atomic64 {
443558
};
444559
}
445560

561+
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
446562
atomic64!(AtomicI64, i64, atomic_max, atomic_min);
563+
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
447564
atomic64!(AtomicU64, u64, atomic_umax, atomic_umin);
448565

449566
#[allow(
@@ -464,10 +581,13 @@ mod tests {
464581
assert_eq!(version, unsafe { (KUSER_HELPER_VERSION as *const i32).read() });
465582
}
466583

584+
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
467585
test_atomic_int!(i64);
586+
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
468587
test_atomic_int!(u64);
469588

470589
// load/store/swap implementation is not affected by signedness, so it is
471590
// enough to test only unsigned types.
591+
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
472592
stress_test!(u64);
473593
}

src/imp/core_atomic.rs

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ unsafe impl Sync for NoRefUnwindSafe {}
2222

2323
#[repr(transparent)]
2424
pub(crate) struct AtomicPtr<T> {
25-
inner: core::sync::atomic::AtomicPtr<T>,
25+
pub(crate) inner: core::sync::atomic::AtomicPtr<T>,
2626
// Prevent RefUnwindSafe from being propagated from the std atomic type.
2727
_marker: PhantomData<NoRefUnwindSafe>,
2828
}
@@ -47,6 +47,13 @@ impl<T> AtomicPtr<T> {
4747
pub(crate) fn into_inner(self) -> *mut T {
4848
self.inner.into_inner()
4949
}
50+
#[cfg(not(all(
51+
not(any(miri, portable_atomic_sanitize_thread)),
52+
not(portable_atomic_no_asm),
53+
target_arch = "arm",
54+
any(target_os = "linux", target_os = "android"),
55+
not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
56+
)))]
5057
#[inline]
5158
#[cfg_attr(
5259
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
@@ -56,6 +63,13 @@ impl<T> AtomicPtr<T> {
5663
crate::utils::assert_load_ordering(order); // for track_caller (compiler can omit double check)
5764
self.inner.load(order)
5865
}
66+
#[cfg(not(all(
67+
not(any(miri, portable_atomic_sanitize_thread)),
68+
not(portable_atomic_no_asm),
69+
target_arch = "arm",
70+
any(target_os = "linux", target_os = "android"),
71+
not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
72+
)))]
5973
#[inline]
6074
#[cfg_attr(
6175
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
@@ -127,7 +141,7 @@ macro_rules! atomic_int {
127141
($atomic_type:ident, $int_type:ident) => {
128142
#[repr(transparent)]
129143
pub(crate) struct $atomic_type {
130-
inner: core::sync::atomic::$atomic_type,
144+
pub(crate) inner: core::sync::atomic::$atomic_type,
131145
// Prevent RefUnwindSafe from being propagated from the std atomic type.
132146
_marker: PhantomData<NoRefUnwindSafe>,
133147
}
@@ -169,6 +183,13 @@ macro_rules! atomic_int {
169183
pub(crate) fn into_inner(self) -> $int_type {
170184
self.inner.into_inner()
171185
}
186+
#[cfg(not(all(
187+
not(any(miri, portable_atomic_sanitize_thread)),
188+
not(portable_atomic_no_asm),
189+
target_arch = "arm",
190+
any(target_os = "linux", target_os = "android"),
191+
not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
192+
)))]
172193
#[inline]
173194
#[cfg_attr(
174195
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
@@ -178,6 +199,13 @@ macro_rules! atomic_int {
178199
crate::utils::assert_load_ordering(order); // for track_caller (compiler can omit double check)
179200
self.inner.load(order)
180201
}
202+
#[cfg(not(all(
203+
not(any(miri, portable_atomic_sanitize_thread)),
204+
not(portable_atomic_no_asm),
205+
target_arch = "arm",
206+
any(target_os = "linux", target_os = "android"),
207+
not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
208+
)))]
181209
#[inline]
182210
#[cfg_attr(
183211
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),

src/imp/fallback/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
),
4444
all(
4545
target_arch = "arm",
46-
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
46+
not(portable_atomic_no_asm),
4747
any(target_os = "linux", target_os = "android"),
4848
not(portable_atomic_no_outline_atomics),
4949
),

src/imp/fallback/seq_lock_wide.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@
44

55
use core::{
66
mem::ManuallyDrop,
7-
sync::atomic::{self, AtomicUsize, Ordering},
7+
sync::atomic::{self, Ordering},
88
};
99

1010
use super::utils::Backoff;
11+
use crate::imp::AtomicUsize;
1112

1213
// See mod.rs for details.
1314
pub(super) type AtomicChunk = AtomicUsize;

src/imp/mod.rs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -119,15 +119,13 @@ mod powerpc64;
119119
mod s390x;
120120

121121
// pre-v6 ARM Linux/Android: 64-bit atomics and optimized non-relaxed load/store
122-
#[cfg(feature = "fallback")]
123122
// Miri and Sanitizer do not support inline assembly.
124123
#[cfg(all(
125124
target_arch = "arm",
126125
not(any(miri, portable_atomic_sanitize_thread)),
127-
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
126+
not(portable_atomic_no_asm),
128127
any(target_os = "linux", target_os = "android"),
129128
not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
130-
not(portable_atomic_no_outline_atomics),
131129
))]
132130
#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_64))]
133131
#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "64")))]
@@ -320,7 +318,7 @@ items! {
320318
#[cfg(not(all(
321319
target_arch = "arm",
322320
not(any(miri, portable_atomic_sanitize_thread)),
323-
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
321+
not(portable_atomic_no_asm),
324322
any(target_os = "linux", target_os = "android"),
325323
not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
326324
not(portable_atomic_no_outline_atomics),
@@ -387,7 +385,7 @@ items! {
387385
#[cfg(all(
388386
target_arch = "arm",
389387
not(any(miri, portable_atomic_sanitize_thread)),
390-
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
388+
not(portable_atomic_no_asm),
391389
any(target_os = "linux", target_os = "android"),
392390
not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
393391
not(portable_atomic_no_outline_atomics),

src/tests/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ fn test_is_lock_free() {
8484
feature = "fallback",
8585
target_arch = "arm",
8686
not(any(miri, portable_atomic_sanitize_thread)),
87-
any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
87+
not(portable_atomic_no_asm),
8888
any(target_os = "linux", target_os = "android"),
8989
not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
9090
not(portable_atomic_no_outline_atomics),

0 commit comments

Comments
 (0)