From 962a814b849f160ad6bdd924bbfd535fd7d6a080 Mon Sep 17 00:00:00 2001 From: Zachary Levy Date: Sat, 6 Jun 2026 02:38:08 +0000 Subject: [PATCH] Spinlock features (#33) Co-authored-by: Zachary Levy Reviewed-on: https://git.bfpower.io/BFPOWER/levlib/pulls/33 --- levsync/levsync.odin | 189 ++++++++++++++++++++++++++++++------------- 1 file changed, 132 insertions(+), 57 deletions(-) diff --git a/levsync/levsync.odin b/levsync/levsync.odin index e9bbd94..f8c1d44 100644 --- a/levsync/levsync.odin +++ b/levsync/levsync.odin @@ -120,10 +120,52 @@ spinlock_try_lock :: #force_inline proc "contextless" (lock: ^Spinlock) -> bool return lock_acquired } +// Spins until the lock is acquired, relaxing the CPU between attempts. +spinlock_lock :: #force_inline proc "contextless" (lock: ^Spinlock) { + for !spinlock_try_lock(lock) { + intrinsics.cpu_relax() + } +} + spinlock_unlock :: #force_inline proc "contextless" (lock: ^Spinlock) { intrinsics.atomic_store_explicit(lock, false, .Release) } +// Spins until the lock is acquired, then unlocks at the end of the calling scope. Always returns +// true so it can guard a critical section from within an `if`: +// +// if spinlock_guard(&lock) { +// // critical section +// } +@(deferred_in = spinlock_unlock) +spinlock_guard :: #force_inline proc "contextless" (lock: ^Spinlock) -> bool { + spinlock_lock(lock) + return true +} + +// Tries to acquire the lock once without spinning. Returns true and unlocks at the end of the +// calling scope if acquired, otherwise returns false and does nothing: +// +// if spinlock_tryguard(&lock) { +// // critical section, entered only if the lock was acquired +// } +@(deferred_in_out = spinlock_tryguard_unlock) +spinlock_tryguard :: #force_inline proc "contextless" (lock: ^Spinlock) -> bool { + return spinlock_try_lock(lock) +} + +// Deferred companion of `spinlock_tryguard`; unlocks only when the lock was actually acquired. +@(private) +spinlock_tryguard_unlock :: #force_inline proc "contextless" (lock: ^Spinlock, locked: bool) { + if locked { + spinlock_unlock(lock) + } +} + +lock :: proc { + spinlock_lock, +} + try_lock :: proc { spinlock_try_lock, } @@ -132,6 +174,14 @@ unlock :: proc { spinlock_unlock, } +guard :: proc { + spinlock_guard, +} + +tryguard :: proc { + spinlock_tryguard, +} + // --------------------------------------------------------------------------------------------------------------------- // ----- Tests ------------------------ // --------------------------------------------------------------------------------------------------------------------- @@ -139,10 +189,10 @@ import "core:sync" import "core:testing" import "core:thread" +// Multiple threads will each add 1.0 this many times. +// If any updates are lost due to race conditions, the final sum will be wrong. @(test) test_concurrent_atomic_add_no_lost_updates :: proc(t: ^testing.T) { - // Multiple threads will each add 1.0 this many times. - // If any updates are lost due to race conditions, the final sum will be wrong. NUM_THREADS :: 8 ITERATIONS_PER_THREAD :: 10_000 @@ -184,10 +234,10 @@ test_concurrent_atomic_add_no_lost_updates :: proc(t: ^testing.T) { testing.expect_value(t, shared_value, expected) } +// Start with a known value, multiple threads subtract. +// If any updates are lost due to race conditions, the final result will be wrong. @(test) test_concurrent_atomic_sub_no_lost_updates :: proc(t: ^testing.T) { - // Start with a known value, multiple threads subtract. - // If any updates are lost due to race conditions, the final result will be wrong. NUM_THREADS :: 8 ITERATIONS_PER_THREAD :: 10_000 @@ -228,11 +278,11 @@ test_concurrent_atomic_sub_no_lost_updates :: proc(t: ^testing.T) { testing.expect_value(t, shared_value, 0.0) } +// Each thread multiplies by 2.0 then divides by 2.0. +// Since these are inverses, the final value should equal the starting value +// regardless of how operations interleave. @(test) test_concurrent_atomic_mul_div_round_trip :: proc(t: ^testing.T) { - // Each thread multiplies by 2.0 then divides by 2.0. - // Since these are inverses, the final value should equal the starting value - // regardless of how operations interleave. NUM_THREADS :: 8 ITERATIONS_PER_THREAD :: 10_000 @@ -274,10 +324,10 @@ test_concurrent_atomic_mul_div_round_trip :: proc(t: ^testing.T) { testing.expect_value(t, shared_value, 1000.0) } +// Verify the f32 type dispatch works correctly under contention. +// Same approach as the f64 add test but with f32. @(test) test_atomic_add_with_f32 :: proc(t: ^testing.T) { - // Verify the f32 type dispatch works correctly under contention. - // Same approach as the f64 add test but with f32. NUM_THREADS :: 8 ITERATIONS_PER_THREAD :: 10_000 @@ -319,17 +369,17 @@ test_atomic_add_with_f32 :: proc(t: ^testing.T) { testing.expect_value(t, shared_value, expected) } +// Tests that the memory order passed to atomic_float_op's CAS success condition +// provides full ordering guarantees for the entire float operation. +// +// Both sides use atomic_add_float (not raw intrinsics) to verify: +// - Release on CAS success publishes prior non-atomic writes +// - Acquire on CAS success makes those writes visible to the reader +// +// NOTE: This test may pass even with Relaxed ordering on x86 due to its strong memory model. +// On ARM or other weak-memory architectures, using Relaxed here would likely cause failures. @(test) test_atomic_release_acquire_publish_visibility :: proc(t: ^testing.T) { - // Tests that the memory order passed to atomic_float_op's CAS success condition - // provides full ordering guarantees for the entire float operation. - // - // Both sides use atomic_add_float (not raw intrinsics) to verify: - // - Release on CAS success publishes prior non-atomic writes - // - Acquire on CAS success makes those writes visible to the reader - // - // NOTE: This test may pass even with Relaxed ordering on x86 due to its strong memory model. - // On ARM or other weak-memory architectures, using Relaxed here would likely cause failures. NUM_READERS :: 4 Shared_State :: struct { @@ -426,17 +476,20 @@ test_atomic_release_acquire_publish_visibility :: proc(t: ^testing.T) { } } +// Stress test for every spinlock acquisition variant: N threads contend on a +// single lock and perform a deliberate non-atomic read-modify-write on shared +// data. Each iteration rotates through spinlock_try_lock, spinlock_lock, +// spinlock_guard, and spinlock_tryguard so every variant runs concurrently and +// must uphold mutual exclusion on the same lock. +// +// If mutual exclusion holds: +// - `counter` ends at exactly NUM_THREADS * ITERATIONS_PER_THREAD +// - `concurrent_holders` never exceeds 1 +// +// A multi-step RMW (read → relax → write) widens the critical section so +// any failure to exclude is virtually guaranteed to corrupt the counter. @(test) -test_spinlock_try_lock_mutual_exclusion :: proc(t: ^testing.T) { - // Stress test for spinlock_try_lock: N threads spin-acquire the lock and - // perform a deliberate non-atomic read-modify-write on shared data. - // - // If mutual exclusion holds: - // - `counter` ends at exactly NUM_THREADS * ITERATIONS_PER_THREAD - // - `concurrent_holders` never exceeds 1 - // - // A multi-step RMW (read → relax → write) widens the critical section so - // any failure to exclude is virtually guaranteed to corrupt the counter. +test_spinlock_mutual_exclusion :: proc(t: ^testing.T) { NUM_THREADS :: 8 ITERATIONS_PER_THREAD :: 50_000 @@ -461,6 +514,29 @@ test_spinlock_try_lock_mutual_exclusion :: proc(t: ^testing.T) { barrier: sync.Barrier sync.barrier_init(&barrier, NUM_THREADS) + // The single critical section every acquisition variant must protect. Sharing + // it guarantees they all stress the exact same non-atomic read-modify-write. + critical_section :: proc(s: ^Shared) { + // Atomically bump the holder count so we can detect overlapping holders. + holders := intrinsics.atomic_add_explicit(&s.concurrent_holders, 1, .Relaxed) + + // Track the maximum we ever observed (relaxed is fine, this is + // purely diagnostic and protected by the spinlock for writes). + if holders + 1 > s.max_holders { + s.max_holders = holders + 1 + } + + // Non-atomic RMW: read, spin a tiny bit, then write. + // This deliberately creates a wide window where a second holder + // would cause a lost update. + val := s.counter + intrinsics.cpu_relax() + intrinsics.cpu_relax() + s.counter = val + 1 + + intrinsics.atomic_sub_explicit(&s.concurrent_holders, 1, .Relaxed) + } + thread_proc :: proc(th: ^thread.Thread) { ctx := cast(^Thread_Data)th.data s := ctx.shared @@ -468,36 +544,35 @@ test_spinlock_try_lock_mutual_exclusion :: proc(t: ^testing.T) { // All threads rendezvous here for maximum contention. sync.barrier_wait(ctx.barrier) - for _ in 0 ..< ITERATIONS_PER_THREAD { - // Spin on try_lock until we acquire it. - for !spinlock_try_lock(&s.lock) { - intrinsics.cpu_relax() + for i in 0 ..< ITERATIONS_PER_THREAD { + // Rotate through every acquisition variant so they all contend on the + // same lock simultaneously and must each uphold mutual exclusion. + switch i & 3 { + case 0: + // Manual spin on try_lock until we acquire it. + for !spinlock_try_lock(&s.lock) { + intrinsics.cpu_relax() + } + critical_section(s) + spinlock_unlock(&s.lock) + case 1: + // Blocking lock that loops internally until acquired. + spinlock_lock(&s.lock) + critical_section(s) + spinlock_unlock(&s.lock) + case 2: // Scoped guard: unlocks automatically at the end of the block. + if spinlock_guard(&s.lock) { + critical_section(s) + } + case 3: // Scoped try-guard: retry until acquired, auto-unlocks on success. + for { + if spinlock_tryguard(&s.lock) { + critical_section(s) + break + } + intrinsics.cpu_relax() + } } - - // --- critical section start --- - - // Atomically bump the holder count so we can detect overlapping holders. - holders := intrinsics.atomic_add_explicit(&s.concurrent_holders, 1, .Relaxed) - - // Track the maximum we ever observed (relaxed is fine, this is - // purely diagnostic and protected by the spinlock for writes). - if holders + 1 > s.max_holders { - s.max_holders = holders + 1 - } - - // Non-atomic RMW: read, spin a tiny bit, then write. - // This deliberately creates a wide window where a second holder - // would cause a lost update. - val := s.counter - intrinsics.cpu_relax() - intrinsics.cpu_relax() - s.counter = val + 1 - - intrinsics.atomic_sub_explicit(&s.concurrent_holders, 1, .Relaxed) - - // --- critical section end --- - - spinlock_unlock(&s.lock) } }