Spinlock features (#33)

Co-authored-by: Zachary Levy <zachary@sunforge.is>
Reviewed-on: #33
This commit was merged in pull request #33.
This commit is contained in:
2026-06-06 02:38:08 +00:00
parent 08f8a9d0b5
commit 962a814b84
+105 -30
View File
@@ -120,10 +120,52 @@ spinlock_try_lock :: #force_inline proc "contextless" (lock: ^Spinlock) -> bool
return lock_acquired
}
// Busy-waits until `lock` has been acquired by the caller. After every failed acquisition
// attempt a CPU relax hint is issued before trying again.
spinlock_lock :: #force_inline proc "contextless" (lock: ^Spinlock) {
	for {
		if spinlock_try_lock(lock) {
			// Lock is ours; stop spinning.
			return
		}
		intrinsics.cpu_relax()
	}
}
// Releases the lock by atomically storing `false` into it with Release ordering.
// The procedure performs no check that the caller actually holds the lock.
// NOTE(review): presumably pairs with an acquire-ordered operation in `spinlock_try_lock`;
// that body is not fully visible here, so confirm.
spinlock_unlock :: #force_inline proc "contextless" (lock: ^Spinlock) {
intrinsics.atomic_store_explicit(lock, false, .Release)
}
// Spins until the lock is acquired, then unlocks at the end of the calling scope. Always returns
// true so it can guard a critical section from within an `if`:
//
// if spinlock_guard(&lock) {
// // critical section
// }
//
// The unlock is wired up through the `deferred_in` attribute, which names `spinlock_unlock` as
// the deferred procedure for this call.
@(deferred_in = spinlock_unlock)
spinlock_guard :: #force_inline proc "contextless" (lock: ^Spinlock) -> bool {
// Blocks (spinning) until the lock is held by the caller.
spinlock_lock(lock)
// Unconditional success: lets the call be used directly as an `if` condition.
return true
}
// Makes one non-spinning attempt to take the lock and reports whether it succeeded. On success
// the lock is released again at the end of the calling scope; on failure nothing is released:
//
//	if spinlock_tryguard(&lock) {
//		// critical section, entered only if the lock was acquired
//	}
//
// The result is forwarded, together with `lock`, to `spinlock_tryguard_unlock` via the
// `deferred_in_out` attribute.
@(deferred_in_out = spinlock_tryguard_unlock)
spinlock_tryguard :: #force_inline proc "contextless" (lock: ^Spinlock) -> bool {
	acquired := spinlock_try_lock(lock)
	return acquired
}
// Deferred half of `spinlock_tryguard`. `locked` is the value `spinlock_tryguard` returned;
// the lock is released only when that value is true.
@(private)
spinlock_tryguard_unlock :: #force_inline proc "contextless" (lock: ^Spinlock, locked: bool) {
	if !locked {
		// The try-lock failed, so there is nothing to release.
		return
	}
	spinlock_unlock(lock)
}
// Procedure group for blocking acquisition; currently resolves only to `spinlock_lock`.
lock :: proc {
spinlock_lock,
}
// Procedure group for single-attempt acquisition; currently resolves only to `spinlock_try_lock`.
try_lock :: proc {
spinlock_try_lock,
}
@@ -132,6 +174,14 @@ unlock :: proc {
spinlock_unlock,
}
// Procedure group for scoped (auto-unlocking) acquisition; currently resolves only to
// `spinlock_guard`.
guard :: proc {
spinlock_guard,
}
// Procedure group for scoped single-attempt acquisition; currently resolves only to
// `spinlock_tryguard`.
tryguard :: proc {
spinlock_tryguard,
}
// ---------------------------------------------------------------------------------------------------------------------
// ----- Tests ------------------------
// ---------------------------------------------------------------------------------------------------------------------
@@ -139,10 +189,10 @@ import "core:sync"
import "core:testing"
import "core:thread"
@(test)
test_concurrent_atomic_add_no_lost_updates :: proc(t: ^testing.T) {
// Multiple threads will each add 1.0 this many times.
// If any updates are lost due to race conditions, the final sum will be wrong.
@(test)
test_concurrent_atomic_add_no_lost_updates :: proc(t: ^testing.T) {
NUM_THREADS :: 8
ITERATIONS_PER_THREAD :: 10_000
@@ -184,10 +234,10 @@ test_concurrent_atomic_add_no_lost_updates :: proc(t: ^testing.T) {
testing.expect_value(t, shared_value, expected)
}
@(test)
test_concurrent_atomic_sub_no_lost_updates :: proc(t: ^testing.T) {
// Start with a known value, multiple threads subtract.
// If any updates are lost due to race conditions, the final result will be wrong.
@(test)
test_concurrent_atomic_sub_no_lost_updates :: proc(t: ^testing.T) {
NUM_THREADS :: 8
ITERATIONS_PER_THREAD :: 10_000
@@ -228,11 +278,11 @@ test_concurrent_atomic_sub_no_lost_updates :: proc(t: ^testing.T) {
testing.expect_value(t, shared_value, 0.0)
}
@(test)
test_concurrent_atomic_mul_div_round_trip :: proc(t: ^testing.T) {
// Each thread multiplies by 2.0 then divides by 2.0.
// Since these are inverses, the final value should equal the starting value
// regardless of how operations interleave.
@(test)
test_concurrent_atomic_mul_div_round_trip :: proc(t: ^testing.T) {
NUM_THREADS :: 8
ITERATIONS_PER_THREAD :: 10_000
@@ -274,10 +324,10 @@ test_concurrent_atomic_mul_div_round_trip :: proc(t: ^testing.T) {
testing.expect_value(t, shared_value, 1000.0)
}
@(test)
test_atomic_add_with_f32 :: proc(t: ^testing.T) {
// Verify the f32 type dispatch works correctly under contention.
// Same approach as the f64 add test but with f32.
@(test)
test_atomic_add_with_f32 :: proc(t: ^testing.T) {
NUM_THREADS :: 8
ITERATIONS_PER_THREAD :: 10_000
@@ -319,8 +369,6 @@ test_atomic_add_with_f32 :: proc(t: ^testing.T) {
testing.expect_value(t, shared_value, expected)
}
@(test)
test_atomic_release_acquire_publish_visibility :: proc(t: ^testing.T) {
// Tests that the memory order passed to atomic_float_op's CAS success condition
// provides full ordering guarantees for the entire float operation.
//
@@ -330,6 +378,8 @@ test_atomic_release_acquire_publish_visibility :: proc(t: ^testing.T) {
//
// NOTE: This test may pass even with Relaxed ordering on x86 due to its strong memory model.
// On ARM or other weak-memory architectures, using Relaxed here would likely cause failures.
@(test)
test_atomic_release_acquire_publish_visibility :: proc(t: ^testing.T) {
NUM_READERS :: 4
Shared_State :: struct {
@@ -426,10 +476,11 @@ test_atomic_release_acquire_publish_visibility :: proc(t: ^testing.T) {
}
}
@(test)
test_spinlock_try_lock_mutual_exclusion :: proc(t: ^testing.T) {
// Stress test for spinlock_try_lock: N threads spin-acquire the lock and
// perform a deliberate non-atomic read-modify-write on shared data.
// Stress test for every spinlock acquisition variant: N threads contend on a
// single lock and perform a deliberate non-atomic read-modify-write on shared
// data. Each iteration rotates through spinlock_try_lock, spinlock_lock,
// spinlock_guard, and spinlock_tryguard so every variant runs concurrently and
// must uphold mutual exclusion on the same lock.
//
// If mutual exclusion holds:
// - `counter` ends at exactly NUM_THREADS * ITERATIONS_PER_THREAD
@@ -437,6 +488,8 @@ test_spinlock_try_lock_mutual_exclusion :: proc(t: ^testing.T) {
//
// A multi-step RMW (read → relax → write) widens the critical section so
// any failure to exclude is virtually guaranteed to corrupt the counter.
@(test)
test_spinlock_mutual_exclusion :: proc(t: ^testing.T) {
NUM_THREADS :: 8
ITERATIONS_PER_THREAD :: 50_000
@@ -461,21 +514,9 @@ test_spinlock_try_lock_mutual_exclusion :: proc(t: ^testing.T) {
barrier: sync.Barrier
sync.barrier_init(&barrier, NUM_THREADS)
thread_proc :: proc(th: ^thread.Thread) {
ctx := cast(^Thread_Data)th.data
s := ctx.shared
// All threads rendezvous here for maximum contention.
sync.barrier_wait(ctx.barrier)
for _ in 0 ..< ITERATIONS_PER_THREAD {
// Spin on try_lock until we acquire it.
for !spinlock_try_lock(&s.lock) {
intrinsics.cpu_relax()
}
// --- critical section start ---
// The single critical section every acquisition variant must protect. Sharing
// it guarantees they all stress the exact same non-atomic read-modify-write.
critical_section :: proc(s: ^Shared) {
// Atomically bump the holder count so we can detect overlapping holders.
holders := intrinsics.atomic_add_explicit(&s.concurrent_holders, 1, .Relaxed)
@@ -494,10 +535,44 @@ test_spinlock_try_lock_mutual_exclusion :: proc(t: ^testing.T) {
s.counter = val + 1
intrinsics.atomic_sub_explicit(&s.concurrent_holders, 1, .Relaxed)
}
// --- critical section end ---
// Worker body: waits at the shared barrier, then runs ITERATIONS_PER_THREAD passes through
// `critical_section`, choosing the lock-acquisition variant from the iteration index.
// NOTE(review): presumably started once per contending thread; thread creation is not
// visible in this chunk, so confirm.
thread_proc :: proc(th: ^thread.Thread) {
ctx := cast(^Thread_Data)th.data
s := ctx.shared
// All threads rendezvous here for maximum contention.
sync.barrier_wait(ctx.barrier)
for i in 0 ..< ITERATIONS_PER_THREAD {
// Rotate through every acquisition variant so they all contend on the
// same lock simultaneously and must each uphold mutual exclusion.
// `i & 3` keeps the low two bits of the index, cycling through 0, 1, 2, 3.
switch i & 3 {
case 0:
// Manual spin on try_lock until we acquire it.
for !spinlock_try_lock(&s.lock) {
intrinsics.cpu_relax()
}
critical_section(s)
spinlock_unlock(&s.lock)
case 1:
// Blocking lock that loops internally until acquired.
spinlock_lock(&s.lock)
critical_section(s)
spinlock_unlock(&s.lock)
case 2: // Scoped guard: unlocks automatically at the end of the block.
if spinlock_guard(&s.lock) {
critical_section(s)
}
case 3: // Scoped try-guard: retry until acquired, auto-unlocks on success.
for {
if spinlock_tryguard(&s.lock) {
critical_section(s)
// Exit the retry loop. Per spinlock_tryguard's contract the unlock is
// deferred to the end of this `if` block, so it still runs on the way out.
break
}
// Acquisition failed; hint the CPU before the next attempt.
intrinsics.cpu_relax()
}
}
}
}