Spinlock features (#33)
Co-authored-by: Zachary Levy <zachary@sunforge.is> Reviewed-on: #33
This commit was merged in pull request #33.
This commit is contained in:
+119
-44
@@ -120,10 +120,52 @@ spinlock_try_lock :: #force_inline proc "contextless" (lock: ^Spinlock) -> bool
|
||||
return lock_acquired
|
||||
}
|
||||
|
||||
// Busy-waits until the lock is held by the caller. Each failed `spinlock_try_lock` attempt is
// followed by a CPU relax hint before the next retry.
spinlock_lock :: #force_inline proc "contextless" (lock: ^Spinlock) {
	for {
		if spinlock_try_lock(lock) {
			return
		}
		intrinsics.cpu_relax()
	}
}
|
||||
|
||||
// Releases the lock by atomically storing `false` into it with release ordering.
spinlock_unlock :: #force_inline proc "contextless" (lock: ^Spinlock) {
	intrinsics.atomic_store_explicit(lock, false, .Release)
}
|
||||
|
||||
// Scoped acquisition. Busy-waits for the lock; the `deferred_in` attribute then arranges for
// `spinlock_unlock` to run on the same lock when the calling scope ends. The result is always
// true, so the call can be used directly as the condition of an `if` that wraps the critical
// section:
//
//     if spinlock_guard(&lock) {
//         // critical section
//     }
@(deferred_in = spinlock_unlock)
spinlock_guard :: #force_inline proc "contextless" (lock: ^Spinlock) -> bool {
	// Same spin loop as `spinlock_lock`, written out in place.
	for !spinlock_try_lock(lock) {
		intrinsics.cpu_relax()
	}
	return true
}
|
||||
|
||||
// Scoped, non-spinning acquisition. Makes a single `spinlock_try_lock` attempt and returns its
// result. Via `deferred_in_out`, `spinlock_tryguard_unlock` runs at the end of the calling scope
// and receives that result, so the lock is released only if this call acquired it:
//
//     if spinlock_tryguard(&lock) {
//         // critical section, entered only if the lock was acquired
//     }
@(deferred_in_out = spinlock_tryguard_unlock)
spinlock_tryguard :: #force_inline proc "contextless" (lock: ^Spinlock) -> bool {
	acquired := spinlock_try_lock(lock)
	return acquired
}
|
||||
|
||||
// Deferred half of `spinlock_tryguard`. `locked` is the value `spinlock_tryguard` returned;
// the lock is released only when that value is true, otherwise nothing happens.
@(private)
spinlock_tryguard_unlock :: #force_inline proc "contextless" (lock: ^Spinlock, locked: bool) {
	if !locked {
		return
	}
	spinlock_unlock(lock)
}
|
||||
|
||||
// Procedure group for spinning acquisition; currently dispatches only to `spinlock_lock`.
lock :: proc{spinlock_lock}
|
||||
|
||||
// Procedure group for single-attempt acquisition; currently dispatches only to `spinlock_try_lock`.
try_lock :: proc{spinlock_try_lock}
|
||||
@@ -132,6 +174,14 @@ unlock :: proc {
|
||||
spinlock_unlock,
|
||||
}
|
||||
|
||||
// Procedure group for scoped acquisition; currently dispatches only to `spinlock_guard`.
guard :: proc{spinlock_guard}
|
||||
|
||||
// Procedure group for scoped single-attempt acquisition; currently dispatches only to
// `spinlock_tryguard`.
tryguard :: proc{spinlock_tryguard}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------------------
|
||||
// ----- Tests ------------------------
|
||||
// ---------------------------------------------------------------------------------------------------------------------
|
||||
@@ -139,10 +189,10 @@ import "core:sync"
|
||||
import "core:testing"
|
||||
import "core:thread"
|
||||
|
||||
// Multiple threads will each add 1.0 this many times.
|
||||
// If any updates are lost due to race conditions, the final sum will be wrong.
|
||||
@(test)
|
||||
test_concurrent_atomic_add_no_lost_updates :: proc(t: ^testing.T) {
|
||||
// Multiple threads will each add 1.0 this many times.
|
||||
// If any updates are lost due to race conditions, the final sum will be wrong.
|
||||
NUM_THREADS :: 8
|
||||
ITERATIONS_PER_THREAD :: 10_000
|
||||
|
||||
@@ -184,10 +234,10 @@ test_concurrent_atomic_add_no_lost_updates :: proc(t: ^testing.T) {
|
||||
testing.expect_value(t, shared_value, expected)
|
||||
}
|
||||
|
||||
// Start with a known value, multiple threads subtract.
|
||||
// If any updates are lost due to race conditions, the final result will be wrong.
|
||||
@(test)
|
||||
test_concurrent_atomic_sub_no_lost_updates :: proc(t: ^testing.T) {
|
||||
// Start with a known value, multiple threads subtract.
|
||||
// If any updates are lost due to race conditions, the final result will be wrong.
|
||||
NUM_THREADS :: 8
|
||||
ITERATIONS_PER_THREAD :: 10_000
|
||||
|
||||
@@ -228,11 +278,11 @@ test_concurrent_atomic_sub_no_lost_updates :: proc(t: ^testing.T) {
|
||||
testing.expect_value(t, shared_value, 0.0)
|
||||
}
|
||||
|
||||
// Each thread multiplies by 2.0 then divides by 2.0.
|
||||
// Since these are inverses, the final value should equal the starting value
|
||||
// regardless of how operations interleave.
|
||||
@(test)
|
||||
test_concurrent_atomic_mul_div_round_trip :: proc(t: ^testing.T) {
|
||||
// Each thread multiplies by 2.0 then divides by 2.0.
|
||||
// Since these are inverses, the final value should equal the starting value
|
||||
// regardless of how operations interleave.
|
||||
NUM_THREADS :: 8
|
||||
ITERATIONS_PER_THREAD :: 10_000
|
||||
|
||||
@@ -274,10 +324,10 @@ test_concurrent_atomic_mul_div_round_trip :: proc(t: ^testing.T) {
|
||||
testing.expect_value(t, shared_value, 1000.0)
|
||||
}
|
||||
|
||||
// Verify the f32 type dispatch works correctly under contention.
|
||||
// Same approach as the f64 add test but with f32.
|
||||
@(test)
|
||||
test_atomic_add_with_f32 :: proc(t: ^testing.T) {
|
||||
// Verify the f32 type dispatch works correctly under contention.
|
||||
// Same approach as the f64 add test but with f32.
|
||||
NUM_THREADS :: 8
|
||||
ITERATIONS_PER_THREAD :: 10_000
|
||||
|
||||
@@ -319,17 +369,17 @@ test_atomic_add_with_f32 :: proc(t: ^testing.T) {
|
||||
testing.expect_value(t, shared_value, expected)
|
||||
}
|
||||
|
||||
// Tests that the memory order passed to atomic_float_op's CAS success condition
|
||||
// provides full ordering guarantees for the entire float operation.
|
||||
//
|
||||
// Both sides use atomic_add_float (not raw intrinsics) to verify:
|
||||
// - Release on CAS success publishes prior non-atomic writes
|
||||
// - Acquire on CAS success makes those writes visible to the reader
|
||||
//
|
||||
// NOTE: This test may pass even with Relaxed ordering on x86 due to its strong memory model.
|
||||
// On ARM or other weak-memory architectures, using Relaxed here would likely cause failures.
|
||||
@(test)
|
||||
test_atomic_release_acquire_publish_visibility :: proc(t: ^testing.T) {
|
||||
// Tests that the memory order passed to atomic_float_op's CAS success condition
|
||||
// provides full ordering guarantees for the entire float operation.
|
||||
//
|
||||
// Both sides use atomic_add_float (not raw intrinsics) to verify:
|
||||
// - Release on CAS success publishes prior non-atomic writes
|
||||
// - Acquire on CAS success makes those writes visible to the reader
|
||||
//
|
||||
// NOTE: This test may pass even with Relaxed ordering on x86 due to its strong memory model.
|
||||
// On ARM or other weak-memory architectures, using Relaxed here would likely cause failures.
|
||||
NUM_READERS :: 4
|
||||
|
||||
Shared_State :: struct {
|
||||
@@ -426,17 +476,20 @@ test_atomic_release_acquire_publish_visibility :: proc(t: ^testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// Stress test for every spinlock acquisition variant: N threads contend on a
|
||||
// single lock and perform a deliberate non-atomic read-modify-write on shared
|
||||
// data. Each iteration rotates through spinlock_try_lock, spinlock_lock,
|
||||
// spinlock_guard, and spinlock_tryguard so every variant runs concurrently and
|
||||
// must uphold mutual exclusion on the same lock.
|
||||
//
|
||||
// If mutual exclusion holds:
|
||||
// - `counter` ends at exactly NUM_THREADS * ITERATIONS_PER_THREAD
|
||||
// - `concurrent_holders` never exceeds 1
|
||||
//
|
||||
// A multi-step RMW (read → relax → write) widens the critical section so
|
||||
// any failure to exclude is virtually guaranteed to corrupt the counter.
|
||||
@(test)
|
||||
test_spinlock_try_lock_mutual_exclusion :: proc(t: ^testing.T) {
|
||||
// Stress test for spinlock_try_lock: N threads spin-acquire the lock and
|
||||
// perform a deliberate non-atomic read-modify-write on shared data.
|
||||
//
|
||||
// If mutual exclusion holds:
|
||||
// - `counter` ends at exactly NUM_THREADS * ITERATIONS_PER_THREAD
|
||||
// - `concurrent_holders` never exceeds 1
|
||||
//
|
||||
// A multi-step RMW (read → relax → write) widens the critical section so
|
||||
// any failure to exclude is virtually guaranteed to corrupt the counter.
|
||||
test_spinlock_mutual_exclusion :: proc(t: ^testing.T) {
|
||||
NUM_THREADS :: 8
|
||||
ITERATIONS_PER_THREAD :: 50_000
|
||||
|
||||
@@ -461,21 +514,9 @@ test_spinlock_try_lock_mutual_exclusion :: proc(t: ^testing.T) {
|
||||
barrier: sync.Barrier
|
||||
sync.barrier_init(&barrier, NUM_THREADS)
|
||||
|
||||
thread_proc :: proc(th: ^thread.Thread) {
|
||||
ctx := cast(^Thread_Data)th.data
|
||||
s := ctx.shared
|
||||
|
||||
// All threads rendezvous here for maximum contention.
|
||||
sync.barrier_wait(ctx.barrier)
|
||||
|
||||
for _ in 0 ..< ITERATIONS_PER_THREAD {
|
||||
// Spin on try_lock until we acquire it.
|
||||
for !spinlock_try_lock(&s.lock) {
|
||||
intrinsics.cpu_relax()
|
||||
}
|
||||
|
||||
// --- critical section start ---
|
||||
|
||||
// The single critical section every acquisition variant must protect. Sharing
|
||||
// it guarantees they all stress the exact same non-atomic read-modify-write.
|
||||
critical_section :: proc(s: ^Shared) {
|
||||
// Atomically bump the holder count so we can detect overlapping holders.
|
||||
holders := intrinsics.atomic_add_explicit(&s.concurrent_holders, 1, .Relaxed)
|
||||
|
||||
@@ -494,10 +535,44 @@ test_spinlock_try_lock_mutual_exclusion :: proc(t: ^testing.T) {
|
||||
s.counter = val + 1
|
||||
|
||||
intrinsics.atomic_sub_explicit(&s.concurrent_holders, 1, .Relaxed)
|
||||
}
|
||||
|
||||
// --- critical section end ---
|
||||
thread_proc :: proc(th: ^thread.Thread) {
|
||||
ctx := cast(^Thread_Data)th.data
|
||||
s := ctx.shared
|
||||
|
||||
// All threads rendezvous here for maximum contention.
|
||||
sync.barrier_wait(ctx.barrier)
|
||||
|
||||
for i in 0 ..< ITERATIONS_PER_THREAD {
|
||||
// Rotate through every acquisition variant so they all contend on the
|
||||
// same lock simultaneously and must each uphold mutual exclusion.
|
||||
switch i & 3 {
|
||||
case 0:
|
||||
// Manual spin on try_lock until we acquire it.
|
||||
for !spinlock_try_lock(&s.lock) {
|
||||
intrinsics.cpu_relax()
|
||||
}
|
||||
critical_section(s)
|
||||
spinlock_unlock(&s.lock)
|
||||
case 1:
|
||||
// Blocking lock that loops internally until acquired.
|
||||
spinlock_lock(&s.lock)
|
||||
critical_section(s)
|
||||
spinlock_unlock(&s.lock)
|
||||
case 2: // Scoped guard: unlocks automatically at the end of the block.
|
||||
if spinlock_guard(&s.lock) {
|
||||
critical_section(s)
|
||||
}
|
||||
case 3: // Scoped try-guard: retry until acquired, auto-unlocks on success.
|
||||
for {
|
||||
if spinlock_tryguard(&s.lock) {
|
||||
critical_section(s)
|
||||
break
|
||||
}
|
||||
intrinsics.cpu_relax()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user