Spinlock features #33
+132
-57
@@ -120,10 +120,52 @@ spinlock_try_lock :: #force_inline proc "contextless" (lock: ^Spinlock) -> bool
|
|||||||
return lock_acquired
|
return lock_acquired
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Blocks until the lock is acquired.
//
// Test-and-test-and-set: after a failed `spinlock_try_lock`, wait on a relaxed load until the
// lock reads as free (`spinlock_unlock` stores `false`) before attempting acquisition again.
// This keeps waiters from repeating the full acquisition attempt while the lock is still held.
// The CPU is relaxed between polls.
spinlock_lock :: #force_inline proc "contextless" (lock: ^Spinlock) {
	for !spinlock_try_lock(lock) {
		// The relaxed load is only a hint that a retry may succeed; the acquisition itself
		// always goes through `spinlock_try_lock`.
		for intrinsics.atomic_load_explicit(lock, .Relaxed) {
			intrinsics.cpu_relax()
		}
	}
}
|
||||||
|
|
||||||
// Releases the lock by atomically storing `false` into it with release ordering.
spinlock_unlock :: #force_inline proc "contextless" (lock: ^Spinlock) {
	intrinsics.atomic_store_explicit(lock, false, .Release)
}
|
||||||
|
|
||||||
|
// Scoped acquisition. Blocks in `spinlock_lock` until the lock is held; the `deferred_in`
// attribute then arranges for `spinlock_unlock` to be called with the same lock when the
// calling scope ends. The result is always true, so the call can serve as the condition of
// an `if` that wraps the critical section:
//
//	if spinlock_guard(&lock) {
//		// critical section
//	}
@(deferred_in = spinlock_unlock)
spinlock_guard :: #force_inline proc "contextless" (lock: ^Spinlock) -> bool {
	spinlock_lock(lock)
	return true
}
|
||||||
|
|
||||||
|
// Scoped, non-blocking acquisition. Makes a single `spinlock_try_lock` attempt and returns its
// result. The `deferred_in_out` attribute passes both the lock and that result to
// `spinlock_tryguard_unlock` when the calling scope ends, so the lock is released only if this
// call actually took it:
//
//	if spinlock_tryguard(&lock) {
//		// critical section, entered only if the lock was acquired
//	}
@(deferred_in_out = spinlock_tryguard_unlock)
spinlock_tryguard :: #force_inline proc "contextless" (lock: ^Spinlock) -> bool {
	acquired := spinlock_try_lock(lock)
	return acquired
}
|
||||||
|
|
||||||
|
// Deferred half of `spinlock_tryguard`. `locked` is the value `spinlock_tryguard` returned;
// when it is false the lock was never taken and nothing is released.
@(private)
spinlock_tryguard_unlock :: #force_inline proc "contextless" (lock: ^Spinlock, locked: bool) {
	if !locked {
		return
	}
	spinlock_unlock(lock)
}
|
||||||
|
|
||||||
|
// Overload set for blocking acquisition; `spinlock_lock` is its only member.
lock :: proc{spinlock_lock}
|
||||||
|
|
||||||
// Overload set for single-attempt acquisition; `spinlock_try_lock` is its only member.
try_lock :: proc{spinlock_try_lock}
|
||||||
@@ -132,6 +174,14 @@ unlock :: proc {
|
|||||||
spinlock_unlock,
|
spinlock_unlock,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Overload set for scoped blocking acquisition; `spinlock_guard` is its only member.
guard :: proc{spinlock_guard}
|
||||||
|
|
||||||
|
// Overload set for scoped single-attempt acquisition; `spinlock_tryguard` is its only member.
tryguard :: proc{spinlock_tryguard}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------------------------------------------
|
||||||
// ----- Tests ------------------------
|
// ----- Tests ------------------------
|
||||||
// ---------------------------------------------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------------------------------------------
|
||||||
@@ -139,10 +189,10 @@ import "core:sync"
|
|||||||
import "core:testing"
|
import "core:testing"
|
||||||
import "core:thread"
|
import "core:thread"
|
||||||
|
|
||||||
|
// Multiple threads will each add 1.0 this many times.
|
||||||
|
// If any updates are lost due to race conditions, the final sum will be wrong.
|
||||||
@(test)
|
@(test)
|
||||||
test_concurrent_atomic_add_no_lost_updates :: proc(t: ^testing.T) {
|
test_concurrent_atomic_add_no_lost_updates :: proc(t: ^testing.T) {
|
||||||
// Multiple threads will each add 1.0 this many times.
|
|
||||||
// If any updates are lost due to race conditions, the final sum will be wrong.
|
|
||||||
NUM_THREADS :: 8
|
NUM_THREADS :: 8
|
||||||
ITERATIONS_PER_THREAD :: 10_000
|
ITERATIONS_PER_THREAD :: 10_000
|
||||||
|
|
||||||
@@ -184,10 +234,10 @@ test_concurrent_atomic_add_no_lost_updates :: proc(t: ^testing.T) {
|
|||||||
testing.expect_value(t, shared_value, expected)
|
testing.expect_value(t, shared_value, expected)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Start with a known value, multiple threads subtract.
|
||||||
|
// If any updates are lost due to race conditions, the final result will be wrong.
|
||||||
@(test)
|
@(test)
|
||||||
test_concurrent_atomic_sub_no_lost_updates :: proc(t: ^testing.T) {
|
test_concurrent_atomic_sub_no_lost_updates :: proc(t: ^testing.T) {
|
||||||
// Start with a known value, multiple threads subtract.
|
|
||||||
// If any updates are lost due to race conditions, the final result will be wrong.
|
|
||||||
NUM_THREADS :: 8
|
NUM_THREADS :: 8
|
||||||
ITERATIONS_PER_THREAD :: 10_000
|
ITERATIONS_PER_THREAD :: 10_000
|
||||||
|
|
||||||
@@ -228,11 +278,11 @@ test_concurrent_atomic_sub_no_lost_updates :: proc(t: ^testing.T) {
|
|||||||
testing.expect_value(t, shared_value, 0.0)
|
testing.expect_value(t, shared_value, 0.0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Each thread multiplies by 2.0 then divides by 2.0.
|
||||||
|
// Since these are inverses, the final value should equal the starting value
|
||||||
|
// regardless of how operations interleave.
|
||||||
@(test)
|
@(test)
|
||||||
test_concurrent_atomic_mul_div_round_trip :: proc(t: ^testing.T) {
|
test_concurrent_atomic_mul_div_round_trip :: proc(t: ^testing.T) {
|
||||||
// Each thread multiplies by 2.0 then divides by 2.0.
|
|
||||||
// Since these are inverses, the final value should equal the starting value
|
|
||||||
// regardless of how operations interleave.
|
|
||||||
NUM_THREADS :: 8
|
NUM_THREADS :: 8
|
||||||
ITERATIONS_PER_THREAD :: 10_000
|
ITERATIONS_PER_THREAD :: 10_000
|
||||||
|
|
||||||
@@ -274,10 +324,10 @@ test_concurrent_atomic_mul_div_round_trip :: proc(t: ^testing.T) {
|
|||||||
testing.expect_value(t, shared_value, 1000.0)
|
testing.expect_value(t, shared_value, 1000.0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verify the f32 type dispatch works correctly under contention.
|
||||||
|
// Same approach as the f64 add test but with f32.
|
||||||
@(test)
|
@(test)
|
||||||
test_atomic_add_with_f32 :: proc(t: ^testing.T) {
|
test_atomic_add_with_f32 :: proc(t: ^testing.T) {
|
||||||
// Verify the f32 type dispatch works correctly under contention.
|
|
||||||
// Same approach as the f64 add test but with f32.
|
|
||||||
NUM_THREADS :: 8
|
NUM_THREADS :: 8
|
||||||
ITERATIONS_PER_THREAD :: 10_000
|
ITERATIONS_PER_THREAD :: 10_000
|
||||||
|
|
||||||
@@ -319,17 +369,17 @@ test_atomic_add_with_f32 :: proc(t: ^testing.T) {
|
|||||||
testing.expect_value(t, shared_value, expected)
|
testing.expect_value(t, shared_value, expected)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Tests that the memory order passed to atomic_float_op's CAS success condition
|
||||||
|
// provides full ordering guarantees for the entire float operation.
|
||||||
|
//
|
||||||
|
// Both sides use atomic_add_float (not raw intrinsics) to verify:
|
||||||
|
// - Release on CAS success publishes prior non-atomic writes
|
||||||
|
// - Acquire on CAS success makes those writes visible to the reader
|
||||||
|
//
|
||||||
|
// NOTE: This test may pass even with Relaxed ordering on x86 due to its strong memory model.
|
||||||
|
// On ARM or other weak-memory architectures, using Relaxed here would likely cause failures.
|
||||||
@(test)
|
@(test)
|
||||||
test_atomic_release_acquire_publish_visibility :: proc(t: ^testing.T) {
|
test_atomic_release_acquire_publish_visibility :: proc(t: ^testing.T) {
|
||||||
// Tests that the memory order passed to atomic_float_op's CAS success condition
|
|
||||||
// provides full ordering guarantees for the entire float operation.
|
|
||||||
//
|
|
||||||
// Both sides use atomic_add_float (not raw intrinsics) to verify:
|
|
||||||
// - Release on CAS success publishes prior non-atomic writes
|
|
||||||
// - Acquire on CAS success makes those writes visible to the reader
|
|
||||||
//
|
|
||||||
// NOTE: This test may pass even with Relaxed ordering on x86 due to its strong memory model.
|
|
||||||
// On ARM or other weak-memory architectures, using Relaxed here would likely cause failures.
|
|
||||||
NUM_READERS :: 4
|
NUM_READERS :: 4
|
||||||
|
|
||||||
Shared_State :: struct {
|
Shared_State :: struct {
|
||||||
@@ -426,17 +476,20 @@ test_atomic_release_acquire_publish_visibility :: proc(t: ^testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Stress test for every spinlock acquisition variant: N threads contend on a
|
||||||
|
// single lock and perform a deliberate non-atomic read-modify-write on shared
|
||||||
|
// data. Each iteration rotates through spinlock_try_lock, spinlock_lock,
|
||||||
|
// spinlock_guard, and spinlock_tryguard so every variant runs concurrently and
|
||||||
|
// must uphold mutual exclusion on the same lock.
|
||||||
|
//
|
||||||
|
// If mutual exclusion holds:
|
||||||
|
// - `counter` ends at exactly NUM_THREADS * ITERATIONS_PER_THREAD
|
||||||
|
// - `concurrent_holders` never exceeds 1
|
||||||
|
//
|
||||||
|
// A multi-step RMW (read → relax → write) widens the critical section so
|
||||||
|
// any failure to exclude is virtually guaranteed to corrupt the counter.
|
||||||
@(test)
|
@(test)
|
||||||
test_spinlock_try_lock_mutual_exclusion :: proc(t: ^testing.T) {
|
test_spinlock_mutual_exclusion :: proc(t: ^testing.T) {
|
||||||
// Stress test for spinlock_try_lock: N threads spin-acquire the lock and
|
|
||||||
// perform a deliberate non-atomic read-modify-write on shared data.
|
|
||||||
//
|
|
||||||
// If mutual exclusion holds:
|
|
||||||
// - `counter` ends at exactly NUM_THREADS * ITERATIONS_PER_THREAD
|
|
||||||
// - `concurrent_holders` never exceeds 1
|
|
||||||
//
|
|
||||||
// A multi-step RMW (read → relax → write) widens the critical section so
|
|
||||||
// any failure to exclude is virtually guaranteed to corrupt the counter.
|
|
||||||
NUM_THREADS :: 8
|
NUM_THREADS :: 8
|
||||||
ITERATIONS_PER_THREAD :: 50_000
|
ITERATIONS_PER_THREAD :: 50_000
|
||||||
|
|
||||||
@@ -461,6 +514,29 @@ test_spinlock_try_lock_mutual_exclusion :: proc(t: ^testing.T) {
|
|||||||
barrier: sync.Barrier
|
barrier: sync.Barrier
|
||||||
sync.barrier_init(&barrier, NUM_THREADS)
|
sync.barrier_init(&barrier, NUM_THREADS)
|
||||||
|
|
||||||
|
// Body of the protected region, shared by every acquisition variant so that each of them
// exercises the same non-atomic read-modify-write on `s.counter`.
critical_section :: proc(s: ^Shared) {
	// Register this caller as a holder. The code treats the returned value as the count
	// before the increment, so the current number of holders is that value plus one.
	before := intrinsics.atomic_add_explicit(&s.concurrent_holders, 1, .Relaxed)
	now_holding := before + 1

	// Keep the high-water mark of simultaneous holders. This is a plain (non-atomic)
	// write used purely as a diagnostic; it is expected to happen while the lock is held.
	if s.max_holders < now_holding {
		s.max_holders = now_holding
	}

	// Deliberately non-atomic increment: read, pause briefly, then write back. The gap
	// between read and write is the window in which a second holder would lose an update.
	snapshot := s.counter
	intrinsics.cpu_relax()
	intrinsics.cpu_relax()
	s.counter = snapshot + 1

	// Deregister before the caller releases the lock.
	intrinsics.atomic_sub_explicit(&s.concurrent_holders, 1, .Relaxed)
}
|
||||||
|
|
||||||
thread_proc :: proc(th: ^thread.Thread) {
|
thread_proc :: proc(th: ^thread.Thread) {
|
||||||
ctx := cast(^Thread_Data)th.data
|
ctx := cast(^Thread_Data)th.data
|
||||||
s := ctx.shared
|
s := ctx.shared
|
||||||
@@ -468,36 +544,35 @@ test_spinlock_try_lock_mutual_exclusion :: proc(t: ^testing.T) {
|
|||||||
// All threads rendezvous here for maximum contention.
|
// All threads rendezvous here for maximum contention.
|
||||||
sync.barrier_wait(ctx.barrier)
|
sync.barrier_wait(ctx.barrier)
|
||||||
|
|
||||||
for _ in 0 ..< ITERATIONS_PER_THREAD {
|
for i in 0 ..< ITERATIONS_PER_THREAD {
|
||||||
// Spin on try_lock until we acquire it.
|
// Rotate through every acquisition variant so they all contend on the
|
||||||
for !spinlock_try_lock(&s.lock) {
|
// same lock simultaneously and must each uphold mutual exclusion.
|
||||||
intrinsics.cpu_relax()
|
switch i & 3 {
|
||||||
|
case 0:
|
||||||
|
// Manual spin on try_lock until we acquire it.
|
||||||
|
for !spinlock_try_lock(&s.lock) {
|
||||||
|
intrinsics.cpu_relax()
|
||||||
|
}
|
||||||
|
critical_section(s)
|
||||||
|
spinlock_unlock(&s.lock)
|
||||||
|
case 1:
|
||||||
|
// Blocking lock that loops internally until acquired.
|
||||||
|
spinlock_lock(&s.lock)
|
||||||
|
critical_section(s)
|
||||||
|
spinlock_unlock(&s.lock)
|
||||||
|
case 2: // Scoped guard: unlocks automatically at the end of the block.
|
||||||
|
if spinlock_guard(&s.lock) {
|
||||||
|
critical_section(s)
|
||||||
|
}
|
||||||
|
case 3: // Scoped try-guard: retry until acquired, auto-unlocks on success.
|
||||||
|
for {
|
||||||
|
if spinlock_tryguard(&s.lock) {
|
||||||
|
critical_section(s)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
intrinsics.cpu_relax()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- critical section start ---
|
|
||||||
|
|
||||||
// Atomically bump the holder count so we can detect overlapping holders.
|
|
||||||
holders := intrinsics.atomic_add_explicit(&s.concurrent_holders, 1, .Relaxed)
|
|
||||||
|
|
||||||
// Track the maximum we ever observed (relaxed is fine, this is
|
|
||||||
// purely diagnostic and protected by the spinlock for writes).
|
|
||||||
if holders + 1 > s.max_holders {
|
|
||||||
s.max_holders = holders + 1
|
|
||||||
}
|
|
||||||
|
|
||||||
// Non-atomic RMW: read, spin a tiny bit, then write.
|
|
||||||
// This deliberately creates a wide window where a second holder
|
|
||||||
// would cause a lost update.
|
|
||||||
val := s.counter
|
|
||||||
intrinsics.cpu_relax()
|
|
||||||
intrinsics.cpu_relax()
|
|
||||||
s.counter = val + 1
|
|
||||||
|
|
||||||
intrinsics.atomic_sub_explicit(&s.concurrent_holders, 1, .Relaxed)
|
|
||||||
|
|
||||||
// --- critical section end ---
|
|
||||||
|
|
||||||
spinlock_unlock(&s.lock)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user