2026-06-06 02:38:08 +00:00
1 changed files with 132 additions and 57 deletions
@@ -120,10 +120,52 @@ spinlock_try_lock :: #force_inline proc "contextless" (lock: ^Spinlock) -> bool
 	return lock_acquired
 }
 // Busy-waits until `lock` has been acquired by the caller.
 // Each failed attempt is followed by a CPU relax hint before the next retry.
 spinlock_lock :: #force_inline proc "contextless" (lock: ^Spinlock) {
 	for {
 		// Leave as soon as one attempt succeeds.
 		if spinlock_try_lock(lock) {
 			return
 		}
 		intrinsics.cpu_relax()
 	}
 }
 // Releases the lock by atomically storing `false` into it with Release ordering.
 // NOTE(review): nothing here verifies that the caller actually holds the lock; presumably only
 // the current holder may call this — confirm against callers.
 spinlock_unlock :: #force_inline proc "contextless" (lock: ^Spinlock) {
 	intrinsics.atomic_store_explicit(lock, false, .Release)
 }
 // Spins until the lock is acquired, then unlocks at the end of the calling scope. Always returns
 // true so it can guard a critical section from within an `if`:
 //
 //	if spinlock_guard(&lock) {
 //		// critical section
 //	}
 //
 // The unlock is not performed inside this procedure: the `deferred_in` attribute schedules
 // `spinlock_unlock` to run with the same `lock` argument when the caller's scope ends.
@(deferred_in = spinlock_unlock)
 spinlock_guard :: #force_inline proc "contextless" (lock: ^Spinlock) -> bool {
 	// Busy-waits until the lock is held.
 	spinlock_lock(lock)
 	// Constant result; it exists only so the call can serve as an `if` condition.
 	return true
 }
 // Tries to acquire the lock once without spinning. Returns true and unlocks at the end of the
 // calling scope if acquired, otherwise returns false and does nothing:
 //
 //	if spinlock_tryguard(&lock) {
 //		// critical section, entered only if the lock was acquired
 //	}
 //
 // The `deferred_in_out` attribute forwards both the `lock` argument and the returned bool to
 // `spinlock_tryguard_unlock`, which is how the deferred call knows whether there is anything
 // to release.
@(deferred_in_out = spinlock_tryguard_unlock)
 spinlock_tryguard :: #force_inline proc "contextless" (lock: ^Spinlock) -> bool {
 	// No retry loop here; the result doubles as the `locked` flag for the deferred unlock.
 	return spinlock_try_lock(lock)
 }
 // Deferred counterpart of `spinlock_tryguard`. It receives the guard's lock pointer together with
 // the guard's result and releases the lock only when that result reports a successful acquisition.
@(private)
 spinlock_tryguard_unlock :: #force_inline proc "contextless" (lock: ^Spinlock, locked: bool) {
 	// A failed try-lock acquired nothing, so there is nothing to release.
 	if !locked {
 		return
 	}
 	spinlock_unlock(lock)
 }
 // Overload group: `lock` currently resolves to `spinlock_lock`.
 lock :: proc {
 	spinlock_lock,
 }
 // Overload group: `try_lock` currently resolves to `spinlock_try_lock`.
 try_lock :: proc {
 	spinlock_try_lock,
 }
@@ -132,6 +174,14 @@ unlock :: proc {
 	spinlock_unlock,
 }
 // Overload group: `guard` currently resolves to `spinlock_guard` (scoped lock, always acquired).
 guard :: proc {
 	spinlock_guard,
 }
 // Overload group: `tryguard` currently resolves to `spinlock_tryguard` (scoped lock, may fail).
 tryguard :: proc {
 	spinlock_tryguard,
 }
 // ---------------------------------------------------------------------------------------------------------------------
 // ----- Tests ------------------------
 // ---------------------------------------------------------------------------------------------------------------------
@@ -139,10 +189,10 @@ import "core:sync"
 import "core:testing"
 import "core:thread"
 // Multiple threads will each add 1.0 this many times.
 // If any updates are lost due to race conditions, the final sum will be wrong.
@(test)
 test_concurrent_atomic_add_no_lost_updates :: proc(t: ^testing.T) {
 	// Multiple threads will each add 1.0 this many times.
 	// If any updates are lost due to race conditions, the final sum will be wrong.
 	NUM_THREADS :: 8
 	ITERATIONS_PER_THREAD :: 10_000
@@ -184,10 +234,10 @@ test_concurrent_atomic_add_no_lost_updates :: proc(t: ^testing.T) {
 	testing.expect_value(t, shared_value, expected)
 }
 // Start with a known value, multiple threads subtract.
 // If any updates are lost due to race conditions, the final result will be wrong.
@(test)
 test_concurrent_atomic_sub_no_lost_updates :: proc(t: ^testing.T) {
 	// Start with a known value, multiple threads subtract.
 	// If any updates are lost due to race conditions, the final result will be wrong.
 	NUM_THREADS :: 8
 	ITERATIONS_PER_THREAD :: 10_000
@@ -228,11 +278,11 @@ test_concurrent_atomic_sub_no_lost_updates :: proc(t: ^testing.T) {
 	testing.expect_value(t, shared_value, 0.0)
 }
 // Each thread multiplies by 2.0 then divides by 2.0.
 // Since these are inverses, the final value should equal the starting value
 // regardless of how operations interleave.
@(test)
 test_concurrent_atomic_mul_div_round_trip :: proc(t: ^testing.T) {
 	// Each thread multiplies by 2.0 then divides by 2.0.
 	// Since these are inverses, the final value should equal the starting value
 	// regardless of how operations interleave.
 	NUM_THREADS :: 8
 	ITERATIONS_PER_THREAD :: 10_000
@@ -274,10 +324,10 @@ test_concurrent_atomic_mul_div_round_trip :: proc(t: ^testing.T) {
 	testing.expect_value(t, shared_value, 1000.0)
 }
 // Verify the f32 type dispatch works correctly under contention.
 // Same approach as the f64 add test but with f32.
@(test)
 test_atomic_add_with_f32 :: proc(t: ^testing.T) {
 	// Verify the f32 type dispatch works correctly under contention.
 	// Same approach as the f64 add test but with f32.
 	NUM_THREADS :: 8
 	ITERATIONS_PER_THREAD :: 10_000
@@ -319,17 +369,17 @@ test_atomic_add_with_f32 :: proc(t: ^testing.T) {
 	testing.expect_value(t, shared_value, expected)
 }
 // Tests that the memory order passed to atomic_float_op's CAS success condition
 // provides full ordering guarantees for the entire float operation.
 //
 // Both sides use atomic_add_float (not raw intrinsics) to verify:
 // - Release on CAS success publishes prior non-atomic writes
 // - Acquire on CAS success makes those writes visible to the reader
 //
 // NOTE: This test may pass even with Relaxed ordering on x86 due to its strong memory model.
 // On ARM or other weak-memory architectures, using Relaxed here would likely cause failures.
@(test)
 test_atomic_release_acquire_publish_visibility :: proc(t: ^testing.T) {
 	// Tests that the memory order passed to atomic_float_op's CAS success condition
 	// provides full ordering guarantees for the entire float operation.
 	//
 	// Both sides use atomic_add_float (not raw intrinsics) to verify:
 	// - Release on CAS success publishes prior non-atomic writes
 	// - Acquire on CAS success makes those writes visible to the reader
 	//
 	// NOTE: This test may pass even with Relaxed ordering on x86 due to its strong memory model.
 	// On ARM or other weak-memory architectures, using Relaxed here would likely cause failures.
 	NUM_READERS :: 4
 	Shared_State :: struct {
@@ -426,17 +476,20 @@ test_atomic_release_acquire_publish_visibility :: proc(t: ^testing.T) {
 	}
 }
 // Stress test for every spinlock acquisition variant: N threads contend on a
 // single lock and perform a deliberate non-atomic read-modify-write on shared
 // data. Each iteration rotates through spinlock_try_lock, spinlock_lock,
 // spinlock_guard, and spinlock_tryguard so every variant runs concurrently and
 // must uphold mutual exclusion on the same lock.
 //
 // If mutual exclusion holds:
 //   - `counter` ends at exactly NUM_THREADS * ITERATIONS_PER_THREAD
 //   - `concurrent_holders` never exceeds 1
 //
 // A multi-step RMW (read → relax → write) widens the critical section so
 // any failure to exclude is virtually guaranteed to corrupt the counter.
@(test)
-test_spinlock_try_lock_mutual_exclusion :: proc(t: ^testing.T) {
+test_spinlock_mutual_exclusion :: proc(t: ^testing.T) {
 	// Stress test for spinlock_try_lock: N threads spin-acquire the lock and
 	// perform a deliberate non-atomic read-modify-write on shared data.
 	//
 	// If mutual exclusion holds:
 	//   - `counter` ends at exactly NUM_THREADS * ITERATIONS_PER_THREAD
 	//   - `concurrent_holders` never exceeds 1
 	//
 	// A multi-step RMW (read → relax → write) widens the critical section so
 	// any failure to exclude is virtually guaranteed to corrupt the counter.
 	NUM_THREADS :: 8
 	ITERATIONS_PER_THREAD :: 50_000
@@ -461,6 +514,29 @@ test_spinlock_try_lock_mutual_exclusion :: proc(t: ^testing.T) {
 	barrier: sync.Barrier
 	sync.barrier_init(&barrier, NUM_THREADS)
 	// Shared critical-section body used by every acquisition variant, so each of them exercises
 	// the identical non-atomic read-modify-write on `counter`.
 	critical_section :: proc(s: ^Shared) {
 		// Register as a holder. The code treats the returned value as the count before this
 		// increment, so `now_holding` above 1 means another thread is inside at the same time.
 		prev_holders := intrinsics.atomic_add_explicit(&s.concurrent_holders, 1, .Relaxed)
 		now_holding := prev_holders + 1
 		// Record the high-water mark with a plain write; it is diagnostic only and is
 		// expected to be serialized by the spinlock held by the caller.
 		if now_holding > s.max_holders {
 			s.max_holders = now_holding
 		}
 		// Deliberately racy increment: load, stall briefly, store. If a second holder
 		// slips in during the stall, one of the two updates is lost.
 		snapshot := s.counter
 		intrinsics.cpu_relax()
 		intrinsics.cpu_relax()
 		s.counter = snapshot + 1
 		// Deregister as a holder before returning to the caller.
 		intrinsics.atomic_sub_explicit(&s.concurrent_holders, 1, .Relaxed)
 	}
 	thread_proc :: proc(th: ^thread.Thread) {
 		ctx := cast(^Thread_Data)th.data
 		s := ctx.shared
@@ -468,36 +544,35 @@ test_spinlock_try_lock_mutual_exclusion :: proc(t: ^testing.T) {
 		// All threads rendezvous here for maximum contention.
 		sync.barrier_wait(ctx.barrier)
-		for _ in 0 ..< ITERATIONS_PER_THREAD {
+		for i in 0 ..< ITERATIONS_PER_THREAD {
-			// Spin on try_lock until we acquire it.
+			// Rotate through every acquisition variant so they all contend on the
-			for !spinlock_try_lock(&s.lock) {
+			// same lock simultaneously and must each uphold mutual exclusion.
-				intrinsics.cpu_relax()
+			switch i & 3 {
 			case 0:
 				// Manual spin on try_lock until we acquire it.
 				for !spinlock_try_lock(&s.lock) {
 					intrinsics.cpu_relax()
 				}
 				critical_section(s)
 				spinlock_unlock(&s.lock)
 			case 1:
 				// Blocking lock that loops internally until acquired.
 				spinlock_lock(&s.lock)
 				critical_section(s)
 				spinlock_unlock(&s.lock)
 			case 2: // Scoped guard: unlocks automatically at the end of the block.
 					if spinlock_guard(&s.lock) {
 						critical_section(s)
 					}
 			case 3: // Scoped try-guard: retry until acquired, auto-unlocks on success.
 					for {
 						if spinlock_tryguard(&s.lock) {
 							critical_section(s)
 							break
 						}
 						intrinsics.cpu_relax()
 					}
 			}
 			// --- critical section start ---
 			// Atomically bump the holder count so we can detect overlapping holders.
 			holders := intrinsics.atomic_add_explicit(&s.concurrent_holders, 1, .Relaxed)
 			// Track the maximum we ever observed (relaxed is fine, this is
 			// purely diagnostic and protected by the spinlock for writes).
 			if holders + 1 > s.max_holders {
 				s.max_holders = holders + 1
 			}
 			// Non-atomic RMW: read, spin a tiny bit, then write.
 			// This deliberately creates a wide window where a second holder
 			// would cause a lost update.
 			val := s.counter
 			intrinsics.cpu_relax()
 			intrinsics.cpu_relax()
 			s.counter = val + 1
 			intrinsics.atomic_sub_explicit(&s.concurrent_holders, 1, .Relaxed)
 			// --- critical section end ---
 			spinlock_unlock(&s.lock)
 		}
 	}