Spinlock features (#33)

Co-authored-by: Zachary Levy <zachary@sunforge.is>
Reviewed-on: #33
2026-06-06 02:38:08 +00:00
parent 08f8a9d0b5
commit 962a814b84
1 changed files with 132 additions and 57 deletions
@@ -120,10 +120,52 @@ spinlock_try_lock :: #force_inline proc "contextless" (lock: ^Spinlock) -> bool
 	return lock_acquired
 }

+// Spins until the lock is acquired.
+//
+// Waits with a test-and-test-and-set loop: after a failed `spinlock_try_lock`, the waiter polls
+// the lock with relaxed loads (relaxing the CPU between polls) and only retries the acquisition
+// once the lock is observed free. Ordering for the critical section still comes from the
+// successful `spinlock_try_lock` call, not from the relaxed polling load.
+//
+// NOTE(review): assumes `spinlock_try_lock` performs an atomic read-modify-write on every call,
+// which is what makes read-only polling cheaper under contention — confirm against its body.
+spinlock_lock :: #force_inline proc "contextless" (lock: ^Spinlock) {
+	for !spinlock_try_lock(lock) {
+		// Read-only wait while the lock is held; avoids repeated acquisition attempts.
+		for intrinsics.atomic_load_explicit(lock, .Relaxed) {
+			intrinsics.cpu_relax()
+		}
+	}
+}
+
 // Releases the lock by atomically storing `false` with release ordering.
 // The store is unconditional: it does not check whether the lock is currently held.
 spinlock_unlock :: #force_inline proc "contextless" (lock: ^Spinlock) {
 	intrinsics.atomic_store_explicit(lock, false, .Release)
 }

+// Scoped lock: spins until the lock is acquired, then unlocks at the end of the calling scope.
+// The release is wired up through `deferred_in`, which invokes `spinlock_unlock` with the same
+// `lock` argument. Always returns true so it can guard a critical section from within an `if`:
+//
+//	if spinlock_guard(&lock) {
+//		// critical section
+//	}
+@(deferred_in = spinlock_unlock)
+spinlock_guard :: #force_inline proc "contextless" (lock: ^Spinlock) -> bool {
+	spinlock_lock(lock)
+	return true
+}
+
+// Scoped try-lock: makes a single acquisition attempt without spinning and returns its result.
+// `deferred_in_out` hands both the `lock` argument and the returned bool to
+// `spinlock_tryguard_unlock`, so the lock is released at the end of the calling scope only when
+// it was actually acquired; on failure nothing is unlocked:
+//
+//	if spinlock_tryguard(&lock) {
+//		// critical section, entered only if the lock was acquired
+//	}
+@(deferred_in_out = spinlock_tryguard_unlock)
+spinlock_tryguard :: #force_inline proc "contextless" (lock: ^Spinlock) -> bool {
+	return spinlock_try_lock(lock)
+}
+
+// Deferred counterpart to `spinlock_tryguard`. Takes the guard's `lock` argument together with
+// its result (`locked`) and releases the lock only when that result reports a successful
+// acquisition; a failed attempt leaves the lock untouched.
+@(private)
+spinlock_tryguard_unlock :: #force_inline proc "contextless" (lock: ^Spinlock, locked: bool) {
+	if !locked {
+		return
+	}
+	spinlock_unlock(lock)
+}
+
+// Procedure group for blocking acquisition; currently its only member is `spinlock_lock`.
+lock :: proc {
+	spinlock_lock,
+}
+
 // Procedure group for a single acquisition attempt; currently its only member is `spinlock_try_lock`.
 try_lock :: proc {
 	spinlock_try_lock,
 }
@@ -132,6 +174,14 @@ unlock :: proc {
 	spinlock_unlock,
 }

+// Procedure group for scoped (auto-unlocking) acquisition; currently its only member is `spinlock_guard`.
+guard :: proc {
+	spinlock_guard,
+}
+
+// Procedure group for scoped try-acquisition; currently its only member is `spinlock_tryguard`.
+tryguard :: proc {
+	spinlock_tryguard,
+}
+
 // ---------------------------------------------------------------------------------------------------------------------
 // ----- Tests ------------------------
 // ---------------------------------------------------------------------------------------------------------------------
@@ -139,10 +189,10 @@ import "core:sync"
 import "core:testing"
 import "core:thread"

+// Multiple threads will each add 1.0 this many times.
+// If any updates are lost due to race conditions, the final sum will be wrong.
@(test)
 test_concurrent_atomic_add_no_lost_updates :: proc(t: ^testing.T) {
-	// Multiple threads will each add 1.0 this many times.
-	// If any updates are lost due to race conditions, the final sum will be wrong.
 	NUM_THREADS :: 8
 	ITERATIONS_PER_THREAD :: 10_000

@@ -184,10 +234,10 @@ test_concurrent_atomic_add_no_lost_updates :: proc(t: ^testing.T) {
 	testing.expect_value(t, shared_value, expected)
 }

+// Start with a known value, multiple threads subtract.
+// If any updates are lost due to race conditions, the final result will be wrong.
@(test)
 test_concurrent_atomic_sub_no_lost_updates :: proc(t: ^testing.T) {
-	// Start with a known value, multiple threads subtract.
-	// If any updates are lost due to race conditions, the final result will be wrong.
 	NUM_THREADS :: 8
 	ITERATIONS_PER_THREAD :: 10_000

@@ -228,11 +278,11 @@ test_concurrent_atomic_sub_no_lost_updates :: proc(t: ^testing.T) {
 	testing.expect_value(t, shared_value, 0.0)
 }

+// Each thread multiplies by 2.0 then divides by 2.0.
+// Since these are inverses, the final value should equal the starting value
+// regardless of how operations interleave.
@(test)
 test_concurrent_atomic_mul_div_round_trip :: proc(t: ^testing.T) {
-	// Each thread multiplies by 2.0 then divides by 2.0.
-	// Since these are inverses, the final value should equal the starting value
-	// regardless of how operations interleave.
 	NUM_THREADS :: 8
 	ITERATIONS_PER_THREAD :: 10_000

@@ -274,10 +324,10 @@ test_concurrent_atomic_mul_div_round_trip :: proc(t: ^testing.T) {
 	testing.expect_value(t, shared_value, 1000.0)
 }

+// Verify the f32 type dispatch works correctly under contention.
+// Same approach as the f64 add test but with f32.
@(test)
 test_atomic_add_with_f32 :: proc(t: ^testing.T) {
-	// Verify the f32 type dispatch works correctly under contention.
-	// Same approach as the f64 add test but with f32.
 	NUM_THREADS :: 8
 	ITERATIONS_PER_THREAD :: 10_000

@@ -319,17 +369,17 @@ test_atomic_add_with_f32 :: proc(t: ^testing.T) {
 	testing.expect_value(t, shared_value, expected)
 }

+// Tests that the memory order passed to atomic_float_op's CAS success condition
+// provides full ordering guarantees for the entire float operation.
+//
+// Both sides use atomic_add_float (not raw intrinsics) to verify:
+// - Release on CAS success publishes prior non-atomic writes
+// - Acquire on CAS success makes those writes visible to the reader
+//
+// NOTE: This test may pass even with Relaxed ordering on x86 due to its strong memory model.
+// On ARM or other weak-memory architectures, using Relaxed here would likely cause failures.
@(test)
 test_atomic_release_acquire_publish_visibility :: proc(t: ^testing.T) {
-	// Tests that the memory order passed to atomic_float_op's CAS success condition
-	// provides full ordering guarantees for the entire float operation.
-	//
-	// Both sides use atomic_add_float (not raw intrinsics) to verify:
-	// - Release on CAS success publishes prior non-atomic writes
-	// - Acquire on CAS success makes those writes visible to the reader
-	//
-	// NOTE: This test may pass even with Relaxed ordering on x86 due to its strong memory model.
-	// On ARM or other weak-memory architectures, using Relaxed here would likely cause failures.
 	NUM_READERS :: 4

 	Shared_State :: struct {
@@ -426,17 +476,20 @@ test_atomic_release_acquire_publish_visibility :: proc(t: ^testing.T) {
 	}
 }

+// Stress test for every spinlock acquisition variant: N threads contend on a
+// single lock and perform a deliberate non-atomic read-modify-write on shared
+// data. Each iteration rotates through spinlock_try_lock, spinlock_lock,
+// spinlock_guard, and spinlock_tryguard so every variant runs concurrently and
+// must uphold mutual exclusion on the same lock.
+//
+// If mutual exclusion holds:
+//   - `counter` ends at exactly NUM_THREADS * ITERATIONS_PER_THREAD
+//   - `concurrent_holders` never exceeds 1
+//
+// A multi-step RMW (read → relax → write) widens the critical section so
+// any failure to exclude is virtually guaranteed to corrupt the counter.
@(test)
-test_spinlock_try_lock_mutual_exclusion :: proc(t: ^testing.T) {
-	// Stress test for spinlock_try_lock: N threads spin-acquire the lock and
-	// perform a deliberate non-atomic read-modify-write on shared data.
-	//
-	// If mutual exclusion holds:
-	//   - `counter` ends at exactly NUM_THREADS * ITERATIONS_PER_THREAD
-	//   - `concurrent_holders` never exceeds 1
-	//
-	// A multi-step RMW (read → relax → write) widens the critical section so
-	// any failure to exclude is virtually guaranteed to corrupt the counter.
+test_spinlock_mutual_exclusion :: proc(t: ^testing.T) {
 	NUM_THREADS :: 8
 	ITERATIONS_PER_THREAD :: 50_000

@@ -461,21 +514,9 @@ test_spinlock_try_lock_mutual_exclusion :: proc(t: ^testing.T) {
 	barrier: sync.Barrier
 	sync.barrier_init(&barrier, NUM_THREADS)

-	thread_proc :: proc(th: ^thread.Thread) {
-		ctx := cast(^Thread_Data)th.data
-		s := ctx.shared
-
-		// All threads rendezvous here for maximum contention.
-		sync.barrier_wait(ctx.barrier)
-
-		for _ in 0 ..< ITERATIONS_PER_THREAD {
-			// Spin on try_lock until we acquire it.
-			for !spinlock_try_lock(&s.lock) {
-				intrinsics.cpu_relax()
-			}
-
-			// --- critical section start ---
-
+	// The single critical section every acquisition variant must protect. Sharing
+	// it guarantees they all stress the exact same non-atomic read-modify-write.
+	critical_section :: proc(s: ^Shared) {
 		// Atomically bump the holder count so we can detect overlapping holders.
 		holders := intrinsics.atomic_add_explicit(&s.concurrent_holders, 1, .Relaxed)

@@ -494,10 +535,44 @@ test_spinlock_try_lock_mutual_exclusion :: proc(t: ^testing.T) {
 		s.counter = val + 1

 		intrinsics.atomic_sub_explicit(&s.concurrent_holders, 1, .Relaxed)
+	}

-			// --- critical section end ---
+	// Worker body for the contention test. Reads its `Thread_Data` from `th.data`, waits on the
+	// shared barrier, then runs ITERATIONS_PER_THREAD iterations, each entering
+	// `critical_section` under the lock via one of the four acquisition variants (chosen by
+	// `i & 3`). Presumably started once per contending thread — confirm at the spawn site.
+	thread_proc :: proc(th: ^thread.Thread) {
+		ctx := cast(^Thread_Data)th.data
+		s := ctx.shared
 
+		// All threads rendezvous here for maximum contention.
+		sync.barrier_wait(ctx.barrier)
+
+		for i in 0 ..< ITERATIONS_PER_THREAD {
+			// Rotate through every acquisition variant so they all contend on the
+			// same lock simultaneously and must each uphold mutual exclusion.
+			switch i & 3 {
+			case 0:
+				// Manual spin on try_lock until we acquire it.
+				for !spinlock_try_lock(&s.lock) {
+					intrinsics.cpu_relax()
+				}
+				critical_section(s)
 				spinlock_unlock(&s.lock)
+			case 1:
+				// Blocking lock that loops internally until acquired.
+				spinlock_lock(&s.lock)
+				critical_section(s)
+				spinlock_unlock(&s.lock)
+			case 2:
+				// Scoped guard: unlocks automatically at the end of the block.
+				if spinlock_guard(&s.lock) {
+					critical_section(s)
+				}
+			case 3:
+				// Scoped try-guard: retry until acquired, auto-unlocks on success.
+				for {
+					if spinlock_tryguard(&s.lock) {
+						critical_section(s)
+						break
+					}
+					// Attempt failed (nothing to unlock); back off before retrying.
+					intrinsics.cpu_relax()
+				}
+			}
 		}
 	}