diff --git a/.zed/tasks.json b/.zed/tasks.json index 8b14508..a9be01a 100644 --- a/.zed/tasks.json +++ b/.zed/tasks.json @@ -75,6 +75,16 @@ "command": "odin run draw/examples -debug -out=out/debug/draw-examples -- textures", "cwd": "$ZED_WORKTREE_ROOT", }, + { + "label": "Run draw gaussian-blur example", + "command": "odin run draw/examples -debug -out=out/debug/draw-examples -- gaussian-blur", + "cwd": "$ZED_WORKTREE_ROOT", + }, + { + "label": "Run draw gaussian-blur-debug example", + "command": "odin run draw/examples -debug -out=out/debug/draw-examples -- gaussian-blur-debug", + "cwd": "$ZED_WORKTREE_ROOT", + }, { "label": "Run qrcode basic example", "command": "odin run qrcode/examples -debug -out=out/debug/qrcode-examples -- basic", diff --git a/draw/README.md b/draw/README.md index a04d09a..1505177 100644 --- a/draw/README.md +++ b/draw/README.md @@ -5,54 +5,60 @@ Clay UI integration. ## Current state -The renderer uses a single unified `Pipeline_2D_Base` (`TRIANGLELIST` pipeline) with two submission +The renderer uses a single unified `Core_2D` (`TRIANGLELIST` pipeline) with two submission modes dispatched by a push constant: - **Mode 0 (Tessellated):** Vertex buffer contains real geometry. Used for text (indexed draws into - SDL_ttf atlas textures), single-pixel points (`tes_pixel`), arbitrary user geometry (`tes_triangle`, - `tes_triangle_fan`, `tes_triangle_strip`), and shapes without a closed-form rounded-rectangle - reduction: ellipses (`tes_ellipse`), regular polygons (`tes_polygon`), and circle sectors - (`tes_sector`). The fragment shader computes `out = color * texture(tex, uv)`. + SDL_ttf atlas textures), single-pixel points (`tess.pixel`), arbitrary user geometry + (`tess.triangle`, `tess.triangle_aa`, `tess.triangle_lines`, `tess.triangle_fan`, + `tess.triangle_strip`), and any raw vertex geometry submitted via `prepare_shape`. The fragment + shader premultiplies the texture sample (`t.rgb *= t.a`) and computes `out = color * t`. 
- **Mode 1 (SDF):** A static 6-vertex unit-quad buffer is drawn instanced, with per-primitive - `Primitive` structs (80 bytes each) uploaded each frame to a GPU storage buffer. The vertex shader - reads `primitives[gl_InstanceIndex]`, computes world-space position from unit quad corners + - primitive bounds. The fragment shader always evaluates `sdRoundedBox` — there is no per-primitive - kind dispatch. + `Core_2D_Primitive` structs (96 bytes each) uploaded each frame to a GPU storage buffer. The vertex + shader reads `primitives[gl_InstanceIndex]`, computes world-space position from unit quad corners + + primitive bounds. The fragment shader dispatches on `Shape_Kind` (encoded in the low byte of + `Core_2D_Primitive.flags`) to evaluate one of four signed distance functions: + - **RRect** (kind 1) — `sdRoundedBox` with per-corner radii. Covers rectangles (sharp or rounded), + circles (uniform radii = half-size), and line segments / capsules (rotated RRect with uniform + radii = half-thickness). Covers filled, outlined, textured, and gradient-filled variants. + - **NGon** (kind 2) — `sdRegularPolygon` for regular N-sided polygons. + - **Ellipse** (kind 3) — `sdEllipseApprox`, an approximate ellipse SDF suitable for UI rendering. + - **Ring_Arc** (kind 4) — annular ring with optional angular clipping via pre-computed edge + normals. Covers full rings, partial arcs, and pie slices (`inner_radius = 0`). -The SDF path handles all shapes that are algebraically reducible to a rounded rectangle: - -- **Rounded rectangles** — per-corner radii via `sdRoundedBox` (iq). Covers filled, stroked, - textured, and gradient-filled rectangles. -- **Circles** — uniform radii equal to half-size. Covers filled, stroked, and radial-gradient circles. -- **Line segments / capsules** — rotated RRect with uniform radii equal to half-thickness (stadium shape). -- **Full rings / annuli** — stroked circle (mid-radius with stroke thickness = outer - inner). 
- -All SDF shapes support fill, stroke, solid color, bilinear 4-corner gradients, radial 2-color -gradients, and texture fills via `Shape_Flags`. Gradient colors are packed into the same 16 bytes as -the texture UV rect via a `Uv_Or_Gradient` raw union — zero size increase to the 80-byte `Primitive` -struct. Gradient and texture are mutually exclusive. +All SDF shapes support fill, outline, solid color, 2-color linear gradients, 2-color radial +gradients, and texture fills via `Shape_Flags` (see `core_2d.odin`). The texture UV rect +(`uv_rect: [4]f32`) and the gradient/outline parameters (`effects: Gradient_Outline`) live in their +own 16-byte slots in `Core_2D_Primitive`, so a primitive can carry texture and outline simultaneously. +Gradient and texture remain mutually exclusive at the fill-source level (a Brush variant chooses one +or the other) since they share the worst-case fragment-shader register path. All SDF shapes produce mathematically exact curves with analytical anti-aliasing via `smoothstep` — -no tessellation, no piecewise-linear approximation. A rounded rectangle is 1 primitive (80 bytes) +no tessellation, no piecewise-linear approximation. A rounded rectangle is 1 primitive (96 bytes) instead of ~250 vertices (~5000 bytes). -The fragment shader's estimated register footprint is ~20–23 VGPRs via static live-range analysis. -RRect and Ring_Arc are roughly tied at peak pressure — RRect carries `corner_radii` (4 regs) plus -`sdRoundedBox` temporaries, Ring_Arc carries wedge normals plus dot-product temporaries. Both land -comfortably under Mali Valhall's 32-register occupancy cliff (G57/G77/G78 and later) and well under -desktop limits. On older Bifrost Mali (G71/G72/G76, 16-register cliff) either shape kind may incur -partial occupancy reduction. These estimates are hand-counted; exact numbers require `malioc` or -Radeon GPU Analyzer against the compiled SPIR-V. 
+The main pipeline's register budget is **≤24 registers** (see "Main/effects split: register pressure" +in the pipeline plan below for the full cliff/margin analysis and SBC architecture context). +The fragment shader's estimated peak footprint is ~22–26 fp32 VGPRs (~16–22 fp16 VGPRs on architectures +with native mediump) via manual live-range analysis. The dominant peak is the Ring_Arc kind path +(wedge normals + inner/outer radii + dot-product temporaries live simultaneously with carried state +like `f_color`, `f_uv_rect`/`f_effects`, and `half_size`). RRect is 1–2 regs lower (`corner_radii` vec4 +replaces the separate inner/outer + normal pairs). NGon and Ellipse are lighter still. Real compilers +apply live-range coalescing, mediump-to-fp16 promotion, and rematerialization that typically shave +2–4 regs from hand-counted estimates — the conservative 26-reg upper bound is expected to compile +down to within the 24-register budget, but this must be verified with `malioc` (see "Verifying +register counts" below). On V3D and Bifrost architectures (16-register cliff), the compiler +statically allocates registers for the worst-case path (Ring_Arc) regardless of which kind any given +fragment actually evaluates, so all fragments pay the occupancy cost of the heaviest branch. This is +a documented limitation, not a design constraint (see "Known limitations: V3D and Bifrost" below). -MSAA is opt-in (default `._1`, no MSAA) via `Init_Options.msaa_samples`. SDF rendering does not -benefit from MSAA because fragment coverage is computed analytically. MSAA remains useful for text -glyph edges and tessellated user geometry if desired. - -All public drawing procs use prefixed names for clarity: `sdf_*` for SDF-path shapes, `tes_*` for -tessellated-path shapes. Proc groups provide a single entry point per shape concept (e.g., -`sdf_rectangle` dispatches to `sdf_rectangle_solid` or `sdf_rectangle_gradient` based on argument -count). +MSAA is intentionally not supported. 
SDF text and shapes compute fragment coverage analytically +via `smoothstep`, so they don't benefit from multisampling. Tessellated user geometry submitted via +`prepare_shape` is rendered without anti-aliasing — if AA is required for tessellated content, the +caller must render it to their own offscreen target and submit the result as a texture. This +decision matches RAD Debugger's architecture and aligns with the SBC target (Mali Valhall, where +MSAA's per-tile bandwidth multiplier is expensive). ## 2D rendering pipeline plan @@ -66,22 +72,23 @@ primitives and effects can be added to the library without architectural changes The 2D renderer uses three GPU pipelines, split by **register pressure** (main vs effects) and **render-pass structure** (everything vs backdrop): -1. **Main pipeline** — shapes (SDF and tessellated), text, and textured rectangles. Low register - footprint (~18–24 registers per thread). Runs at full GPU occupancy on every architecture. - Handles 90%+ of all fragments in a typical frame. +1. **Main pipeline** — shapes (SDF and tessellated), text, and textured rectangles. Register budget: + **≤24 registers** (full occupancy on Valhall and all desktop GPUs). Handles 90%+ of all fragments + in a typical frame. 2. **Effects pipeline** — drop shadows, inner shadows, outer glow, and similar ALU-bound blur - effects. Medium register footprint (~48–60 registers). Each effects primitive includes the base + effects. Register budget: **≤56 registers** (targets Valhall's second cliff at 64; reduced + occupancy at the first cliff is accepted by design). Each effects primitive includes the base shape's SDF so that it can draw both the effect and the shape in a single fragment pass, avoiding redundant overdraw. Separated from the main pipeline to protect main-pipeline occupancy on low-end hardware (see register analysis below). 3. **Backdrop pipeline** — frosted glass, refraction, and any effect that samples the current render target as input. 
Implemented as a multi-pass sequence (downsample, separable blur, composite), - where each individual pass has a low-to-medium register footprint (~15–40 registers). Separated - from the other pipelines because it structurally requires ending the current render pass and - copying the render target before any backdrop-sampling fragment can execute — a command-buffer- - level boundary that cannot be avoided regardless of shader complexity. + where each individual sub-pass has a register budget of **≤24 registers** (full occupancy on + Valhall). Separated from the other pipelines because it structurally requires ending the current + render pass and copying the render target before any backdrop-sampling fragment can execute — a + command-buffer-level boundary that cannot be avoided regardless of shader complexity. A typical UI frame with no effects uses 1 pipeline bind and 0 switches. A frame with drop shadows uses 2 pipelines and 1 switch. A frame with shadows and frosted glass uses all 3 pipelines and 2 @@ -97,56 +104,113 @@ code) or many per-primitive-type pipelines (no branching overhead, lean per-shad A GPU shader core has a fixed register pool shared among all concurrent threads. The compiler allocates registers pessimistically based on the worst-case path through the shader. If the shader -contains both a 20-register RRect SDF and a 48-register drop-shadow blur, _every_ fragment — even -trivial RRects — is allocated 48 registers. This directly reduces **occupancy** (the number of +contains both a 24-register RRect SDF and a 56-register drop-shadow blur, _every_ fragment — even +trivial RRects — is allocated 56 registers. This directly reduces **occupancy** (the number of warps/wavefronts that can run simultaneously), which reduces the GPU's ability to hide memory latency. -Each GPU architecture has a **register cliff** — a threshold above which occupancy starts dropping. -Below the cliff, adding registers has zero occupancy cost. 
+Each GPU architecture has discrete **occupancy cliffs** — register counts above which the number of +concurrent threads drops in a step. Below the cliff, adding registers has zero occupancy cost. One +register over, throughput drops sharply. -On consumer Ampere/Ada GPUs (RTX 30xx/40xx, 65,536 regs/SM, max 1,536 threads/SM, cliff at ~43 regs): +**Target architecture: ARM Mali Valhall (32-register first cliff).** The binding constraint for our +register budgets comes from the SBC (single-board computer) market, where Mali Valhall is the +dominant current GPU architecture: -| Register allocation | Reg-limited threads | Actual (hw-capped) | Occupancy | -| ------------------------ | ------------------- | ------------------ | --------- | -| ~16 regs (main pipeline) | 4,096 | 1,536 | 100% | -| 32 regs | 2,048 | 1,536 | 100% | -| 48 regs (effects) | 1,365 | 1,365 | ~89% | +- **RK3588-class boards** (Orange Pi 5, Radxa Rock 5, Khadas Edge 2, NanoPi R6, Banana Pi M7) ship + **Mali-G610** (Valhall). This is the dominant non-Pi SBC platform. First occupancy cliff at **32 + registers**, second cliff at **64 registers**. +- **ARM Mali Valhall** (G57, G77, G78, G610, G710, G715; 2019+) and **5th-gen / Mali-G1** (2024+): + same cliff structure — first at 32, second at 64. +- **ARM Mali Bifrost** (G31, G51, G52, G71, G72, G76; ~2016–2018): first cliff at **16 registers**. + Legacy; found on older budget boards (Allwinner H6/H618, Amlogic S922X). See Known limitations + below. +- **Broadcom V3D 4.x / 7.x** (Raspberry Pi 4 / Pi 5): first cliff at **16 registers**. Outlier in + the current SBC market. See Known limitations below. +- **Apple M3+**: Dynamic Caching (register file virtualization) eliminates the static cliff entirely. + Register allocation happens at runtime based on actual usage. +- **Qualcomm Adreno**: dynamic register allocation with soft thresholds; no hard cliff. +- **NVIDIA desktop** (Ampere/Ada): cliff at ~43 registers. Not a constraint for any of our pipelines. 
-On Volta/A100 GPUs (65,536 regs/SM, max 2,048 threads/SM, cliff at ~32 regs): +**Register budgets and margin.** We target Valhall's 32-register first cliff for the main and +backdrop pipelines, and Valhall's 64-register second cliff for the effects pipeline, each with **8 +registers of margin**: -| Register allocation | Reg-limited threads | Actual (hw-capped) | Occupancy | -| ------------------------ | ------------------- | ------------------ | --------- | -| ~16 regs (main pipeline) | 4,096 | 2,048 | 100% | -| 32 regs | 2,048 | 2,048 | 100% | -| 48 regs (effects) | 1,365 | 1,365 | ~67% | +| Pipeline | Cliff targeted | Margin | Register budget | Rationale | +| ------------------- | ---------------------- | ------ | ----------------- | --------------------------------------------------------------------------------------------- | +| Main pipeline | 32 (Valhall 1st cliff) | 8 | **≤24 regs** | Handles 90%+ of frame fragments; must run at full occupancy | +| Backdrop sub-passes | 32 (Valhall 1st cliff) | 8 | **≤24 regs** each | Multi-pass structure keeps each pass small; no reason to give up occupancy | +| Effects pipeline | 64 (Valhall 2nd cliff) | 8 | **≤56 regs** | Reduced occupancy at 1st cliff accepted by design — the entire point of splitting effects out | -On low-end mobile (ARM Mali Bifrost/Valhall, 64 regs/thread, cliff fixed at 32 regs): +**Why 8 registers of margin.** Targeting the cliff exactly is fragile. Three forces push register +counts upward over a shader's lifetime: -| Register allocation | Occupancy | -| -------------------- | -------------------------- | -| 0–32 regs (main) | 100% (full thread count) | -| 33–64 regs (effects) | ~50% (thread count halves) | +1. **Compiler version changes.** Mali driver releases (r35p0 → r55p0 etc.) ship new register + allocators. Shaders typically drift ±2–3 registers between versions on unchanged source. +2. **Feature additions.** Each new effect, flag, or uniform adds 1–4 live registers. 
A new gradient + mode or outline option lands in this range. +3. **Precision regressions.** A `mediump` demoted to `highp` (by bug fix, compiler heuristic change, + or a contributor not knowing) costs 2 registers per affected `vec4`. -Mali's cliff at 32 registers is the binding constraint. On desktop the occupancy difference between -20 and 48 registers is modest (89–100%); on Mali it is a hard 2× throughput reduction. The -main/effects split protects 90%+ of a frame's fragments (shapes, text, textures) from the effects -pipeline's register cost. +Realistic creep over a couple of years is 4–8 registers. The cost of conservatism is zero — a shader +at 24 regs runs identically to one at 32 on every Valhall device. The cost of crossing the cliff is +a 2× throughput drop with no warning. Asymmetric costs justify a generous margin. -For the effects pipeline's drop-shadow shader — erf-approximation blur math with several texture -fetches — 50% occupancy on Mali roughly halves throughput. At 4K with 1.5× overdraw (~12.4M +**Why the main/effects split exists.** If the main pipeline shader contained both the 24-register +SDF path and the ~50-register drop-shadow blur, every fragment — even trivial RRects — would be +allocated ~50 registers. On Valhall this crosses the 32-register first cliff, halving occupancy for +90%+ of the frame's fragments. Separating effects into their own pipeline means the main pipeline +stays at ≤24 registers (full Valhall occupancy), and only the small fraction of fragments that +actually render effects (~5–10% in a typical UI) run at reduced occupancy. + +For the effects pipeline's drop-shadow shader — analytical erf-approximation blur (~80 FLOPs, no +texture samples) — 50% occupancy on Valhall roughly halves throughput. At 4K with 1.5× overdraw (~12.4M fragments), a single unified shader containing the shadow branch would cost ~4ms instead of ~2ms on -low-end mobile. 
This is a per-frame multiplier even when the heavy branch is never taken, because the +Valhall. This is a per-frame multiplier even when the heavy branch is never taken, because the compiler allocates registers for the worst-case path. -All main-pipeline members (SDF shapes, tessellated geometry, text, textured rectangles) cluster at -12–24 registers — below the cliff on every architecture — so unifying them costs nothing in -occupancy. +The effects pipeline's ≤56-register budget keeps it under Valhall's second cliff at 64, yielding +50–67% occupancy on shapes that use effects. This is acceptable for the small fraction of frame fragments +that effects cover. -**Note on Apple M3+ GPUs:** Apple's M3 introduces Dynamic Caching (register file virtualization), -which allocates registers at runtime based on actual usage rather than worst-case. This weakens the -static register-pressure argument on M3 and later, but the split remains useful for isolating blur -ALU complexity and keeping the backdrop texture-copy out of the main render pass. +**Note on Apple M3+ GPUs:** Apple's M3 Dynamic Caching allocates registers at runtime based on +actual usage rather than worst-case. This eliminates the static register-pressure argument on M3 and +later, but the split remains useful for isolating blur ALU complexity and keeping the backdrop +texture-copy out of the main render pass. + +**Note on NVIDIA desktop GPUs:** On consumer Ampere/Ada (cliff at ~43 regs), an effects shader at +~48 registers only reduces occupancy to ~89%, and even the full ≤56-register budget keeps it at +~76% — a modest loss. On Volta/A100 (cliff at ~32 regs), the effects pipeline drops to ~67% at 48 +registers (~57% at 56 registers). In both cases the main pipeline runs at +100% occupancy. Desktop GPUs are not the binding constraint; Valhall is. + +#### Known limitations: V3D and Bifrost (16-register cliff) + +Broadcom V3D 4.x / 7.x (Raspberry Pi 4 / Pi 5) and ARM Mali Bifrost (G31, G51, G52, G71, G72, G76) +have a first occupancy cliff at **16 registers**.
All three of our pipelines exceed this cliff — even +the main pipeline's ≤24-register budget is above 16. On these architectures, every shader runs at +reduced occupancy regardless of which shape kind or effect is active. + +Restoring full occupancy on V3D / Bifrost would require a fundamentally different shader +architecture: per-shape-kind pipeline splitting (one pipeline per SDF kind, each with a minimal +register footprint under 16). This conflicts with the unified-pipeline design that enables single +draw calls per scissor, submission-order Z preservation, and low PSO compilation cost. It would +effectively be the GPUI-style approach whose tradeoffs are analyzed in "Why not per-primitive-type +pipelines" below. + +We treat this as a documented limitation, not a design constraint. The 16-register cliff is legacy +(Bifrost) or a single-vendor outlier (V3D). The dominant current SBC platform (RK3588 / Mali-G610) +and all mainstream mobile and desktop GPUs have cliffs at 32 or higher. The long-term direction in +GPU architecture is toward eliminating static cliffs entirely (Apple Dynamic Caching, Adreno dynamic +allocation). + +#### Verifying register counts + +The register estimates in this document are hand-counted via manual live-range analysis (see Current +state). Shader changes that affect the main or effects pipeline should be verified with `malioc` +(ARM Mali Offline Compiler) against current Valhall driver versions before merging. `malioc` reports +exact register allocation, spilling, and occupancy for each Mali generation. On desktop, Radeon GPU +Analyzer (RGA) and NVIDIA Nsight provide equivalent data. Replacing the hand-counted estimates with +measured `malioc` numbers is a follow-up task. #### Backdrop split: render-pass structure @@ -156,10 +220,11 @@ render target must be copied to a separate texture via `CopyGPUTextureToTexture` level operation that requires ending the current render pass. 
This boundary exists regardless of shader complexity and cannot be optimized away. -The backdrop pipeline's individual shader passes (downsample, separable blur, composite) are -register-light (~15–40 regs each), so merging them into the effects pipeline would cause no occupancy -problem. But the render-pass boundary makes merging structurally impossible — effects draws happen -inside the main render pass, backdrop draws happen inside their own bracketed pass sequence. +The backdrop pipeline's individual shader passes (downsample, separable blur, composite) are budgeted +at ≤24 registers each (same as the main pipeline), so merging them into the effects pipeline would +cause no occupancy problem. But the render-pass boundary makes merging structurally impossible — +effects draws happen inside the main render pass, backdrop draws happen inside their own bracketed +pass sequence. #### Why not per-primitive-type pipelines (GPUI's approach) @@ -188,9 +253,9 @@ API where each layer draws shadows before quads before glyphs. Our design avoids submission order is draw order, no layer juggling required. **PSO compilation costs multiply.** Each pipeline takes 1–50ms to compile on Metal/Vulkan/D3D12 at -first use. 7 pipelines is ~175ms cold startup; 3 pipelines is ~75ms. Adding state axes (MSAA -variants, blend modes, color formats) multiplies combinatorially — a 2.3× larger variant matrix per -additional axis with 7 pipelines vs 3. +first use. 7 pipelines is ~175ms cold startup; 3 pipelines is ~75ms. Adding state axes (blend +modes, color formats) multiplies combinatorially — a 2.3× larger variant matrix per additional +axis with 7 pipelines vs 3. 
**Branching cost comparison: unified vs per-kind in the effects pipeline.** The effects pipeline is the strongest candidate for per-kind splitting because effect branches are heavier than shape @@ -271,18 +336,23 @@ There are three categories of branch condition in a fragment shader, ranked by c #### Which category our branches fall into -Our design has two branch points: +Our design has three branch points: 1. **`mode` (push constant): tessellated vs. SDF.** This is category 2 — uniform per draw call. Every thread in every warp of a draw call sees the same `mode` value. **Zero divergence, zero cost.** -2. **`flags` (flat varying from storage buffer): gradient/texture/stroke mode.** This is category 3. - The `flat` interpolation qualifier ensures that all fragments rasterized from one primitive's quad - receive the same flag bits. However, since the SDF path now evaluates only `sdRoundedBox` with no - kind dispatch, the only flag-dependent branches are gradient vs. texture vs. solid color selection - — all lightweight (3–8 instructions per path). Divergence at primitive boundaries between - different flag combinations has negligible cost. +2. **`kind` (flat varying from storage buffer): SDF shape kind dispatch.** This is category 3. + The low byte of `Core_2D_Primitive.flags` encodes `Shape_Kind` (RRect, NGon, Ellipse, Ring_Arc), passed + to the fragment shader as a `flat` varying. All fragments of one primitive's quad receive the same + kind value. The fragment shader's `if/else if` chain selects the appropriate SDF function (~15–30 + instructions per kind). Divergence occurs only at primitive boundaries where adjacent quads have + different kinds. + +3. **`flags` (flat varying from storage buffer): gradient/texture/outline mode.** Also category 3. + The upper bits of `Core_2D_Primitive.flags` encode `Shape_Flags`, controlling gradient vs. texture vs. + solid color selection and outline rendering — all lightweight branches (3–8 instructions per + path).
Divergence at primitive boundaries between different flag combinations has negligible cost. For category 3, the divergence analysis depends on primitive size: @@ -299,11 +369,12 @@ For category 3, the divergence analysis depends on primitive size: frame-level divergence is typically **1–3%** of all warps. At 1–3% divergence, the throughput impact is negligible. At 4K with 12.4M total fragments -(~387,000 warps), divergent boundary warps number in the low thousands. Without kind dispatch, the -longest untaken branch is the gradient evaluation (~8 instructions), not a different SDF function. -Each divergent warp pays at most ~8 extra instructions. At ~12G instructions/sec on a mid-range GPU, -that totals ~1.3μs — under 0.02% of an 8.3ms (120 FPS) frame budget. This is -confirmed by production renderers that use exactly this pattern: +(~387,000 warps), divergent boundary warps number in the low thousands. The longest SDF kind branch +is Ring_Arc (~30 instructions); when a divergent warp straddles two different kinds, it pays the cost +of both (~45–60 instructions total). Each divergent warp's extra cost is modest — at ~12G +instructions/sec on a mid-range GPU, even 3,000 divergent warps × 60 extra instructions totals +~15μs, under 0.2% of an 8.3ms (120 FPS) frame budget. This is confirmed by production renderers +that use exactly this pattern: - **vger / vger-rs** (Audulus): single pipeline, 11 primitive kinds dispatched by a `switch` on a flat varying `prim_type`. Ships at 120 FPS on iPads. The author (Taylor Holliday) replaced nanovg @@ -327,10 +398,10 @@ our design: > have no per-fragment data-dependent branches in the main pipeline. 2. **Branches where both paths are very long.** If both sides of a branch are 500+ instructions, - divergent warps pay double a large cost. Without kind dispatch, the SDF path always evaluates - `sdRoundedBox`; the only branches are gradient/texture/solid color selection at 3–8 instructions - each. 
Even fully divergent, the penalty is ~8 extra instructions — less than a single texture - sample's latency. + divergent warps pay double a large cost. Our SDF kind branches are short (~15–30 instructions + each), and the gradient/texture/solid color selection branches are shorter still (3–8 instructions + each). Even fully divergent, the combined penalty is ~30–60 extra instructions — comparable to a + single texture sample's latency. 3. **Branches that prevent compiler optimizations.** Some compilers cannot schedule instructions across branch boundaries, reducing VLIW utilization on older architectures. Modern GPUs (NVIDIA @@ -338,9 +409,10 @@ our design: concern. 4. **Register pressure from the union of all branches.** This is the real cost, and it is why we - split heavy effects (shadows, glass) into separate pipelines. Within the main pipeline, the SDF - path has a single evaluation (sdRoundedBox) with flag-based color selection, clustering at ~15–18 - registers, so there is negligible occupancy loss. + split heavy effects into separate pipelines. Within the main pipeline, the four + SDF kind branches and flag-based color selection cluster at a hand-counted ~22–26 registers (see + register analysis in Current state), which is expected to compile down to within the ≤24-register + budget that guarantees full occupancy on Valhall and all desktop architectures (pending `malioc` + verification). See Known limitations for V3D / Bifrost. **References:** @@ -361,27 +433,29 @@ our design: ### Main pipeline: SDF + tessellated (unified) The main pipeline serves two submission modes through a single `TRIANGLELIST` pipeline and a single -vertex input layout, distinguished by a mode marker in the `Primitive.flags` field (low byte: -0 = tessellated, 1 = SDF). The tessellated path sets this to 0 via zero-initialization in the vertex -shader; the SDF path sets it to 1 via `pack_flags`.
+vertex input layout, distinguished by a `mode` field in the `Vertex_Uniforms_2D` push constant +(`Core_2D_Mode.Tessellated = 0`, `Core_2D_Mode.SDF = 1`), pushed per draw call via `push_globals`. The +vertex shader branches on this uniform to select the tessellated or SDF code path. - **Tessellated mode** (`mode = 0`): direct vertex buffer with explicit geometry. Used for text - (SDL_ttf atlas sampling), triangle fans/strips, ellipses, regular polygons, circle sectors, and - any user-provided raw vertex geometry. -- **SDF mode** (`mode = 1`): shared unit-quad vertex buffer + GPU storage buffer of `Primitive` - structs, drawn instanced. Used for all shapes with closed-form signed distance functions. + (SDL_ttf atlas sampling), triangles, triangle fans/strips, single-pixel points, and any + user-provided raw vertex geometry. +- **SDF mode** (`mode = 1`): shared unit-quad vertex buffer + GPU storage buffer of + `Core_2D_Primitive` structs, drawn instanced. Used for all shapes with closed-form signed distance + functions. -Both modes use the same fragment shader. The fragment shader checks the mode marker: mode 0 computes -`out = color * texture(tex, uv)`; mode 1 always evaluates `sdRoundedBox` and applies -gradient/texture/solid color based on flag bits. +Both modes use the same fragment shader. The fragment shader checks `Shape_Kind` (low byte of +`Core_2D_Primitive.flags`): kind 0 (`Solid`) is the tessellated path, which premultiplies the texture +sample and computes `out = color * t`; kinds 1–4 dispatch to one of four SDF functions (RRect, NGon, +Ellipse, Ring_Arc) and apply gradient/texture/outline/solid color based on `Shape_Flags` bits. #### Why SDF for shapes CPU-side adaptive tessellation for curved shapes (the current approach) has three problems: 1. **Vertex bandwidth.** A rounded rectangle with four corner arcs produces ~250 vertices × 20 bytes - = 5 KB. An SDF rounded rectangle is one `Primitive` struct (~56 bytes) plus 4 shared unit-quad - vertices. 
That is roughly a 90× reduction per shape. + = 5 KB. An SDF rounded rectangle is one `Core_2D_Primitive` struct (96 bytes) plus 4 shared + unit-quad vertices. That is roughly a 50× reduction per shape. 2. **Quality.** Tessellated curves are piecewise-linear approximations. At high DPI or under animation/zoom, faceting is visible at any practical segment count. SDF evaluation produces @@ -412,60 +486,55 @@ SDF primitives are submitted via a GPU storage buffer indexed by `gl_InstanceInd shader, rather than encoding per-primitive data redundantly in vertex attributes. This follows the pattern used by both Zed GPUI and vger-rs. -Each SDF shape is described by a single `Primitive` struct (80 bytes) in the storage buffer. The -vertex shader reads `primitives[gl_InstanceIndex]`, computes the quad corner position from the unit -vertex and the primitive's bounds, and passes shape parameters to the fragment shader via `flat` -interpolated varyings. +Each SDF shape is described by a single `Core_2D_Primitive` struct (96 bytes) in the storage +buffer. The vertex shader reads `primitives[gl_InstanceIndex]`, computes the quad corner position +from the unit vertex and the primitive's bounds, and passes shape parameters to the fragment shader +via `flat` interpolated varyings. Compared to encoding per-primitive data in vertex attributes (the "fat vertex" approach), storage- buffer instancing eliminates the 4–6× data duplication across quad corners. A rounded rectangle costs -80 bytes instead of 4 vertices × 40+ bytes = 160+ bytes. +96 bytes instead of 4 vertices × 60+ bytes = 240+ bytes. The tessellated path retains the existing direct vertex buffer layout (20 bytes/vertex, no storage buffer access). The vertex shader branch on `mode` (push constant) is warp-uniform — every invocation in a draw call has the same mode — so it is effectively free on all modern GPUs. 
-#### Shape folding +#### Shape kinds and SDF dispatch -The SDF path evaluates a single function — `sdRoundedBox` — for all primitives. There is no -`Shape_Kind` enum or per-primitive kind dispatch in the fragment shader. Shapes that are algebraically -special cases of a rounded rectangle are emitted as RRect primitives by the CPU-side drawing procs: +The fragment shader dispatches on `Shape_Kind` (low byte of `Core_2D_Primitive.flags`) to evaluate +one of four signed distance functions. The `Shape_Kind` enum, per-kind `*_Params` structs, and +CPU-side drawing procs all live in `core_2d.odin`. The drawing procs build the appropriate +`Core_2D_Primitive` and set the kind automatically. -| User-facing shape | RRect mapping | Notes | -| ---------------------------- | -------------------------------------------- | ---------------------------------------- | -| Rectangle (sharp or rounded) | Direct | Per-corner radii from `radii` param | -| Circle | `half_size = (r, r)`, `radii = (r, r, r, r)` | Uniform radii = half-size | -| Line segment / capsule | Rotated RRect, `radii = half_thickness` | Stadium shape (fully-rounded minor axis) | -| Full ring / annulus | Stroked circle at mid-radius | `stroke_px = outer - inner` | +Each user-facing shape proc accepts a `Brush` union (color, linear gradient, radial gradient, +or textured fill) as its fill source, plus optional outline parameters.
The procs map to SDF +kinds as follows: -Shapes without a closed-form RRect reduction are drawn via the tessellated path: +| User-facing proc | Shape_Kind | SDF function | Notes | +| -------------------- | ---------- | ------------------ | ---------------------------------------------------------- | +| `rectangle` | `RRect` | `sdRoundedBox` | Per-corner radii from `radii` param | +| `circle` | `RRect` | `sdRoundedBox` | Uniform radii = half-size (circle is a degenerate RRect) | +| `line`, `line_strip` | `RRect` | `sdRoundedBox` | Rotated capsule — stadium shape (radii = half-thickness) | +| `ellipse` | `Ellipse` | `sdEllipseApprox` | Approximate ellipse SDF (fast, suitable for UI) | +| `polygon` | `NGon` | `sdRegularPolygon` | Regular N-sided polygon inscribed in a circle | +| `ring` (full) | `Ring_Arc` | Annular radial SDF | `max(inner - r, r - outer)` with no angular clipping | +| `ring` (partial arc) | `Ring_Arc` | Annular radial SDF | Pre-computed edge normals for angular wedge mask | +| `ring` (pie slice) | `Ring_Arc` | Annular radial SDF | `inner_radius = 0`, angular clipping via `start/end_angle` | -| Shape | Tessellated proc | Method | -| ------------------------- | ---------------------------------- | -------------------------- | -| Ellipse | `tes_ellipse`, `tes_ellipse_lines` | Triangle fan approximation | -| Regular polygon (N-gon) | `tes_polygon`, `tes_polygon_lines` | Triangle fan from center | -| Circle sector (pie slice) | `tes_sector` | Triangle fan arc | - -The `Shape_Flags` bit set controls rendering mode per primitive: - -| Flag | Bit | Effect | -| ----------------- | --- | -------------------------------------------------------------------- | -| `Stroke` | 0 | Outline instead of fill (`d = abs(d) - stroke_width/2`) | -| `Textured` | 1 | Sample texture using `uv.uv_rect` (mutually exclusive with Gradient) | -| `Gradient` | 2 | Bilinear 4-corner interpolation from `uv.corner_colors` | -| `Gradient_Radial` | 3 | Radial 2-color falloff (inner/outer) 
from `uv.corner_colors[0..1]` | +The `Shape_Flags` bit set controls per-primitive rendering mode (outline, gradient, texture, rotation, +arc geometry). See the `Shape_Flag` enum in `core_2d.odin` for the authoritative flag +definitions and bit assignments. **What stays tessellated:** - Text (SDL_ttf atlas, pending future MSDF evaluation) -- Ellipses (`tes_ellipse`, `tes_ellipse_lines`) -- Regular polygons (`tes_polygon`, `tes_polygon_lines`) -- Circle sectors / pie slices (`tes_sector`) -- `tes_triangle`, `tes_triangle_fan`, `tes_triangle_strip` (arbitrary user-provided geometry) +- `tess.pixel` (single-pixel points) +- `tess.triangle`, `tess.triangle_aa`, `tess.triangle_lines` (single triangles) +- `tess.triangle_fan`, `tess.triangle_strip` (arbitrary user-provided geometry) - Any raw vertex geometry submitted via `prepare_shape` -The design rule: if the shape reduces to `sdRoundedBox`, it goes SDF. If it requires a different SDF -function or is described by a vertex list, it stays tessellated. +The design rule: if the shape has a closed-form SDF, it goes through the SDF path with its own +`Shape_Kind`. If it is described by a vertex list or has no practical SDF, it stays tessellated. ### Effects pipeline @@ -526,44 +595,121 @@ Wallace's variant) and vger-rs. ### Backdrop pipeline The backdrop pipeline handles effects that sample the current render target as input: frosted glass, -refraction, mirror surfaces. It is separated from the effects pipeline for a structural reason, not -register pressure. +refraction, mirror surfaces. It is separated from the main and effects pipelines for a structural +reason, not register pressure. **Render-pass boundary.** Before any backdrop-sampling fragment can run, the current render target -must be copied to a separate texture via `CopyGPUTextureToTexture`. This is a command-buffer-level -operation that cannot happen mid-render-pass. 
The copy naturally creates a pipeline boundary that no -amount of shader optimization can eliminate — it is a fundamental requirement of sampling a surface -while also writing to it. +must be in a sampler-readable state. A draw call cannot sample the render target it is also writing +to — this is a hard GPU constraint; the only way to satisfy it is to end the current render pass and start +a new one. That render-pass boundary is what a “bracket” is. **Multi-pass implementation.** Backdrop effects are implemented as separable multi-pass sequences -(downsample → horizontal blur → vertical blur → composite), following the standard approach used by -iOS `UIVisualEffectView`, Android `RenderEffect`, and Flutter's `BackdropFilter`. Each individual -pass has a low-to-medium register footprint (~15–40 registers), well within the main pipeline's -occupancy range. The multi-pass approach avoids the monolithic 70+ register shader that a single-pass -Gaussian blur would require, making backdrop effects viable on low-end mobile GPUs (including -Mali-G31 and VideoCore VI) where per-thread register limits are tight. +(downsample → horizontal blur → vertical blur → composite), following the standard approach used +by iOS `UIVisualEffectView`, Android `RenderEffect`, and Flutter's `BackdropFilter`. Each individual +sub-pass is budgeted at **≤24 registers** (same as the main pipeline — full Valhall occupancy). The +multi-pass approach avoids the monolithic 70+ register shader that a single-pass Gaussian blur would +require, keeping each sub-pass well under the 32-register cliff. -**Bracketed execution.** All backdrop draws in a frame share a single bracketed region of the command -buffer: end the current render pass, copy the render target, execute all backdrop sub-passes, then -resume normal drawing. The entry/exit cost (texture copy + render-pass break) is paid once per frame -regardless of how many backdrop effects are visible.
When no backdrop effects are present, the bracket -is never entered and the texture copy never happens — zero cost. +**Render-target choice.** When any layer in the frame contains a backdrop draw, the entire +frame renders into `source_texture` (a full-resolution single-sample texture owned by the +backdrop pipeline) instead of directly into the swapchain. At the end of the frame, +`source_texture` is copied to the swapchain via a single `CopyGPUTextureToTexture` call. +This means the bracket has no mid-frame texture copy: by the time the bracket runs, +`source_texture` already contains the pre-bracket frame contents and is the natural sampler +input. When no layer in the frame has a backdrop draw, the existing fast path runs: the frame +renders directly to the swapchain and the backdrop pipeline's working textures are never +touched. Zero cost for backdrop-free frames. -**Why not split the backdrop sub-passes into separate pipelines?** The individual passes range from -~15 to ~40 registers, which does cross Mali's 32-register cliff. However, the register-pressure argument -that justifies the main/effects split does not apply here. The main/effects split protects the -_common path_ (90%+ of frame fragments) from the uncommon path's register cost. Inside the backdrop -pipeline there is no common-vs-uncommon distinction — if backdrop effects are active, every sub-pass -runs; if not, none run. The backdrop pipeline either executes as a complete unit or not at all. -Additionally, backdrop effects cover a small fraction of the frame's total fragments (~5% at typical -UI scales), so the occupancy variation within the bracket has negligible impact on frame time. +**Why not split the backdrop sub-passes into separate pipelines?** Each sub-pass is budgeted at ≤24 +registers, well under Valhall's 32-register cliff, so there is no occupancy motivation for splitting. 
+The sub-passes also have no common-vs-uncommon distinction — if backdrop effects are active, every +sub-pass runs; if not, none run. The backdrop pipeline either executes as a complete unit or not at +all. Additionally, backdrop effects cover a small fraction of the frame's total fragments (~5% at +typical UI scales), so even if a sub-pass did cross a cliff, the occupancy variation within the +bracket would have negligible impact on frame time. + +#### Bracket scheduling model + +The bracket is scheduled per layer, anchored at the first backdrop sub-batch in the layer's +submission order. Concretely, a layer with one or more backdrops splits into three groups: + +1. **Pass A (pre-bracket)** — every non-backdrop sub-batch with index `< bracket_start_index`. + Renders to `source_texture` in a single render pass. +2. **The bracket** — every backdrop sub-batch in the layer (regardless of index). Runs one + downsample pass, then one (H-blur + V-composite) pass pair per unique sigma. +3. **Pass B (post-bracket)** — every non-backdrop sub-batch with index `>= bracket_start_index`. + Renders to `source_texture` with `LOAD`, drawing on top of the composited backdrop output. + +`bracket_start_index` is the absolute index of the first `.Backdrop` kind in the layer's sub-batch +range. If the layer has no backdrops, none of this kicks in and the layer renders in a single render +pass via the existing fast path. + +Per-sigma-group execution. The bracket walks each layer's sub-batches and groups contiguous +`.Backdrop` sub-batches that share a sigma; each group picks its own downsample factor (1, 2, or 4) +based on `compute_backdrop_downsample_factor`. 
For each group it runs three sub-passes: a downsample +from `source_texture` to `downsample_texture`; an H-blur from `downsample_texture` to +`h_blur_texture`; and a fused V-blur + composite pass (V-composite) that reads `h_blur_texture`, +applies the vertical blur together with the SDF mask and tint, and writes the result to +`source_texture`. Sub-batch coalescing in +`append_or_extend_sub_batch` merges contiguous same-sigma backdrops into a single instanced +composite draw; non-contiguous same-sigma backdrops still share the blur output but issue separate +composite draws. + +The working textures are sized at the full swapchain resolution; larger downsample factors only +fill a sub-rect via viewport-limited rendering (see the comment block at the top of `backdrop.odin` +for the factor-selection table and rationale). + +#### Submission-order trade-off + +Within Pass A and Pass B, sub-batches render in the user's submission order. What the bracket model +sacrifices is _interleaved_ ordering between backdrop and non-backdrop content within a single +layer. A non-backdrop sub-batch submitted between two backdrops still renders in Pass B (after the +bracket), not at its submission position. Worked example: + +``` +draw.rectangle(layer, bg, GRAY) // 0 SDF → Pass A +draw.rectangle(layer, card_blue, BLUE) // 1 SDF → Pass A +draw.gaussian_blur(layer, panelA, sigma=12) // 2 Backdrop → Bracket (sees: bg + blue card) +draw.rectangle(layer, card_red, RED) // 3 SDF → Pass B (drawn ON TOP of panelA) +draw.gaussian_blur(layer, panelB, sigma=12) // 4 Backdrop → Bracket (sees: bg + blue card; same as panelA) +draw.text(layer, "label", ...) // 5 Text → Pass B (drawn ON TOP of both panels) +``` + +In this layer, panelB does _not_ see card_red — even though card_red was submitted before panelB — +because both backdrops sample `source_texture` as it stood at the bracket entry, which is after +Pass A and before card_red has rendered.
card_red ends up on top of panelA, not underneath it. + +The user controls the alternative outcome by splitting layers. Putting card_red and panelB into a +new layer (via `draw.new_layer`) gives panelB a fresh source snapshot that includes panelA and +card_red: + +``` +base := draw.begin(...) +draw.rectangle(base, bg, GRAY) +draw.rectangle(base, card_blue, BLUE) +draw.gaussian_blur(base, panelA, sigma=12) // panelA in base layer's bracket + +top := draw.new_layer(base, ...) +draw.rectangle(top, card_red, RED) +draw.gaussian_blur(top, panelB, sigma=12) // top layer's bracket; sees base + card_red +draw.text(top, "label", ...) +``` + +Why one bracket per layer and not one per backdrop? Each bracket adds three render passes +(downsample + H-blur + V-composite) and at least three tile-cache flushes on tilers like Mali +Valhall. Strict submission-order semantics would require one bracket per cluster of contiguous +backdrops, which scales the GPU cost linearly with how interleaved the user's submission happens +to be — a footgun. The current design caps the bracket cost per layer regardless of submission +interleave, and gives the user explicit control over ordering through the existing layer +abstraction. This matches the cost/complexity envelope of iOS `UIVisualEffectView` and CSS +`backdrop-filter` (both of which constrain backdrop ordering implicitly). ### Vertex layout The vertex struct is unchanged from the current 20-byte layout: ``` -Vertex :: struct { +Vertex_2D :: struct { position: [2]f32, // 0: screen-space position uv: [2]f32, // 8: atlas UV (text) or unused (shapes) color: Color, // 16: u8x4, GPU-normalized to float @@ -575,25 +721,30 @@ draws, `position` carries actual world-space geometry. For SDF draws, `position` corners (0,0 to 1,1) and the vertex shader computes world-space position from the storage-buffer primitive's bounds. 
-The `Primitive` struct for SDF shapes lives in the storage buffer, not in vertex attributes: +The `Core_2D_Primitive` struct for SDF shapes lives in the storage buffer, not in vertex attributes: ``` -Primitive :: struct { - bounds: [4]f32, // 0: min_x, min_y, max_x, max_y - color: Color, // 16: u8x4, unpacked in shader via unpackUnorm4x8 - flags: u32, // 20: low byte = Shape_Kind, bits 8+ = Shape_Flags - rotation_sc: u32, // 24: packed f16 pair (sin, cos). Requires .Rotated flag. - _pad: f32, // 28: reserved for future use - params: Shape_Params, // 32: per-kind params union (half_feather, radii, etc.) (32 bytes) - uv: Uv_Or_Effects, // 64: texture UV rect or gradient/outline parameters (16 bytes) +Core_2D_Primitive :: struct { + bounds: [4]f32, // 0: min_x, min_y, max_x, max_y + color: Color, // 16: u8x4, unpacked in shader via unpackUnorm4x8 + flags: u32, // 20: low byte = Shape_Kind, bits 8+ = Shape_Flags + rotation_sc: u32, // 24: packed f16 pair (sin, cos). Requires .Rotated flag. + _pad: f32, // 28: reserved for future use + params: Shape_Params, // 32: per-kind params union (half_feather, radii, etc.) (32 bytes) + uv_rect: [4]f32, // 64: texture UV coordinates. Read when .Textured. + effects: Gradient_Outline, // 80: gradient and/or outline parameters (16 bytes). } -// Total: 80 bytes (std430 aligned) +// Total: 96 bytes (std430 aligned) ``` -`RRect_Params` holds the rounded-rectangle parameters directly — there is no `Shape_Params` union. -`Uv_Or_Gradient` is a `#raw_union` that aliases `[4]f32` (texture UV rect) with `[4]Color` (gradient -corner colors, clockwise from top-left: TL, TR, BR, BL). The `flags` field encodes both the -tessellated/SDF mode marker (low byte) and shape flags (bits 8+) via `pack_flags`. +`Shape_Params` is a `#raw_union` over `RRect_Params`, `NGon_Params`, `Ellipse_Params`, and +`Ring_Arc_Params` (plus a `raw: [8]f32` view), defined in `core_2d.odin`. 
Each SDF kind +writes its own params variant; the fragment shader reads the appropriate fields based on `Shape_Kind`. +`Gradient_Outline` is a 16-byte struct containing `gradient_color: Color`, `outline_color: Color`, +`gradient_dir_sc: u32` (packed f16 cos/sin pair), and `outline_packed: u32` (packed f16 outline +width). It is independent of `uv_rect`, so a primitive can carry texture and outline parameters at +the same time. The `flags` field encodes the `Shape_Kind` in the low byte and `Shape_Flags` in bits +8+ via `pack_kind_flags`. ### Draw submission order @@ -617,17 +768,16 @@ pair into bitmap atlases and emits indexed triangle data via `GetGPUTextDrawData **unchanged** by the SDF migration — text continues to flow through the main pipeline's tessellated mode with `mode = 0`, sampling the SDL_ttf atlas texture. -A future phase may evaluate MSDF (multi-channel signed distance field) text rendering, which would +MSDF (multi-channel signed distance field) text rendering may be evaluated later, which would allow resolution-independent glyph rendering from a single small atlas per font. This would involve: - Offline atlas generation via Chlumský's msdf-atlas-gen tool. - Runtime glyph metrics via `vendor:stb/truetype` (already in the Odin distribution). -- A new MSDF glyph mode in the fragment shader, which would require reintroducing a mode/kind - distinction (the current shader evaluates only `sdRoundedBox` with no kind dispatch). +- A new MSDF glyph `Shape_Kind` in the fragment shader (additive — the kind dispatch infrastructure + already exists for the four current SDF kinds). - Potential removal of the SDL_ttf dependency. -This is explicitly deferred. The SDF shape migration is independent of and does not block text -changes. +This is explicitly deferred. **References:** @@ -641,8 +791,8 @@ changes. ### Textures Textures plug into the existing main pipeline — no additional GPU pipeline, no shader rewrite. 
The -work is a resource layer (registration, upload, sampling, lifecycle) plus two textured-draw procs -that route into the existing tessellated and SDF paths respectively. +work is a resource layer (registration, upload, sampling, lifecycle) plus a `Texture_Fill` Brush +variant that routes the existing shape procs through the SDF path with the `.Textured` flag set. #### Why draw owns registered textures @@ -692,31 +842,30 @@ with the same texture but different samplers produce separate draw calls, which #### Textured draw procs -Textured rectangles route through the existing SDF path via `sdf_rectangle_texture` and -`sdf_rectangle_texture_corners`, mirroring `sdf_rectangle` and `sdf_rectangle_corners` exactly — -same parameters, same naming — with the color parameter replaced by a texture ID plus an optional -tint. +Textures share the same shape procs as colors and gradients. Each shape proc takes a `Brush` +union as its fill source; passing a `Texture_Fill` value (carrying `Texture_Id`, `tint`, +`uv_rect`, and `Sampler_Preset`) routes the draw through the SDF path with the `.Textured` +flag set. There is no dedicated `rectangle_texture` / `circle_texture` proc — the same +`rectangle`, `circle`, `ellipse`, `polygon`, `ring`, `line`, and `line_strip` procs handle +all fill sources. -An earlier iteration of this design considered a separate tessellated proc for "simple" fullscreen -quads, on the theory that the tessellated path's lower register count (~16 regs vs ~18 for the SDF -textured branch) would improve occupancy at large fragment counts. Applying the register-pressure -analysis from the pipeline-strategy section above shows this is wrong: both 16 and 18 registers are -well below the register cliff (~43 regs on consumer Ampere/Ada, ~32 on Volta/A100), so both run at -100% occupancy. The remaining ALU difference (~15 extra instructions for the SDF evaluation) amounts -to ~20μs at 4K — below noise. 
Meanwhile, splitting into a separate pipeline would add ~1–5μs per -pipeline bind on the CPU side per scissor, matching or exceeding the GPU-side savings. Within the -main pipeline, unified remains strictly better. +A separate tessellated proc for "simple" fullscreen quads was considered on the theory that +the tessellated path's lower register count would improve occupancy at large fragment counts. +Both paths are well within the ≤24-register main pipeline budget — both run at full +occupancy on every target architecture (Valhall and above). The remaining ALU difference +(~15 extra instructions for the SDF evaluation) amounts to ~20μs at 4K — below noise. +Meanwhile, splitting into a separate pipeline would add ~1–5μs per pipeline bind on the CPU +side per scissor, matching or exceeding the GPU-side savings. Within the main pipeline, +unified remains strictly better. -The naming convention uses `sdf_` and `tes_` prefixes to indicate the rendering path, with suffixes -for modifiers: `sdf_rectangle_texture` and `sdf_rectangle_texture_corners` sit alongside -`sdf_rectangle` (solid or gradient overload). Proc groups like `sdf_rectangle` dispatch to -`sdf_rectangle_solid` or `sdf_rectangle_gradient` based on argument count. Future per-shape texture -variants (`sdf_circle_texture`) are additive. +SDF drawing procs live in the `draw` package with unprefixed names (`rectangle`, `circle`, +`ellipse`, `polygon`, `ring`, `line`, `line_strip`). Gradients, textures, and outlines are +selected via the `Brush` union and optional outline parameters rather than separate overloads. #### What SDF anti-aliasing does and does not do for textured draws The SDF path anti-aliases the **shape's outer silhouette** — rounded-corner edges, rotated edges, -stroke outlines. It does not anti-alias or sharpen the texture content. Inside the shape, fragments +outline edges. It does not anti-alias or sharpen the texture content. 
Inside the shape, fragments sample through the chosen `Sampler_Preset`, and image quality is whatever the sampler produces from the source texels. A low-resolution texture displayed at a large size shows bilinear blur regardless of which draw proc is used. This matches the current text-rendering model, where glyph sharpness @@ -725,8 +874,8 @@ depends on how closely the display size matches the SDL_ttf atlas's rasterized s #### Fit modes are a computation layer, not a renderer concept Standard image-fit behaviors (stretch, fill/cover, fit/contain, tile, center) are expressed as UV -sub-region computations on top of the `uv_rect` parameter that both textured-draw procs accept. The -renderer has no knowledge of fit modes — it samples whatever UV region it is given. +sub-region computations on top of the `uv_rect` field of `Texture_Fill`. The renderer has no +knowledge of fit modes — it samples whatever UV region it is given. A `fit_params` helper computes the appropriate `uv_rect`, sampler preset, and (for letterbox/fit mode) shrunken inner rect from a `Fit_Mode` enum, the target rect, and the texture's pixel size. @@ -750,13 +899,13 @@ textures onto a free list that is processed in `r_end_frame`, not at the call si Clay's `RenderCommandType.Image` is handled by dereferencing `imageData: rawptr` as a pointer to a `Clay_Image_Data` struct containing a `Texture_Id`, `Fit_Mode`, and tint color. Routing mirrors the -existing rectangle handling: zero `cornerRadius` dispatches to `sdf_rectangle_texture` (SDF, sharp -corners), nonzero dispatches to `sdf_rectangle_texture_corners` (SDF, per-corner radii). A -`fit_params` call computes UVs from the fit mode before dispatch. +existing rectangle handling: `fit_params` computes UVs from the fit mode, then `rectangle` is +called with a `Texture_Fill` brush and the appropriate radii (zero for sharp corners, per-corner +values from Clay's `cornerRadius` otherwise). 
#### Deferred features -The following are plumbed in the descriptor but not implemented in phase 1: +The following are plumbed in `Texture_Desc` but not yet implemented: - **Mipmaps**: `Texture_Desc.mip_levels` field exists; generation via SDL3 deferred. - **Compressed formats**: `Texture_Desc.format` accepts BC/ASTC; upload path deferred. @@ -764,7 +913,6 @@ The following are plumbed in the descriptor but not implemented in phase 1: - **3D textures, arrays, cube maps**: `Texture_Desc.type` and `depth_or_layers` fields exist. - **Additional samplers**: anisotropic, trilinear, clamp-to-border — additive enum values. - **Atlas packing**: internal optimization for sub-batch coalescing; invisible to callers. -- **Per-shape texture variants**: `sdf_circle_texture`, `tes_ellipse_texture`, `tes_polygon_texture` — potential future additions, reserved by naming convention. **References:** diff --git a/draw/backdrop.odin b/draw/backdrop.odin new file mode 100644 index 0000000..f7437db --- /dev/null +++ b/draw/backdrop.odin @@ -0,0 +1,1146 @@ +package draw + +import "core:log" +import "core:math" +import "core:mem" +import sdl "vendor:sdl3" + +// This file hosts the backdrop subsystem: any visual effect that samples the current +// framebuffer as input. Today the only implemented effect is Gaussian blur (frosted glass); +// future effects (refraction, mirror, etc.) will live here too. +// +// The file is split into two top-level sections: +// +// 1. Shared backdrop infrastructure — bracket coordination, source_texture lifecycle, +// sub-batch scanners. These are general to any backdrop effect: every backdrop effect +// needs a snapshot of the framebuffer (source_texture) and needs to participate in the +// bracket render-pass-boundary scheduling. When a second effect is added, its +// per-effect resources go in their own section like the Gaussian blur one below; this +// shared section stays. +// +// 2. Gaussian blur — the only effect implemented today. 
Owns its own PSOs, working +// textures (downsample / h_blur), per-primitive storage layout, kernel math, and +// bracket-runner inner loop. None of this is shared with future backdrop effects: a +// refraction shader would have its own PSO, its own primitive struct, and likely +// wouldn't need the downsample/h_blur intermediates at all. +// +// The `Backdrop` struct currently holds resources from both categories; field-group +// comments inside it mark which are which. When a second effect lands the struct will be +// split, but doing that pre-emptively means inventing a per-effect dispatch protocol on +// speculation. Better to keep the conflation visible (and labeled) until concrete needs +// shape the design. + +// --------------------------------------------------------------------------------------------------------------------- +// ----- Shared backdrop infrastructure ------------ +// --------------------------------------------------------------------------------------------------------------------- + +// GPU state for the backdrop subsystem. The module-level instance is `GLOB.backdrop`. +// `create_backdrop` fills in the pipelines, primitive buffer, and sampler; the three texture +// fields stay nil until `ensure_backdrop_textures` allocates them. +//INTERNAL +Backdrop :: struct { + // -- Shared across all backdrop effects -- + + // When any backdrop draw exists this frame, the entire frame renders into source_texture + // instead of the swapchain. Acts as the bracket's snapshot input by virtue of already + // containing the pre-bracket frame. Copied to the swapchain at frame end. + source_texture: ^sdl.GPUTexture, + + // Cached pixel dimensions for resize-detection in `ensure_backdrop_textures`. + cached_width: u32, + cached_height: u32, + + // Linear-clamp sampler used for sampling source_texture (and Gaussian blur's working + // textures). Linear filtering is required by the Gaussian linear-sampling pair trick; + // any future backdrop effect that samples source_texture with bilinear interpolation + // can reuse this sampler. Clamp avoids edge-bleed at work-region boundaries. + sampler: ^sdl.GPUSampler, + + // -- Gaussian blur effect -- + + // Two graphics pipelines.
The downsample PSO is a single-bilinear-sample fullscreen pass; + // the blur PSO is mode-branched (H-blur fullscreen + V-composite instanced) and shares + // one shader program for both modes via a uniform `mode` selector. + downsample_pipeline: ^sdl.GPUGraphicsPipeline, + blur_pipeline: ^sdl.GPUGraphicsPipeline, + + // Per-instance Gaussian_Blur_Primitive storage buffer. Grows on demand via grow_buffer_if_needed. + // All backdrop primitives across all layers in a frame share this single buffer; sub-batches + // reference into it by offset. + primitive_buffer: Buffer, + + // Working textures, allocated once at swapchain resolution and recreated only on resize. + // Both are sized at full swapchain resolution and single-sample. Larger downsample + // factors fill only a sub-rect via viewport-limited rendering (see file-header comment + // on adaptive downsampling in the Gaussian blur section below). + // downsample_texture — written by the downsample PSO. Read by the blur PSO in mode 0. + // h_blur_texture — written by the blur PSO in mode 0. Read by the blur PSO in mode 1. + downsample_texture: ^sdl.GPUTexture, + h_blur_texture: ^sdl.GPUTexture, +} + +// Builds the backdrop subsystem's device objects in order: four shader modules, the downsample +// pipeline, the blur pipeline, the Gaussian_Blur_Primitive storage buffer, and the linear-clamp +// sampler. Does not allocate the working textures. Returns ok = false when the device lacks +// PLATFORM_SHADER_FORMAT_FLAG or any creation call fails; SDL failures are logged here, and the +// deferred block below releases whatever had already been created. +//INTERNAL +create_backdrop :: proc(device: ^sdl.GPUDevice, window: ^sdl.Window) -> (pipeline: Backdrop, ok: bool) { + // On failure, clean up any partially-created resources.
+ defer if !ok { + if pipeline.sampler != nil do sdl.ReleaseGPUSampler(device, pipeline.sampler) + if pipeline.primitive_buffer.gpu != nil do destroy_buffer(device, &pipeline.primitive_buffer) + if pipeline.blur_pipeline != nil do sdl.ReleaseGPUGraphicsPipeline(device, pipeline.blur_pipeline) + if pipeline.downsample_pipeline != nil do sdl.ReleaseGPUGraphicsPipeline(device, pipeline.downsample_pipeline) + } + + active_shader_formats := sdl.GetGPUShaderFormats(device) + if PLATFORM_SHADER_FORMAT_FLAG not_in active_shader_formats { + log.errorf( + "backdrop: no embedded shader matches active GPU formats; build supports %v but device reports %v", + PLATFORM_SHADER_FORMAT, + active_shader_formats, + ) + return pipeline, false + } + + swapchain_format := sdl.GetGPUSwapchainTextureFormat(device, window) + + //----- Shader modules ---------------------------------- + // Each shader module below is released via `defer` when this proc returns, on both the + // success and the failure paths. + + fullscreen_vert := sdl.CreateGPUShader( + device, + sdl.GPUShaderCreateInfo { + code_size = len(BACKDROP_FULLSCREEN_VERT_RAW), + code = raw_data(BACKDROP_FULLSCREEN_VERT_RAW), + entrypoint = SHADER_ENTRY, + format = {PLATFORM_SHADER_FORMAT_FLAG}, + stage = .VERTEX, + }, + ) + if fullscreen_vert == nil { + log.errorf("Could not create backdrop fullscreen vertex shader: %s", sdl.GetError()) + return pipeline, false + } + defer sdl.ReleaseGPUShader(device, fullscreen_vert) + + downsample_frag := sdl.CreateGPUShader( + device, + sdl.GPUShaderCreateInfo { + code_size = len(BACKDROP_DOWNSAMPLE_FRAG_RAW), + code = raw_data(BACKDROP_DOWNSAMPLE_FRAG_RAW), + entrypoint = SHADER_ENTRY, + format = {PLATFORM_SHADER_FORMAT_FLAG}, + stage = .FRAGMENT, + num_samplers = 1, + num_uniform_buffers = 1, + }, + ) + if downsample_frag == nil { + log.errorf("Could not create backdrop downsample fragment shader: %s", sdl.GetError()) + return pipeline, false + } + defer sdl.ReleaseGPUShader(device, downsample_frag) + + blur_vert := sdl.CreateGPUShader( + device, + sdl.GPUShaderCreateInfo { + code_size = len(BACKDROP_BLUR_VERT_RAW), 
+ code = raw_data(BACKDROP_BLUR_VERT_RAW), + entrypoint = SHADER_ENTRY, + format = {PLATFORM_SHADER_FORMAT_FLAG}, + stage = .VERTEX, + num_uniform_buffers = 1, + num_storage_buffers = 1, + }, + ) + if blur_vert == nil { + log.errorf("Could not create backdrop blur vertex shader: %s", sdl.GetError()) + return pipeline, false + } + defer sdl.ReleaseGPUShader(device, blur_vert) + + blur_frag := sdl.CreateGPUShader( + device, + sdl.GPUShaderCreateInfo { + code_size = len(BACKDROP_BLUR_FRAG_RAW), + code = raw_data(BACKDROP_BLUR_FRAG_RAW), + entrypoint = SHADER_ENTRY, + format = {PLATFORM_SHADER_FORMAT_FLAG}, + stage = .FRAGMENT, + num_samplers = 1, + num_uniform_buffers = 1, + }, + ) + if blur_frag == nil { + log.errorf("Could not create backdrop blur fragment shader: %s", sdl.GetError()) + return pipeline, false + } + defer sdl.ReleaseGPUShader(device, blur_frag) + + //----- Downsample PSO ---------------------------------- + // Single bilinear sample, blend disabled. No vertex buffer (gl_VertexIndex 0..2 emits the + // fullscreen triangle). Single-sample target (working textures are never MSAA). + downsample_target := sdl.GPUColorTargetDescription { + format = swapchain_format, + blend_state = sdl.GPUColorTargetBlendState{enable_blend = false}, + } + pipeline.downsample_pipeline = sdl.CreateGPUGraphicsPipeline( + device, + sdl.GPUGraphicsPipelineCreateInfo { + vertex_shader = fullscreen_vert, + fragment_shader = downsample_frag, + primitive_type = .TRIANGLELIST, + multisample_state = sdl.GPUMultisampleState{sample_count = ._1}, + target_info = sdl.GPUGraphicsPipelineTargetInfo { + color_target_descriptions = &downsample_target, + num_color_targets = 1, + }, + }, + ) + if pipeline.downsample_pipeline == nil { + log.errorf("Failed to create backdrop downsample graphics pipeline: %s", sdl.GetError()) + return pipeline, false + } + + //----- Blur PSO (H-blur + V-composite, mode-branched) -------------- + // Premultiplied-over blend matching the main pipeline.
No vertex buffer (mode 0 uses + // gl_VertexIndex 0..2 fullscreen tri; mode 1 uses gl_VertexIndex 0..5 unit-quad + + // gl_InstanceIndex into the storage buffer). + // + // Single-sample throughout: levlib does not support MSAA (see init's doc comment in + // draw.odin). The whole frame renders to single-sample targets, so sample_count = ._1 + // matches both mode 0 (writes h_blur_texture) and mode 1 (writes source_texture). + blur_target := sdl.GPUColorTargetDescription { + format = swapchain_format, + blend_state = sdl.GPUColorTargetBlendState { + enable_blend = true, + enable_color_write_mask = true, + src_color_blendfactor = .ONE, + dst_color_blendfactor = .ONE_MINUS_SRC_ALPHA, + color_blend_op = .ADD, + src_alpha_blendfactor = .ONE, + dst_alpha_blendfactor = .ONE_MINUS_SRC_ALPHA, + alpha_blend_op = .ADD, + color_write_mask = sdl.GPUColorComponentFlags{.R, .G, .B, .A}, + }, + } + pipeline.blur_pipeline = sdl.CreateGPUGraphicsPipeline( + device, + sdl.GPUGraphicsPipelineCreateInfo { + vertex_shader = blur_vert, + fragment_shader = blur_frag, + primitive_type = .TRIANGLELIST, + multisample_state = sdl.GPUMultisampleState{sample_count = ._1}, + target_info = sdl.GPUGraphicsPipelineTargetInfo { + color_target_descriptions = &blur_target, + num_color_targets = 1, + }, + }, + ) + if pipeline.blur_pipeline == nil { + log.errorf("Failed to create backdrop blur graphics pipeline: %s", sdl.GetError()) + return pipeline, false + } + + //----- Storage buffer for Gaussian_Blur_Primitive instances ------------- + // `or_return` exits early with ok = false if create_buffer fails, which triggers the + // deferred cleanup at the top of this proc. + pipeline.primitive_buffer = create_buffer( + device, + size_of(Gaussian_Blur_Primitive) * BUFFER_INIT_SIZE, + sdl.GPUBufferUsageFlags{.GRAPHICS_STORAGE_READ}, + ) or_return + + //----- Sampler ---------------------------------- + pipeline.sampler = sdl.CreateGPUSampler( + device, + sdl.GPUSamplerCreateInfo { + min_filter = .LINEAR, + mag_filter = .LINEAR, + mipmap_mode = .LINEAR, + address_mode_u = .CLAMP_TO_EDGE, + address_mode_v = .CLAMP_TO_EDGE, + address_mode_w 
= .CLAMP_TO_EDGE, + }, + ) + if pipeline.sampler == nil { + log.errorf("Could not create backdrop GPU sampler: %s", sdl.GetError()) + return pipeline, false + } + + log.debug("Done creating backdrop subsystem") + return pipeline, true +} + +// Releases the GPU resources held by `pipeline`: the three working textures, the sampler, the +// primitive buffer, and both graphics pipelines. Texture, sampler, and pipeline handles are +// nil-checked; `primitive_buffer` is handed to `destroy_buffer` unconditionally. Fields are not +// reset to nil afterwards. +//INTERNAL +destroy_backdrop :: proc(device: ^sdl.GPUDevice, pipeline: ^Backdrop) { + if pipeline.h_blur_texture != nil do sdl.ReleaseGPUTexture(device, pipeline.h_blur_texture) + if pipeline.downsample_texture != nil do sdl.ReleaseGPUTexture(device, pipeline.downsample_texture) + if pipeline.source_texture != nil do sdl.ReleaseGPUTexture(device, pipeline.source_texture) + if pipeline.sampler != nil do sdl.ReleaseGPUSampler(device, pipeline.sampler) + destroy_buffer(device, &pipeline.primitive_buffer) + if pipeline.blur_pipeline != nil do sdl.ReleaseGPUGraphicsPipeline(device, pipeline.blur_pipeline) + if pipeline.downsample_pipeline != nil do sdl.ReleaseGPUGraphicsPipeline(device, pipeline.downsample_pipeline) +} + +//----- Working texture management ---------------------------------- + +// Allocate (or reallocate, on resize) the three working textures that the backdrop bracket +// uses. All three are sized at full swapchain resolution, single-sample, share the swapchain +// format, and need {.COLOR_TARGET, .SAMPLER} usage so they can be written by render passes +// and read by subsequent passes. +// +// `source_texture` is shared infrastructure (used by every backdrop effect). +// `downsample_texture` and `h_blur_texture` are Gaussian-blur-specific intermediates; a +// future backdrop effect with no downsample/blur prep would skip them. +// +// Recreates on dimension change only — same-size frames hit the early-out and skip GPU +// resource churn.
+//INTERNAL
+ensure_backdrop_textures :: proc(device: ^sdl.GPUDevice, format: sdl.GPUTextureFormat, width, height: u32) {
+	// Release one working texture (if allocated) and clear its slot.
+	release_slot :: proc(device: ^sdl.GPUDevice, slot: ^^sdl.GPUTexture) {
+		if slot^ == nil do return
+		sdl.ReleaseGPUTexture(device, slot^)
+		slot^ = nil
+	}
+
+	// The three working textures share one description apart from their extent:
+	// 2D, single mip, single layer, single-sample, render-target + sampled.
+	create_working_texture :: proc(
+		device: ^sdl.GPUDevice,
+		format: sdl.GPUTextureFormat,
+		w, h: u32,
+	) -> ^sdl.GPUTexture {
+		return sdl.CreateGPUTexture(
+			device,
+			sdl.GPUTextureCreateInfo {
+				type = .D2,
+				format = format,
+				usage = {.COLOR_TARGET, .SAMPLER},
+				width = w,
+				height = h,
+				layer_count_or_depth = 1,
+				num_levels = 1,
+				sample_count = ._1,
+			},
+		)
+	}
+
+	backdrop := &GLOB.backdrop
+
+	// Early-out: textures already exist at the requested size, nothing to do.
+	same_size := backdrop.cached_width == width && backdrop.cached_height == height
+	if backdrop.source_texture != nil && same_size do return
+
+	// Drop any prior allocations. Covers both a resize and the very first call (all nil).
+	release_slot(device, &backdrop.h_blur_texture)
+	release_slot(device, &backdrop.downsample_texture)
+	release_slot(device, &backdrop.source_texture)
+
+	// Working textures are sized at full swapchain resolution to support factor=1 (no downsample
+	// for small σ, where any 2:1 round-trip would visibly soften the output). Larger factors just
+	// write to a sub-rect via viewport-limited rendering. See the file-header comment.
+	working_width := width
+	working_height := height
+
+	backdrop.source_texture = create_working_texture(device, format, width, height)
+	if backdrop.source_texture == nil {
+		log.panicf("Failed to create backdrop source texture (%dx%d): %s", width, height, sdl.GetError())
+	}
+
+	backdrop.downsample_texture = create_working_texture(device, format, working_width, working_height)
+	if backdrop.downsample_texture == nil {
+		log.panicf(
+			"Failed to create backdrop downsample texture (%dx%d): %s",
+			working_width,
+			working_height,
+			sdl.GetError(),
+		)
+	}
+
+	backdrop.h_blur_texture = create_working_texture(device, format, working_width, working_height)
+	if backdrop.h_blur_texture == nil {
+		log.panicf(
+			"Failed to create backdrop h_blur texture (%dx%d): %s",
+			working_width,
+			working_height,
+			sdl.GetError(),
+		)
+	}
+
+	// Remember the size these textures were built for so same-size frames take the early-out.
+	backdrop.cached_width = width
+	backdrop.cached_height = height
+}
+
+//----- Frame / layer scanners ----------------------------------
+
+// Returns true if any sub-batch in any layer this frame is .Backdrop kind. Called once at the
+// top of `end()` to decide whether to route the whole frame to source_texture.
+// O(total sub-batches) but with an early-exit on the first hit, so typical cost is tiny.
+//INTERNAL
+frame_has_backdrop :: proc() -> bool {
+	for &batch in GLOB.tmp_sub_batches {
+		if batch.kind == .Backdrop do return true
+	}
+	return false
+}
+
+// Returns the absolute index of the first .Backdrop sub-batch in the layer's sub-batch range,
+// or -1 if the layer has no backdrops. The index is into GLOB.tmp_sub_batches (not relative to
+// layer.sub_batch_start), to match how draw_layer's render-range helpers consume it.
+//INTERNAL
+find_first_backdrop_in_layer :: proc(layer: ^Layer) -> int {
+	for i in 0 ..< layer.sub_batch_len {
+		abs_idx := layer.sub_batch_start + i
+		if GLOB.tmp_sub_batches[abs_idx].kind == .Backdrop do return int(abs_idx)
+	}
+	return -1
+}
+
+// ---------------------------------------------------------------------------------------------------------------------
+// ----- Gaussian blur ------------
+// ---------------------------------------------------------------------------------------------------------------------
+
+// Adaptive downsample design (Flutter-style).
+//
+// The bracket picks a downsample factor per-sigma-group, not as a global constant. The choice
+// is driven by Flutter's `CalculateScale` formula in
+// impeller/entity/contents/filters/gaussian_blur_filter_contents.cc (originally from Skia's
+// GrBlurUtils): downsample so that the sigma in working-resolution pixels stays in the
+// 2..4 range. This keeps the kernel reach wide enough to hide high-frequency artifacts from
+// the bilinear upsample at the composite, while keeping the kernel's discrete tap count
+// small (≤3σ reach → ≈12 paired taps).
+//
+// The full table, in physical pixels (sigma_logical * dpi_scaling):
+//
+//   sigma_phys ≤ 4 → factor = 1 (no downsample; source is sampled directly)
+//   sigma_phys ≤ 8 → factor = 2
+//   sigma_phys > 8 → factor = 4 (capped)
+//
+// Capped at factor=4 to favor visual quality over bandwidth at the high end. Larger factors
+// (8 and 16) would lose more high-frequency detail than the kernel can mask even with the
+// H+V split, and the bandwidth saving is small (the work region also shrinks quadratically,
+// so most of the savings are already captured at factor=4).
+//
+// Working textures are sized at full swapchain resolution to support factor=1. Larger factors
+// just write to a smaller sub-rect via viewport-limited rendering. Memory cost: full-res
+// working textures (2 textures, RGBA8) is roughly 16 MB at 1080p, 64 MB at 4K. On modern
+// GPUs this is well within budget; on Mali Valhall SBCs it's negligible against unified-
+// memory headroom.
+//
+// The shaders read the factor as a uniform. The downsample shader has three paths (factor=1
+// identity, factor=2 single bilinear tap, factor>=4 four bilinear taps with offsets scaling
+// by factor/4). The V-composite mode of backdrop_blur.frag uses inv_downsample_factor to
+// scale full-res frag coords down to working-res UV.
+
+//----- GPU types ----------------------------------
+
+// Maximum number of (weight, offset) pairs in a single blur kernel. Each pair represents
+// the linear-sampling pair adjustment (one bilinear fetch covering two adjacent texels);
+// pair[0] is the center weight with offset 0. With 32 pairs we cover up to 63 input texels
+// per side (1 center + 31 paired symmetric taps × 2 texels each), enough for sigma values
+// well past the 4..24 typical UI range. Must match MAX_KERNEL_PAIRS in
+// shaders/source/backdrop_blur.frag.
+//INTERNAL
+MAX_GAUSSIAN_BLUR_KERNEL_PAIRS :: 32
+
+// Gaussian_Blur_Primitive is the GPU-side per-primitive storage layout. Mirrors the GLSL std430
+// struct in shaders/source/backdrop_blur.vert. Field order is chosen so std430 alignment
+// rules pack the struct to a clean 48-byte natural layout (no implicit padding): vec4
+// members come first (16-byte aligned at any offset), then vec2, then scalars. The total is
+// a multiple of 16 so the std430 array stride matches size_of(...) exactly.
+//
+// Gaussian blur primitives are RRect-only: rectangles, rounded rectangles, and circles
+// (via uniform_radii) are all expressible. Rotation is intentionally omitted — backdrop
+// sampling is in screen space, so a rotated mask over a stationary blur sample would look
+// visually wrong. iOS, CSS backdrop-filter, and Flutter BackdropFilter all enforce this
+// implicitly; we enforce it explicitly by leaving no rotation field.
+//
+// Outline is also intentionally omitted. A specialized edge effect (e.g. liquid-glass-style
+// refraction outlines) would be implemented as a dedicated primitive type with its own
+// pipeline rather than tacked onto this one as a flag bit.
+//INTERNAL
+Gaussian_Blur_Primitive :: struct {
+	bounds:       [4]f32, //  0: 16 — world-space quad (min_xy, max_xy)
+	radii:        [4]f32, // 16: 16 — per-corner radii in physical pixels (BR, TR, BL, TL)
+	half_size:    [2]f32, // 32:  8 — RRect half extents (physical px)
+	half_feather: f32,    // 40:  4 — feather_px * 0.5 (SDF anti-aliasing)
+	color:        Color,  // 44:  4 — tint, packed RGBA u8x4
+}
+#assert(size_of(Gaussian_Blur_Primitive) == 48)
+
+// Vertex uniforms for the unified blur PSO (mode 0 = H-blur, mode 1 = V-composite).
+// Matches the GLSL Uniforms block in shaders/source/backdrop_blur.vert. The downsample
+// PSO has no vertex uniforms.
+//
+// The block is pushed with size_of(Gaussian_Blur_Vert_Uniforms), so the CPU-side size must
+// equal the 80 bytes the field offsets below add up to. The #assert pins that the same way
+// Gaussian_Blur_Primitive's layout is pinned, so an accidental field or alignment change
+// fails at compile time instead of silently mis-feeding the shader.
+//INTERNAL
+Gaussian_Blur_Vert_Uniforms :: struct {
+	projection: matrix[4, 4]f32, //  0: 64 — screen-space ortho (mode 1 only; mode 0 ignores)
+	dpi_scale:  f32,             // 64:  4
+	mode:       u32,             // 68:  4 — 0 = H-blur fullscreen tri; 1 = V-composite instanced quads
+	_pad0:      [2]f32,          // 72:  8 — pads the block to a 16-byte multiple (std140)
+}
+#assert(size_of(Gaussian_Blur_Vert_Uniforms) == 80)
+
+// Fragment uniforms for the downsample PSO. Matches Uniforms block in
+// shaders/source/backdrop_downsample.frag.
+//INTERNAL
+Gaussian_Blur_Downsample_Frag_Uniforms :: struct {
+	inv_source_size:   [2]f32, //  0: 8 — 1.0 / source_texture pixel dimensions (full-res)
+	downsample_factor: u32,    //  8: 4 — 1, 2, or 4 (selects identity / 1-tap / 4-tap path in shader)
+	_pad0:             u32,    // 12: 4
+}
+// Pushed with size_of(...); pin the layout the offsets above describe.
+#assert(size_of(Gaussian_Blur_Downsample_Frag_Uniforms) == 16)
+
+// Fragment uniforms for the unified blur PSO (mode 0 + mode 1). Matches the GLSL Uniforms
+// block in shaders/source/backdrop_blur.frag. The kernel array holds the linear-sampling
+// pair coefficients computed CPU-side via `compute_blur_kernel`.
+//INTERNAL
+Gaussian_Blur_Frag_Uniforms :: struct {
+	inv_working_size:      [2]f32, //  0: 8 — 1.0 / working-resolution texture dimensions
+	pair_count:            u32,    //  8: 4 — number of (weight, offset) pairs; pair[0] is center
+	mode:                  u32,    // 12: 4 — 0 = H-blur, 1 = V-composite (must match vert mode)
+	direction:             [2]f32, // 16: 8 — (1,0) for H-blur, (0,1) for V-composite
+	inv_downsample_factor: f32,    // 24: 4 — 1.0 / downsample_factor (mode 1 only; mode 0 ignores)
+	_pad0:                 f32,    // 28: 4
+	kernel:                [MAX_GAUSSIAN_BLUR_KERNEL_PAIRS][4]f32, // 32: 512 — .x = weight, .y = offset (texels)
+}
+// 32-byte header + one 16-byte slot per kernel pair.
+#assert(size_of(Gaussian_Blur_Frag_Uniforms) == 32 + MAX_GAUSSIAN_BLUR_KERNEL_PAIRS * 16)
+
+//----- Kernel computation ----------------------------------
+
+// Compute Gaussian blur kernel weights with the linear-sampling pair adjustment.
+// Adapted from RAD Debugger's r_d3d11_g_blur_shader_src CPU-side coefficient generation
+// and Daniel Rákos's "Efficient Gaussian blur with linear sampling" article.
+//
+// The trick: bilinear sampling lets us fetch (1-t)*pixel[i] + t*pixel[i+1] with a single
+// texture lookup. So for any pair of adjacent discrete weights w0, w1 we can collapse them
+// into one bilinear fetch with weight w = w0+w1 sampled at offset i + w1/w. This halves the
+// fragment-shader sample count for a given kernel radius.
+//
+// Output: `kernel[0]` is the center weight (offset 0), and `kernel[1..pair_count-1]` each
+// hold one paired tap (sampled symmetrically as ±offset in the shader). The shader iterates
+// `i in [1, pair_count)` and does two texture fetches per pair — one at +offset, one at
+// -offset — for a total of 1 + 2*(pair_count-1) bilinear fetches per fragment.
+//
+// `sigma` is the true Gaussian standard deviation in the kernel's working-space units
+// (working-resolution texels, after the caller has converted from logical pixels via
+// dpi_scaling and the downsample factor). The kernel extent reaches ±3σ, capturing 99.7% of
+// the Gaussian's mass; weights beyond that contribute imperceptibly. A sigma that is not
+// strictly positive (<= 0, or NaN) produces a degenerate kernel `{1, 0}` that acts as a
+// sharp pass-through. After the loop, the discrete weights are normalized so they sum to
+// 1.0 (truncating at ±3σ loses a tiny amount of mass; we renormalize to preserve overall
+// image brightness).
+//
+// Very large sigmas are clamped to the storage budget (the kernel is truncated short of
+// 3σ); the clamp happens in float space so an enormous or infinite sigma never reaches the
+// float → u32 conversion.
+//
+// Note on the parameter contract: this routine takes σ directly and derives the tap count
+// from it, rather than the inverse (RAD Debugger's algorithm passes a tap count and derives
+// `stdev = (blur_count-1)/2`). Taking σ directly matches what callers expect when they read
+// "gaussian_sigma" — passing tap count under that name was a footgun.
+//
+// Returns the number of valid entries written to `kernel` (always >= 1).
+//INTERNAL
+compute_blur_kernel :: proc(
+	sigma: f32,
+	kernel: ^[MAX_GAUSSIAN_BLUR_KERNEL_PAIRS][4]f32,
+) -> (
+	pair_count: u32,
+) {
+	// Written as !(sigma > 0) rather than sigma <= 0 so that NaN (for which every ordered
+	// comparison is false) also takes the pass-through path instead of poisoning the weights.
+	if !(sigma > 0) {
+		kernel[0] = {1, 0, 0, 0}
+		return 1
+	}
+
+	// Per-side discrete tap count: ceil(3*sigma) + 1 (center + 3σ reach on each side).
+	// Cap at the storage budget. With MAX_GAUSSIAN_BLUR_KERNEL_PAIRS=32 each pair collapses 2
+	// discrete taps via linear-sampling, so max discrete taps per side = 1 + 31*2 = 63.
+	// The cap is applied before converting to u32: only values already known to be below
+	// MAX_TAPS are converted, so the conversion cannot overflow.
+	MAX_TAPS :: u32(MAX_GAUSSIAN_BLUR_KERNEL_PAIRS - 1) * 2 + 1
+	reach := math.ceil(3 * sigma) + 1
+	discrete_taps := MAX_TAPS
+	if reach < f32(MAX_TAPS) do discrete_taps = u32(reach)
+	if discrete_taps < 2 {
+		// Sigma was so small that 3σ < 1 texel; degenerate to a sharp sample.
+		kernel[0] = {1, 0, 0, 0}
+		return 1
+	}
+
+	// Compute discrete weights[i] = exp(-i² / (2σ²)). The inv_root prefactor cancels in the
+	// final normalization, so we skip it.
+	weights: [MAX_GAUSSIAN_BLUR_KERNEL_PAIRS * 2]f32 = {}
+	two_sigma_sq := 2 * sigma * sigma
+	total: f32 = 0
+	for i in 0 ..< discrete_taps {
+		x := f32(i)
+		weights[i] = math.exp(-x * x / two_sigma_sq)
+		// weights[0] is the center; weights[1..] are sampled on both sides, so they count twice.
+		total += weights[i] if i == 0 else 2 * weights[i]
+	}
+	// Normalize so the kernel sums to exactly 1.0 across the full ±3σ extent.
+	if total > 0 {
+		inv_total := 1.0 / total
+		for i in 0 ..< discrete_taps do weights[i] *= inv_total
+	}
+
+	// Linear-sampling pair adjustment: weights[1] and weights[2] collapse to one bilinear
+	// fetch with weight w = w0+w1 at offset i + w1/w. `weights` is sized 2*MAX so that
+	// `weights[i+1]` access on odd i up to discrete_taps-1 is always in bounds; when
+	// discrete_taps is even the final pair's second weight is the zero-initialized slot.
+	kernel[0] = {weights[0], 0, 0, 0}
+	pair_count = 1
+	for i := u32(1); i < discrete_taps; i += 2 {
+		w0 := weights[i]
+		w1 := weights[i + 1]
+		w := w0 + w1
+		// Guard against a div-by-zero where both adjacent weights underflow to 0 (only happens
+		// at the tail of a very tight kernel; numerically-degenerate but legal).
+		offset := f32(i)
+		if w > 0 do offset = f32(i) + w1 / w
+		kernel[pair_count] = {w, offset, 0, 0}
+		pair_count += 1
+	}
+	return pair_count
+}
+
+// Pick a downsample factor for a given sigma. See the file-header comment for the table and
+// rationale. `sigma_logical` is in logical pixels and is scaled by GLOB.dpi_scaling before
+// the thresholds are applied. Returned values: {1, 2, 4}.
+//INTERNAL
+compute_backdrop_downsample_factor :: proc(sigma_logical: f32) -> u32 {
+	sigma_phys := sigma_logical * GLOB.dpi_scaling
+	switch {
+	case sigma_phys <= 4:
+		return 1
+	case sigma_phys <= 8:
+		return 2
+	case:
+		return 4
+	}
+}
+
+//----- Uniform push helpers ----------------------------------
+
+// Push the Gaussian_Blur_Vert_Uniforms block to the vertex stage at slot 0.
+//INTERNAL
+push_backdrop_vert_globals :: proc(cmd_buffer: ^sdl.GPUCommandBuffer, width: f32, height: f32, mode: u32) {
+	// Zero-initialized first so the trailing pad stays zero; then fill the live fields.
+	block: Gaussian_Blur_Vert_Uniforms
+	block.projection = ortho_rh(left = 0.0, top = 0.0, right = width, bottom = height, near = -1.0, far = 1.0)
+	block.dpi_scale = GLOB.dpi_scaling
+	block.mode = mode
+	sdl.PushGPUVertexUniformData(cmd_buffer, 0, &block, size_of(block))
+}
+
+// Push the Gaussian_Blur_Downsample_Frag_Uniforms block to the fragment stage at slot 0.
+// `source_width`/`source_height` are the pixel dimensions whose reciprocals the shader
+// receives as inv_source_size.
+//INTERNAL
+push_backdrop_downsample_frag_globals :: proc(
+	cmd_buffer: ^sdl.GPUCommandBuffer,
+	source_width, source_height: u32,
+	downsample_factor: u32,
+) {
+	inv_w := 1.0 / f32(source_width)
+	inv_h := 1.0 / f32(source_height)
+
+	block: Gaussian_Blur_Downsample_Frag_Uniforms
+	block.inv_source_size = {inv_w, inv_h}
+	block.downsample_factor = downsample_factor
+	sdl.PushGPUFragmentUniformData(cmd_buffer, 0, &block, size_of(block))
+}
+
+// Push the Gaussian_Blur_Frag_Uniforms block (kernel + pass mode/direction) to the fragment stage at slot 0.
+// The caller owns and fills `uniforms`; this only forwards it.
+//INTERNAL
+push_backdrop_blur_frag_globals :: proc(
+	cmd_buffer: ^sdl.GPUCommandBuffer,
+	uniforms: ^Gaussian_Blur_Frag_Uniforms,
+) {
+	byte_count :: size_of(Gaussian_Blur_Frag_Uniforms)
+	sdl.PushGPUFragmentUniformData(cmd_buffer, 0, uniforms, byte_count)
+}
+
+//----- Storage-buffer upload ----------------------------------
+
+// Upload all Gaussian_Blur_Primitive instances staged this frame to the backdrop subsystem's storage
+// buffer. Mirrors the SDF primitive upload in core_2d.odin's `upload`. Called from
+// `end()` inside the same copy pass that uploads vertices/indices/SDF primitives.
+//INTERNAL
+upload_backdrop_primitives :: proc(device: ^sdl.GPUDevice, pass: ^sdl.GPUCopyPass) {
+	staged_count := u32(len(GLOB.tmp_gaussian_blur_primitives))
+	if staged_count == 0 {
+		// Nothing staged this frame: skip the map/copy/upload entirely.
+		return
+	}
+
+	buffer := &GLOB.backdrop.primitive_buffer
+	byte_count := staged_count * size_of(Gaussian_Blur_Primitive)
+
+	// Make sure the buffer can hold this frame's primitives before mapping it.
+	grow_buffer_if_needed(device, buffer, byte_count, sdl.GPUBufferUsageFlags{.GRAPHICS_STORAGE_READ})
+
+	// Stage: map the transfer buffer, copy the CPU-side array in, unmap.
+	mapped := sdl.MapGPUTransferBuffer(device, buffer.transfer, false)
+	if mapped == nil {
+		log.panicf("Failed to map backdrop primitive transfer buffer: %s", sdl.GetError())
+	}
+	mem.copy(mapped, raw_data(GLOB.tmp_gaussian_blur_primitives), int(byte_count))
+	sdl.UnmapGPUTransferBuffer(device, buffer.transfer)
+
+	// Record the transfer → GPU buffer copy on the caller's copy pass.
+	src := sdl.GPUTransferBufferLocation {
+		transfer_buffer = buffer.transfer,
+	}
+	dst := sdl.GPUBufferRegion {
+		buffer = buffer.gpu,
+		offset = 0,
+		size   = byte_count,
+	}
+	sdl.UploadToGPUBuffer(pass, src, dst, false)
+}
+
+//----- Bracket scheduler ----------------------------------
+
+// Compute the union AABB of the backdrop primitives in a contiguous-same-sigma sub-batch run
+// (one "sigma group"), expanded by 6 sigmas of blur reach (the kernel weight beyond 3σ is
+// negligible; halo of 6σ covers both the H-blur reads from downsample and the V-blur reads
+// from h_blur, since each pass extends its kernel another 3σ from its output position).
+// Returns a viewport in physical pixels for the full-resolution render target; the caller
+// divides by the chosen downsample factor for the working-resolution passes.
+//
+// Per-group (rather than per-layer) because the adaptive downsample picks a different factor
+// per sigma, and the kernel reach is also per-sigma. A tighter region per group means less
+// fragment work in the downsample and H-blur passes.
+//INTERNAL
+compute_backdrop_group_work_region :: proc(
+	group_start, group_end: u32,
+	sigma_logical: f32,
+	swapchain_width, swapchain_height: u32,
+) -> (
+	region_x, region_y, region_w, region_h: u32,
+) {
+	dpi := GLOB.dpi_scaling
+	has_any := false
+	min_x: f32 = 0
+	min_y: f32 = 0
+	max_x: f32 = 0
+	max_y: f32 = 0
+
+	for i in group_start ..< group_end {
+		batch := GLOB.tmp_sub_batches[i]
+		if batch.kind != .Backdrop do continue
+		for p in batch.offset ..< batch.offset + batch.count {
+			prim := GLOB.tmp_gaussian_blur_primitives[p]
+			// prim.bounds is in logical pixels (world space).
+			if !has_any {
+				min_x = prim.bounds[0]
+				min_y = prim.bounds[1]
+				max_x = prim.bounds[2]
+				max_y = prim.bounds[3]
+				has_any = true
+			} else {
+				if prim.bounds[0] < min_x do min_x = prim.bounds[0]
+				if prim.bounds[1] < min_y do min_y = prim.bounds[1]
+				if prim.bounds[2] > max_x do max_x = prim.bounds[2]
+				if prim.bounds[3] > max_y do max_y = prim.bounds[3]
+			}
+		}
+	}
+
+	if !has_any do return 0, 0, 0, 0
+
+	// Halo = 6σ. The bracket runs two sequential blur passes (H then V). H reads downsample
+	// at ±3σ from its output; V reads h_blur at ±3σ from its output. So for V outputs at
+	// primitive_AABB to be valid, h_blur must be valid at primitive_AABB ±3σ, which requires
+	// the downsample valid at primitive_AABB ±6σ.
+	//
+	// sigma <= 0 means "no blur" (sharp pass-through kernel), so the halo is clamped at zero:
+	// a negative sigma must never shrink the region below the primitives' own AABB.
+	halo_logical := 6.0 * math.max(sigma_logical, 0)
+	min_x -= halo_logical
+	min_y -= halo_logical
+	max_x += halo_logical
+	max_y += halo_logical
+
+	// Convert to physical pixels, clamp to swapchain bounds, and round OUTWARD (floor the min
+	// edge, ceil the max edge) so every pixel the AABB touches is inside the region. Truncating
+	// both the origin and the extent independently could drop the last column/row. The
+	// swapchain extents are integers, so the ceil cannot push the max edge past them.
+	phys_min_x := math.floor(math.max(min_x * dpi, 0))
+	phys_min_y := math.floor(math.max(min_y * dpi, 0))
+	phys_max_x := math.ceil(math.min(max_x * dpi, f32(swapchain_width)))
+	phys_max_y := math.ceil(math.min(max_y * dpi, f32(swapchain_height)))
+
+	// Entirely off-screen (or degenerate) groups produce an empty region.
+	if phys_max_x <= phys_min_x || phys_max_y <= phys_min_y do return 0, 0, 0, 0
+
+	region_x = u32(phys_min_x)
+	region_y = u32(phys_min_y)
+	region_w = u32(phys_max_x - phys_min_x)
+	region_h = u32(phys_max_y - phys_min_y)
+	return
+}
+
+// Run the backdrop bracket for one layer. Assumes:
+//   - source_texture currently holds the pre-bracket frame contents (Pass A has already
+//     rendered everything that should appear behind the backdrop).
+//   - The caller has invoked ensure_backdrop_textures with current swapchain dimensions.
+//   - At least one .Backdrop sub-batch exists in the layer (caller checked).
+//
+// Per-sigma-group execution. The bracket walks the layer's sub-batches in submission order,
+// grouping contiguous-same-sigma .Backdrop sub-batches. For each group:
+//   1. Pick a downsample factor using compute_backdrop_downsample_factor.
+//   2. Compute that group's work region (primitives' AABB + 6σ halo, clamped).
+//   3. Downsample: source_texture → downsample_texture, viewport-limited to
+//      work_region/factor. Writes into a sub-rect of the working texture.
+//   4. H-blur (mode 0, direction=H): downsample_texture → h_blur_texture, same viewport.
+//   5. V-blur (mode 0, direction=V): h_blur_texture → downsample_texture (ping-pong reuse;
+//      downsample_texture's data is no longer needed). Same viewport.
+//   6. Composite (mode 1): downsample_texture (now holds H+V blur) → source_texture, full-
+//      target viewport, per-primitive SDF discard handles masking and applies the tint. Each
+//      sub-batch in the group is one instanced draw.
+//
+// V-blur is run as its own working→working pass rather than folded into the composite. The
+// folded variant produces a horizontal-vs-vertical asymmetry artifact (horizontal source
+// features end up looking sharper than vertical ones inside the panel). Matching V's
+// structure exactly to H's restores symmetry.
+//
+// On exit, source_texture contains the pre-bracket contents plus all backdrop primitives
+// composited on top. The caller then runs Pass B (post-bracket non-backdrop sub-batches) on
+// source_texture with LOAD.
+//INTERNAL
+run_backdrop_bracket :: proc(
+	cmd_buffer: ^sdl.GPUCommandBuffer,
+	layer: ^Layer,
+	swapchain_width, swapchain_height: u32,
+) {
+	pipeline := &GLOB.backdrop
+
+	full_viewport := sdl.GPUViewport {
+		x         = 0,
+		y         = 0,
+		w         = f32(swapchain_width),
+		h         = f32(swapchain_height),
+		min_depth = 0,
+		max_depth = 1,
+	}
+	full_scissor := sdl.Rect {
+		x = 0,
+		y = 0,
+		w = i32(swapchain_width),
+		h = i32(swapchain_height),
+	}
+
+	// Working textures are at full swapchain resolution. Each per-group factor=N pass writes
+	// only to a sub-rect of dimensions (work_region_phys / N), via viewport-limited rendering.
+
+	layer_end := layer.sub_batch_start + layer.sub_batch_len
+	i := layer.sub_batch_start
+	for i < layer_end {
+		batch := GLOB.tmp_sub_batches[i]
+		if batch.kind != .Backdrop {
+			i += 1
+			continue
+		}
+
+		// Find the contiguous run of .Backdrop sub-batches with this sigma.
+		sigma := batch.gaussian_sigma
+		group_start := i
+		group_end := i + 1
+		for group_end < layer_end {
+			next := GLOB.tmp_sub_batches[group_end]
+			if next.kind != .Backdrop || next.gaussian_sigma != sigma do break
+			group_end += 1
+		}
+
+		// Pick downsample factor for this group.
+		downsample_factor := compute_backdrop_downsample_factor(sigma)
+
+		// Compute this group's work region (primitive AABB + 6σ halo, in physical pixels).
+		region_x, region_y, region_w, region_h := compute_backdrop_group_work_region(
+			group_start,
+			group_end,
+			sigma,
+			swapchain_width,
+			swapchain_height,
+		)
+		if region_w == 0 || region_h == 0 {
+			i = group_end
+			continue
+		}
+
+		// Convert the work region to working-resolution coords, rounding outward: floor the
+		// origin, ceil the far edge, and derive the extent from the two. Ceil-dividing the
+		// extent on its own is not enough when the origin is not a multiple of the factor
+		// (e.g. x=3, w=2, factor=4 spans working texels 0 and 1, but ceil(2/4) is only 1).
+		working_x := region_x / downsample_factor
+		working_y := region_y / downsample_factor
+		working_end_x := (region_x + region_w + downsample_factor - 1) / downsample_factor
+		working_end_y := (region_y + region_h + downsample_factor - 1) / downsample_factor
+		working_w := working_end_x - working_x
+		working_h := working_end_y - working_y
+
+		// The working textures are full swapchain resolution, but a factor=N group only
+		// occupies their top-left (cached_width/N × cached_height/N) corner. Clamp the work
+		// rect to that extent. An origin at or past the extent is skipped outright so the
+		// unsigned subtraction below cannot wrap.
+		wt_w := pipeline.cached_width / downsample_factor
+		wt_h := pipeline.cached_height / downsample_factor
+		if working_x >= wt_w || working_y >= wt_h {
+			i = group_end
+			continue
+		}
+		if working_x + working_w > wt_w do working_w = wt_w - working_x
+		if working_y + working_h > wt_h do working_h = wt_h - working_y
+		if working_w == 0 || working_h == 0 {
+			i = group_end
+			continue
+		}
+
+		working_viewport := sdl.GPUViewport {
+			x         = f32(working_x),
+			y         = f32(working_y),
+			w         = f32(working_w),
+			h         = f32(working_h),
+			min_depth = 0,
+			max_depth = 1,
+		}
+		working_scissor := sdl.Rect {
+			x = i32(working_x),
+			y = i32(working_y),
+			w = i32(working_w),
+			h = i32(working_h),
+		}
+
+		// inv_working_size is always relative to the actual texture extent (full swapchain res).
+		// At factor>1 we're only using a sub-rect, but the texture coords are still divided by the
+		// full texture's dimensions because that's what gl_FragCoord operates on.
+		inv_working_size := [2]f32{1.0 / f32(pipeline.cached_width), 1.0 / f32(pipeline.cached_height)}
+
+		// Convert the user's logical-pixel sigma into the kernel's working space.
+		// sigma_working_texels = sigma_logical * dpi_scaling / downsample_factor.
+		effective_sigma := sigma * GLOB.dpi_scaling / f32(downsample_factor)
+		frag_uniforms := Gaussian_Blur_Frag_Uniforms {
+			inv_working_size      = inv_working_size,
+			inv_downsample_factor = 1.0 / f32(downsample_factor),
+		}
+		frag_uniforms.pair_count = compute_blur_kernel(effective_sigma, &frag_uniforms.kernel)
+
+		//----- Downsample (source_texture → downsample_texture, viewport-limited) ----------
+		{
+			pass := sdl.BeginGPURenderPass(
+				cmd_buffer,
+				&sdl.GPUColorTargetInfo {
+					texture = pipeline.downsample_texture,
+					load_op = .DONT_CARE,
+					store_op = .STORE,
+					cycle = true,
+				},
+				1,
+				nil,
+			)
+			sdl.BindGPUGraphicsPipeline(pass, pipeline.downsample_pipeline)
+			sdl.SetGPUViewport(pass, working_viewport)
+			sdl.SetGPUScissor(pass, working_scissor)
+			push_backdrop_downsample_frag_globals(
+				cmd_buffer,
+				pipeline.cached_width,
+				pipeline.cached_height,
+				downsample_factor,
+			)
+			sdl.BindGPUFragmentSamplers(
+				pass,
+				0,
+				&sdl.GPUTextureSamplerBinding{texture = pipeline.source_texture, sampler = pipeline.sampler},
+				1,
+			)
+			sdl.DrawGPUPrimitives(pass, 3, 1, 0, 0)
+			sdl.EndGPURenderPass(pass)
+		}
+
+		//----- H-blur (mode 0, direction=H): downsample_texture → h_blur_texture --------
+		{
+			frag_uniforms.mode = 0
+			frag_uniforms.direction = {1, 0}
+
+			pass := sdl.BeginGPURenderPass(
+				cmd_buffer,
+				&sdl.GPUColorTargetInfo {
+					texture = pipeline.h_blur_texture,
+					load_op = .DONT_CARE,
+					store_op = .STORE,
+					cycle = true,
+				},
+				1,
+				nil,
+			)
+			sdl.BindGPUGraphicsPipeline(pass, pipeline.blur_pipeline)
+			sdl.SetGPUViewport(pass, working_viewport)
+			sdl.SetGPUScissor(pass, working_scissor)
+			// Mode 0's vertex shader is a fullscreen triangle that ignores `projection`; pass
+			// the standard ortho anyway so the same uniform block works for both modes.
+			push_backdrop_vert_globals(cmd_buffer, f32(swapchain_width), f32(swapchain_height), 0)
+			push_backdrop_blur_frag_globals(cmd_buffer, &frag_uniforms)
+			// The blur PSO is declared with num_storage_buffers = 1 (mode 1 reads it). SDL3 GPU
+			// validation requires the binding to be present for *any* draw on this PSO, even
+			// though mode 0's shader path doesn't actually read it. Bind it here too.
+			sdl.BindGPUVertexStorageBuffers(pass, 0, ([^]^sdl.GPUBuffer)(&pipeline.primitive_buffer.gpu), 1)
+			sdl.BindGPUFragmentSamplers(
+				pass,
+				0,
+				&sdl.GPUTextureSamplerBinding{texture = pipeline.downsample_texture, sampler = pipeline.sampler},
+				1,
+			)
+			sdl.DrawGPUPrimitives(pass, 3, 1, 0, 0)
+			sdl.EndGPURenderPass(pass)
+		}
+
+		//----- V-blur (mode 0, direction=V): h_blur_texture → downsample_texture --------
+		// Ping-pong reuse: downsample_texture's data is no longer needed once H-blur has
+		// produced its output, so we reuse it as the V-blur target. Saves allocating a third
+		// working texture.
+		{
+			frag_uniforms.mode = 0
+			frag_uniforms.direction = {0, 1}
+
+			pass := sdl.BeginGPURenderPass(
+				cmd_buffer,
+				&sdl.GPUColorTargetInfo {
+					texture = pipeline.downsample_texture,
+					load_op = .DONT_CARE,
+					store_op = .STORE,
+					cycle = true,
+				},
+				1,
+				nil,
+			)
+			sdl.BindGPUGraphicsPipeline(pass, pipeline.blur_pipeline)
+			sdl.SetGPUViewport(pass, working_viewport)
+			sdl.SetGPUScissor(pass, working_scissor)
+			push_backdrop_vert_globals(cmd_buffer, f32(swapchain_width), f32(swapchain_height), 0)
+			push_backdrop_blur_frag_globals(cmd_buffer, &frag_uniforms)
+			sdl.BindGPUVertexStorageBuffers(pass, 0, ([^]^sdl.GPUBuffer)(&pipeline.primitive_buffer.gpu), 1)
+			sdl.BindGPUFragmentSamplers(
+				pass,
+				0,
+				&sdl.GPUTextureSamplerBinding{texture = pipeline.h_blur_texture, sampler = pipeline.sampler},
+				1,
+			)
+			sdl.DrawGPUPrimitives(pass, 3, 1, 0, 0)
+			sdl.EndGPURenderPass(pass)
+		}
+
+		//----- Composite (mode 1): downsample_texture (now holds H+V blur) → source_texture --
+		// No kernel applied here — the working texture is already fully blurred. The shader just
+		// upsamples (via bilinear filtering on the read), applies the SDF mask, and applies the
+		// tint. One render pass for the whole sigma group; each sub-batch issues its own draw
+		// call because non-contiguous-but-same-sigma sub-batches couldn't coalesce upstream.
+		{
+			frag_uniforms.mode = 1
+			// direction is unused in mode 1 but keep it set so reading the uniform doesn't see
+			// undefined data on platforms that care about that.
+			frag_uniforms.direction = {0, 0}
+
+			pass := sdl.BeginGPURenderPass(
+				cmd_buffer,
+				&sdl.GPUColorTargetInfo{texture = pipeline.source_texture, load_op = .LOAD, store_op = .STORE},
+				1,
+				nil,
+			)
+			sdl.BindGPUGraphicsPipeline(pass, pipeline.blur_pipeline)
+			sdl.SetGPUViewport(pass, full_viewport)
+			sdl.SetGPUScissor(pass, full_scissor)
+			push_backdrop_vert_globals(cmd_buffer, f32(swapchain_width), f32(swapchain_height), 1)
+			push_backdrop_blur_frag_globals(cmd_buffer, &frag_uniforms)
+			sdl.BindGPUVertexStorageBuffers(pass, 0, ([^]^sdl.GPUBuffer)(&pipeline.primitive_buffer.gpu), 1)
+			sdl.BindGPUFragmentSamplers(
+				pass,
+				0,
+				&sdl.GPUTextureSamplerBinding{texture = pipeline.downsample_texture, sampler = pipeline.sampler},
+				1,
+			)
+			// Every sub-batch in [group_start, group_end) is .Backdrop by construction of the
+			// group scan above; each one is a single instanced unit-quad draw.
+			for j in group_start ..< group_end {
+				grp := GLOB.tmp_sub_batches[j]
+				sdl.DrawGPUPrimitives(pass, 6, grp.count, 0, grp.offset)
+			}
+			sdl.EndGPURenderPass(pass)
+		}
+
+		i = group_end
+	}
+}
+
+//----- Primitive builders ----------------------------------
+
+// Build a Gaussian_Blur_Primitive with bounds, radii, and feather computed from rectangle
+// geometry. The caller sets `color` (tint) on the returned primitive before submitting.
+//
+// No rotation, no outline — gaussian blur primitives are intentionally limited to axis-aligned
+// RRects. Rotation breaks screen-space blur sampling visually; outline would be a specialized
+// edge effect that belongs in its own primitive type.
+//INTERNAL
+build_backdrop_primitive :: proc(
+	rect: Rectangle,
+	radii: Rectangle_Radii,
+	feather_px: f32,
+) -> Gaussian_Blur_Primitive {
+	// Read the DPI scale once. Bounds stay in the caller's units (only the feather padding is
+	// divided by the scale); radii and half_size are multiplied by it.
+	scale := GLOB.dpi_scaling
+
+	half_w := rect.width * 0.5
+	half_h := rect.height * 0.5
+	mid_x := rect.x + half_w
+	mid_y := rect.y + half_h
+
+	// A corner radius can never exceed half of the shorter side.
+	radius_limit := min(rect.width, rect.height) * 0.5
+	top_left := clamp(radii.top_left, 0, radius_limit)
+	top_right := clamp(radii.top_right, 0, radius_limit)
+	bottom_right := clamp(radii.bottom_right, 0, radius_limit)
+	bottom_left := clamp(radii.bottom_left, 0, radius_limit)
+
+	// The quad is grown by half the feather width on every side so the soft edge is not clipped.
+	feather_half := feather_px * 0.5
+	pad := feather_half / scale
+
+	prim: Gaussian_Blur_Primitive
+	prim.bounds = {
+		mid_x - half_w - pad,
+		mid_y - half_h - pad,
+		mid_x + half_w + pad,
+		mid_y + half_h + pad,
+	}
+	// Radii ordering matches the shader's sdRoundedBox swizzle:
+	//   (p.x > 0) ? r.xy : r.zw   picks right-vs-left half
+	//   then (p.y > 0) ? rxy.x : rxy.y   picks bottom-vs-top within that half
+	// So slot 0 = bottom-right, slot 1 = top-right, slot 2 = bottom-left, slot 3 = top-left.
+	prim.radii = {
+		bottom_right * scale,
+		top_right * scale,
+		bottom_left * scale,
+		top_left * scale,
+	}
+	prim.half_size = {half_w * scale, half_h * scale}
+	prim.half_feather = feather_half
+	// `color` is intentionally left zeroed; the caller assigns the tint before submitting.
+	return prim
+}
+
+// Append a Gaussian_Blur_Primitive to the staging array and emit a .Backdrop sub-batch
+// carrying the requested gaussian_sigma. Sub-batch coalescing in append_or_extend_sub_batch
+// will merge contiguous backdrops that share a sigma into a single instanced draw.
+//INTERNAL
+prepare_backdrop_primitive :: proc(layer: ^Layer, prim: Gaussian_Blur_Primitive, gaussian_sigma: f32) {
+	// The new primitive lands at the current end of the staging array.
+	prim_index := u32(len(GLOB.tmp_gaussian_blur_primitives))
+	append(&GLOB.tmp_gaussian_blur_primitives, prim)
+
+	// Sub-batches are recorded against the layer's last scissor.
+	last_scissor := layer.scissor_start + layer.scissor_len - 1
+	append_or_extend_sub_batch(
+		&GLOB.scissors[last_scissor],
+		layer,
+		.Backdrop,
+		offset = prim_index,
+		count = 1,
+		gaussian_sigma = gaussian_sigma,
+	)
+}
+
+//----- Public API ----------------------------------
+
+// Draw a rectangle whose interior samples a Gaussian-blurred snapshot of the framebuffer
+// behind it. RRect-only — covers rectangles, rounded rectangles, and circles via
+// uniform_radii.
+//
+// `gaussian_sigma` is the Gaussian standard deviation in logical pixels. Typical UI range is
+// 4..24. sigma <= 0 produces a sharp framebuffer mirror (no blur).
+//
+// `tint` controls the color of the frosted glass:
+//   - tint.rgb is the tint color.
+//   - tint.a is the tint *mix strength*, NOT panel opacity. The panel is always fully
+//     opaque inside its mask (matching real frosted glass and iOS UIBlurEffect / CSS
+//     backdrop-filter). At alpha=0 the user sees the pure blur unchanged; at alpha=255
+//     the blur is fully multiplied by tint.rgb. Intermediate values lerp between the two.
+//   - For a translucent panel layered over content, draw a separate translucent rect on
+//     top instead — the backdrop's job is to deliver the blur, not to blend with what's
+//     beneath it.
+//
+// Backdrop primitives have no rotation: backdrop sampling is in screen space, so a rotated
+// mask over a stationary blur sample would look visually wrong. iOS UIVisualEffectView,
+// CSS backdrop-filter, and Flutter BackdropFilter all enforce this implicitly; we enforce
+// it explicitly by leaving no rotation parameter.
+//
+// Within a single layer, primitives sharing the same `gaussian_sigma` share one H+V blur
+// pass pair via sub-batch coalescing. Primitives with different sigmas in the same layer
+// trigger separate blur passes (cost scales with the number of unique sigmas).
+//
+// Submission ordering is asymmetric: a non-backdrop draw submitted between two backdrops in
+// the same layer renders *on top of* both backdrops, not between them. Use `draw.new_layer`
+// to interleave. See README.md § "Backdrop pipeline" for the full bracket scheduling model.
+gaussian_blur :: proc(
+	layer: ^Layer,
+	rect: Rectangle,
+	gaussian_sigma: f32,
+	tint: Color = DFT_TINT,
+	radii: Rectangle_Radii = {},
+	feather_px: f32 = DFT_FEATHER_PX,
+) {
+	// Geometry comes from the shared builder; the tint is the only field set here.
+	backdrop := build_backdrop_primitive(rect, radii, feather_px)
+	backdrop.color = tint
+	prepare_backdrop_primitive(layer, backdrop, gaussian_sigma)
+}
diff --git a/draw/core_2d.odin b/draw/core_2d.odin
new file mode 100644
index 0000000..b441911
--- /dev/null
+++ b/draw/core_2d.odin
@@ -0,0 +1,1601 @@
+package draw
+
+import "core:c"
+import "core:log"
+import "core:math"
+import "core:mem"
+import sdl "vendor:sdl3"
+import sdl_ttf "vendor:sdl3/ttf"
+
+//----- Vertex layout ----------------------------------
+
+// Vertex layout for tessellated and text geometry.
+// IMPORTANT: `color` must be premultiplied alpha (RGB channels pre-scaled by alpha).
+// The tessellated fragment shader passes vertex color through directly — it does NOT
+// premultiply. The blend state is ONE, ONE_MINUS_SRC_ALPHA (premultiplied-over).
+// Use `premultiply_color` when constructing vertices manually for `prepare_shape`.
+Vertex_2D :: struct {
+	position: Vec2, // bound as a FLOAT2 vertex attribute at shader location 0
+	uv:       [2]f32, // bound as a FLOAT2 vertex attribute at shader location 1
+	color:    Color, // bound as UBYTE4_NORM at shader location 2 (premultiplied, see above)
+}
+
+// One indexed text draw: which atlas to sample and which index/vertex ranges to draw.
+// `vertex_start` is relative to the start of the text vertices; the renderer adds the
+// shape-vertex count when issuing the draw because text vertices are uploaded after shapes.
+//INTERNAL
+Text_Batch :: struct {
+	atlas_texture: ^sdl.GPUTexture,
+	vertex_start:  u32,
+	vertex_count:  u32,
+	index_start:   u32,
+	index_count:   u32,
+}
+
+// ---------------------------------------------------------------------------------------------------------------------
+// ----- Primitive types ------------
+// ---------------------------------------------------------------------------------------------------------------------
+
+// The SDF path evaluates one of four signed distance functions per primitive, dispatched
+// by Shape_Kind encoded in the low byte of Core_2D_Primitive.flags:
+//
+//   RRect    — rounded rectangle with per-corner radii (sdRoundedBox). Also covers circles
+//              (uniform radii = half-size), capsule-style line segments (rotated, max rounding),
+//              and other RRect-reducible shapes.
+//   NGon     — regular polygon with N sides and optional rounding.
+//   Ellipse  — approximate ellipse (non-exact SDF, suitable for UI but not for shape merging).
+//   Ring_Arc — annular ring with optional angular clipping. Covers full rings, partial arcs,
+//              pie slices (inner_radius = 0), and loading spinners.
+//INTERNAL
+Shape_Kind :: enum u8 {
+	Solid    = 0, // tessellated path (mode marker; not a real SDF kind)
+	RRect    = 1,
+	NGon     = 2,
+	Ellipse  = 3,
+	Ring_Arc = 4,
+}
+
+//INTERNAL
+Shape_Flag :: enum u8 {
+	Textured, // bit 0: sample texture using uv_rect (mutually exclusive with Gradient via Brush union)
+	Gradient, // bit 1: 2-color gradient using effects.gradient_color as end/outer color
+	Gradient_Radial, // bit 2: if set with Gradient, radial from center; else linear at angle
+	Outline, // bit 3: outer outline band using effects.outline_color; CPU expands bounds by outline_width
+	Rotated, // bit 4: shape has non-zero rotation; rotation_sc contains packed sin/cos
+	Arc_Narrow, // bit 5: ring arc span ≤ π — intersect half-planes. Neither Arc bit = full ring.
+	Arc_Wide, // bit 6: ring arc span > π — union half-planes. Neither Arc bit = full ring.
+}
+
+//INTERNAL
+Shape_Flags :: bit_set[Shape_Flag;u8]
+
+// Each *_Params struct below is one view of the 32-byte Shape_Params union. The per-struct
+// size asserts catch a variant that accidentally shrinks or grows; the union-level assert
+// alone only notices a variant that becomes larger than 32 bytes.
+
+//INTERNAL
+RRect_Params :: struct {
+	half_size:    [2]f32,
+	radii:        [4]f32,
+	half_feather: f32, // feather_px * 0.5; shader uses smoothstep(-h, h, d)
+	_:            f32,
+}
+#assert(size_of(RRect_Params) == 32)
+
+//INTERNAL
+NGon_Params :: struct {
+	radius:       f32,
+	sides:        f32,
+	half_feather: f32, // feather_px * 0.5; shader uses smoothstep(-h, h, d)
+	_:            [5]f32,
+}
+#assert(size_of(NGon_Params) == 32)
+
+//INTERNAL
+Ellipse_Params :: struct {
+	radii:        [2]f32,
+	half_feather: f32, // feather_px * 0.5; shader uses smoothstep(-h, h, d)
+	_:            [5]f32,
+}
+#assert(size_of(Ellipse_Params) == 32)
+
+//INTERNAL
+Ring_Arc_Params :: struct {
+	inner_radius: f32, // inner radius in physical pixels (0 for pie slice)
+	outer_radius: f32, // outer radius in physical pixels
+	normal_start: [2]f32, // pre-computed outward normal of start edge: (sin(start), -cos(start))
+	normal_end:   [2]f32, // pre-computed outward normal of end edge: (-sin(end), cos(end))
+	half_feather: f32, // feather_px * 0.5; shader uses smoothstep(-h, h, d)
+	_:            f32,
+}
+#assert(size_of(Ring_Arc_Params) == 32)
+
+// Overlay of all per-kind parameter layouts; `raw` exposes the same 32 bytes as eight floats.
+//INTERNAL
+Shape_Params :: struct #raw_union {
+	rrect:    RRect_Params,
+	ngon:     NGon_Params,
+	ellipse:  Ellipse_Params,
+	ring_arc: Ring_Arc_Params,
+	raw:      [8]f32,
+}
+#assert(size_of(Shape_Params) == 32)
+
+// GPU-side storage for 2-color gradient parameters and/or outline parameters.
+// Packed into 16 bytes. Independent from uv_rect — texture and outline can coexist.
+// The shader reads gradient_color and outline_color via unpackUnorm4x8.
+// gradient_dir_sc stores the pre-computed gradient direction as (cos, sin) in f16 pair
+// via unpackHalf2x16. outline_packed stores outline_width as f16 via unpackHalf2x16.
+//INTERNAL
+Gradient_Outline :: struct {
+	gradient_color:  Color, // 0: end (linear) or outer (radial) gradient color
+	outline_color:   Color, // 4: outline band color
+	gradient_dir_sc: u32, // 8: packed f16 pair: low = cos(angle), high = sin(angle) — pre-computed gradient direction
+	outline_packed:  u32, // 12: packed f16 pair: low = outline_width (f16, physical pixels), high = reserved
+}
+#assert(size_of(Gradient_Outline) == 16)
+
+// GPU layout: 96 bytes, std430-compatible. The shader declares this as a storage buffer struct.
+// The low byte of `flags` encodes the Shape_Kind (0 = tessellated, 1-4 = SDF kinds).
+// Bits 8-15 encode Shape_Flags (Textured, Gradient, Gradient_Radial, Outline, Rotated, Arc_Narrow, Arc_Wide).
+// rotation_sc stores pre-computed sin/cos of the rotation angle as a packed f16 pair,
+// avoiding per-pixel trigonometry in the fragment shader. Only read when .Rotated is set.
+//
+// Named Core_2D_Primitive (not just Primitive) to disambiguate from Gaussian_Blur_Primitive
+// (and any future per-effect primitive types) in backdrop.odin. Each path/effect's primitive
+// type has its own GPU layout and fragment-shader contract; pairing each with its own
+// primitive type keeps cross-references unambiguous when grepping the codebase.
+//INTERNAL
+Core_2D_Primitive :: struct {
+	bounds:      [4]f32, // 0: min_x, min_y, max_x, max_y (world-space, pre-DPI)
+	color:       Color, // 16: u8x4, fill color / gradient start color / texture tint
+	flags:       u32, // 20: low byte = Shape_Kind, bits 8+ = Shape_Flags
+	rotation_sc: u32, // 24: packed f16 pair: low = sin(angle), high = cos(angle). Requires .Rotated flag.
+	_pad:        f32, // 28: reserved for future use
+	params:      Shape_Params, // 32: per-kind shape parameters (raw union, 32 bytes)
+	uv_rect:     [4]f32, // 64: texture UV coordinates (u_min, v_min, u_max, v_max). Read when .Textured.
+	effects:     Gradient_Outline, // 80: gradient and/or outline parameters. Read when .Gradient and/or .Outline.
+}
+#assert(size_of(Core_2D_Primitive) == 96)
+
+// Build the value stored in Core_2D_Primitive.flags: Shape_Kind in bits 0-7 (it doubles as the
+// SDF mode marker — kind > 0 means SDF path) and the Shape_Flags bit set in bits 8-15. The
+// tessellated path leaves the field at 0 (Solid kind, set by vertex shader zero-initialization).
+//INTERNAL
+pack_kind_flags :: #force_inline proc(kind: Shape_Kind, flags: Shape_Flags) -> u32 {
+	kind_bits := u32(kind)
+	flag_bits := u32(transmute(u8)flags)
+	return kind_bits | (flag_bits << 8)
+}
+
+// Combine the raw bit patterns of two f16 values into one u32 (`low` in bits 0-15, `high` in
+// bits 16-31) for GPU consumption via unpackHalf2x16. Used to pack gradient_dir_sc (cos/sin)
+// and outline_packed (width/reserved) in Gradient_Outline.
+//INTERNAL
+pack_f16_pair :: #force_inline proc(low, high: f16) -> u32 {
+	low_bits := u32(transmute(u16)low)
+	high_bits := u32(transmute(u16)high)
+	return low_bits | (high_bits << 16)
+}
+
+// ---------------------------------------------------------------------------------------------------------------------
+// ----- Subsystem lifecycle ------------
+// ---------------------------------------------------------------------------------------------------------------------
+
+// GPU resources owned by the core 2D renderer. Created by create_core_2d and released by
+// destroy_core_2d.
+//INTERNAL
+Core_2D :: struct {
+	sdl_pipeline:     ^sdl.GPUGraphicsPipeline,
+	vertex_buffer:    Buffer,
+	index_buffer:     Buffer,
+	unit_quad_buffer: ^sdl.GPUBuffer,
+	primitive_buffer: Buffer,
+	white_texture:    ^sdl.GPUTexture,
+	sampler:          ^sdl.GPUSampler,
+}
+
+// MSAA is not supported by levlib (see init's doc comment in draw.odin); the PSO is hard-wired
+// to single-sample. SDF text and shapes provide analytical AA via smoothstep; tessellated user
+// geometry is not anti-aliased.
+//INTERNAL
+create_core_2d :: proc(device: ^sdl.GPUDevice, window: ^sdl.Window) -> (core_2d: Core_2D, ok: bool) {
+	// On failure, clean up any partially-created resources. Every early `return core_2d, false`
+	// below relies on this block; resources are released in reverse creation order.
+	defer if !ok {
+		if core_2d.sampler != nil do sdl.ReleaseGPUSampler(device, core_2d.sampler)
+		if core_2d.white_texture != nil do sdl.ReleaseGPUTexture(device, core_2d.white_texture)
+		if core_2d.unit_quad_buffer != nil do sdl.ReleaseGPUBuffer(device, core_2d.unit_quad_buffer)
+		if core_2d.primitive_buffer.gpu != nil do destroy_buffer(device, &core_2d.primitive_buffer)
+		if core_2d.index_buffer.gpu != nil do destroy_buffer(device, &core_2d.index_buffer)
+		if core_2d.vertex_buffer.gpu != nil do destroy_buffer(device, &core_2d.vertex_buffer)
+		if core_2d.sdl_pipeline != nil do sdl.ReleaseGPUGraphicsPipeline(device, core_2d.sdl_pipeline)
+	}
+
+	// The embedded shader blobs exist in exactly one format per build; bail out early if the
+	// device cannot consume it.
+	active_shader_formats := sdl.GetGPUShaderFormats(device)
+	if PLATFORM_SHADER_FORMAT_FLAG not_in active_shader_formats {
+		log.errorf(
+			"draw: no embedded shader matches active GPU formats; this build supports %v but device reports %v",
+			PLATFORM_SHADER_FORMAT,
+			active_shader_formats,
+		)
+		return core_2d, false
+	}
+
+	log.debug("Loaded", len(BASE_VERT_2D_RAW), "vert bytes")
+	log.debug("Loaded", len(BASE_FRAG_2D_RAW), "frag bytes")
+
+	// Vertex stage: one uniform block (see Vertex_Uniforms_2D) and one storage buffer (primitives).
+	vert_info := sdl.GPUShaderCreateInfo {
+		code_size           = len(BASE_VERT_2D_RAW),
+		code                = raw_data(BASE_VERT_2D_RAW),
+		entrypoint          = SHADER_ENTRY,
+		format              = {PLATFORM_SHADER_FORMAT_FLAG},
+		stage               = .VERTEX,
+		num_uniform_buffers = 1,
+		num_storage_buffers = 1,
+	}
+
+	// Fragment stage: a single texture/sampler pair.
+	frag_info := sdl.GPUShaderCreateInfo {
+		code_size    = len(BASE_FRAG_2D_RAW),
+		code         = raw_data(BASE_FRAG_2D_RAW),
+		entrypoint   = SHADER_ENTRY,
+		format       = {PLATFORM_SHADER_FORMAT_FLAG},
+		stage        = .FRAGMENT,
+		num_samplers = 1,
+	}
+
+	vert_shader := sdl.CreateGPUShader(device, vert_info)
+	if vert_shader == nil {
+		log.errorf("Could not create draw vertex shader: %s", sdl.GetError())
+		return core_2d, false
+	}
+
+	frag_shader := sdl.CreateGPUShader(device, frag_info)
+	if frag_shader == nil {
+		// The shaders are locals, not fields of core_2d, so the deferred cleanup cannot see them.
+		sdl.ReleaseGPUShader(device, vert_shader)
+		log.errorf("Could not create draw fragment shader: %s", sdl.GetError())
+		return core_2d, false
+	}
+
+	vertex_attributes: [3]sdl.GPUVertexAttribute = {
+		// position (GLSL location 0)
+		sdl.GPUVertexAttribute{buffer_slot = 0, location = 0, format = .FLOAT2, offset = 0},
+		// uv (GLSL location 1)
+		sdl.GPUVertexAttribute{buffer_slot = 0, location = 1, format = .FLOAT2, offset = size_of([2]f32)},
+		// color (GLSL location 2, u8x4 normalized to float by GPU)
+		sdl.GPUVertexAttribute{buffer_slot = 0, location = 2, format = .UBYTE4_NORM, offset = size_of([2]f32) * 2},
+	}
+
+	pipeline_info := sdl.GPUGraphicsPipelineCreateInfo {
+		vertex_shader = vert_shader,
+		fragment_shader = frag_shader,
+		primitive_type = .TRIANGLELIST,
+		multisample_state = sdl.GPUMultisampleState{sample_count = ._1},
+		target_info = sdl.GPUGraphicsPipelineTargetInfo {
+			color_target_descriptions = &sdl.GPUColorTargetDescription {
+				format = sdl.GetGPUSwapchainTextureFormat(device, window),
+				// Premultiplied-alpha blending: src outputs RGB pre-multiplied by alpha,
+				// so src factor is ONE (not SRC_ALPHA). This eliminates the per-pixel
+				// divide in the outline path and is the standard blend mode used by
+				// Skia, Flutter, and GPUI.
+				blend_state = sdl.GPUColorTargetBlendState {
+					enable_blend = true,
+					enable_color_write_mask = true,
+					src_color_blendfactor = .ONE,
+					dst_color_blendfactor = .ONE_MINUS_SRC_ALPHA,
+					color_blend_op = .ADD,
+					src_alpha_blendfactor = .ONE,
+					dst_alpha_blendfactor = .ONE_MINUS_SRC_ALPHA,
+					alpha_blend_op = .ADD,
+					color_write_mask = sdl.GPUColorComponentFlags{.R, .G, .B, .A},
+				},
+			},
+			num_color_targets = 1,
+		},
+		vertex_input_state = sdl.GPUVertexInputState {
+			vertex_buffer_descriptions = &sdl.GPUVertexBufferDescription {
+				slot = 0,
+				input_rate = .VERTEX,
+				pitch = size_of(Vertex_2D),
+			},
+			num_vertex_buffers = 1,
+			vertex_attributes = raw_data(vertex_attributes[:]),
+			num_vertex_attributes = 3,
+		},
+	}
+
+	core_2d.sdl_pipeline = sdl.CreateGPUGraphicsPipeline(device, pipeline_info)
+	// Shaders are no longer needed regardless of pipeline creation success
+	sdl.ReleaseGPUShader(device, vert_shader)
+	sdl.ReleaseGPUShader(device, frag_shader)
+	if core_2d.sdl_pipeline == nil {
+		log.errorf("Failed to create draw graphics pipeline: %s", sdl.GetError())
+		return core_2d, false
+	}
+
+	// Create vertex buffer
+	vert_buf_ok: bool
+	core_2d.vertex_buffer, vert_buf_ok = create_buffer(
+		device,
+		size_of(Vertex_2D) * BUFFER_INIT_SIZE,
+		sdl.GPUBufferUsageFlags{.VERTEX},
+	)
+	if !vert_buf_ok do return core_2d, false
+
+	// Create index buffer (used by text)
+	idx_buf_ok: bool
+	core_2d.index_buffer, idx_buf_ok = create_buffer(
+		device,
+		size_of(c.int) * BUFFER_INIT_SIZE,
+		sdl.GPUBufferUsageFlags{.INDEX},
+	)
+	if !idx_buf_ok do return core_2d, false
+
+	// Create primitive storage buffer (used by SDF instanced drawing)
+	prim_buf_ok: bool
+	core_2d.primitive_buffer, prim_buf_ok = create_buffer(
+		device,
+		size_of(Core_2D_Primitive) * BUFFER_INIT_SIZE,
+		sdl.GPUBufferUsageFlags{.GRAPHICS_STORAGE_READ},
+	)
+	if !prim_buf_ok do return core_2d, false
+
+	// Create static 6-vertex unit quad buffer (two triangles, TRIANGLELIST)
+	core_2d.unit_quad_buffer = sdl.CreateGPUBuffer(
+		device,
+		sdl.GPUBufferCreateInfo{usage = {.VERTEX}, size = 6 * size_of(Vertex_2D)},
+	)
+	if core_2d.unit_quad_buffer == nil {
+		log.errorf("Failed to create unit quad buffer: %s", sdl.GetError())
+		return core_2d, false
+	}
+
+	// Create 1x1 white pixel texture (bound whenever a batch has no texture of its own)
+	core_2d.white_texture = sdl.CreateGPUTexture(
+		device,
+		sdl.GPUTextureCreateInfo {
+			type = .D2,
+			format = .R8G8B8A8_UNORM,
+			usage = {.SAMPLER},
+			width = 1,
+			height = 1,
+			layer_count_or_depth = 1,
+			num_levels = 1,
+			sample_count = ._1,
+		},
+	)
+	if core_2d.white_texture == nil {
+		log.errorf("Failed to create white pixel texture: %s", sdl.GetError())
+		return core_2d, false
+	}
+
+	// Upload white pixel and unit quad data in a single command buffer
+	white_pixel := Color{255, 255, 255, 255}
+	white_transfer_buf := sdl.CreateGPUTransferBuffer(
+		device,
+		sdl.GPUTransferBufferCreateInfo{usage = .UPLOAD, size = size_of(white_pixel)},
+	)
+	if white_transfer_buf == nil {
+		log.errorf("Failed to create white pixel transfer buffer: %s", sdl.GetError())
+		return core_2d, false
+	}
+	// Staging buffers are temporary: released on every exit path, success or failure.
+	defer sdl.ReleaseGPUTransferBuffer(device, white_transfer_buf)
+
+	white_ptr := sdl.MapGPUTransferBuffer(device, white_transfer_buf, false)
+	if white_ptr == nil {
+		log.errorf("Failed to map white pixel transfer buffer: %s", sdl.GetError())
+		return core_2d, false
+	}
+	mem.copy(white_ptr, &white_pixel, size_of(white_pixel))
+	sdl.UnmapGPUTransferBuffer(device, white_transfer_buf)
+
+	// Unit quad corners in [0, 1]; only `position` is set, uv and color stay zeroed.
+	quad_verts := [6]Vertex_2D {
+		{position = {0, 0}},
+		{position = {1, 0}},
+		{position = {0, 1}},
+		{position = {0, 1}},
+		{position = {1, 0}},
+		{position = {1, 1}},
+	}
+	quad_transfer_buf := sdl.CreateGPUTransferBuffer(
+		device,
+		sdl.GPUTransferBufferCreateInfo{usage = .UPLOAD, size = size_of(quad_verts)},
+	)
+	if quad_transfer_buf == nil {
+		log.errorf("Failed to create unit quad transfer buffer: %s", sdl.GetError())
+		return core_2d, false
+	}
+	defer sdl.ReleaseGPUTransferBuffer(device, quad_transfer_buf)
+
+	quad_ptr := sdl.MapGPUTransferBuffer(device, quad_transfer_buf, false)
+	if quad_ptr == nil {
+		log.errorf("Failed to map unit quad transfer buffer: %s", sdl.GetError())
+		return core_2d, false
+	}
+	mem.copy(quad_ptr, &quad_verts, size_of(quad_verts))
+	sdl.UnmapGPUTransferBuffer(device, quad_transfer_buf)
+
+	upload_cmd_buffer := sdl.AcquireGPUCommandBuffer(device)
+	if upload_cmd_buffer == nil {
+		log.errorf("Failed to acquire command buffer for init upload: %s", sdl.GetError())
+		return core_2d, false
+	}
+	upload_pass := sdl.BeginGPUCopyPass(upload_cmd_buffer)
+
+	sdl.UploadToGPUTexture(
+		upload_pass,
+		sdl.GPUTextureTransferInfo{transfer_buffer = white_transfer_buf},
+		sdl.GPUTextureRegion{texture = core_2d.white_texture, w = 1, h = 1, d = 1},
+		false,
+	)
+
+	sdl.UploadToGPUBuffer(
+		upload_pass,
+		sdl.GPUTransferBufferLocation{transfer_buffer = quad_transfer_buf},
+		sdl.GPUBufferRegion{buffer = core_2d.unit_quad_buffer, offset = 0, size = size_of(quad_verts)},
+		false,
+	)
+
+	sdl.EndGPUCopyPass(upload_pass)
+	if !sdl.SubmitGPUCommandBuffer(upload_cmd_buffer) {
+		log.errorf("Failed to submit init upload command buffer: %s", sdl.GetError())
+		return core_2d, false
+	}
+
+	log.debug("White pixel texture and unit quad buffer created and uploaded")
+
+	// Create sampler (shared by shapes and text)
+	core_2d.sampler = sdl.CreateGPUSampler(
+		device,
+		sdl.GPUSamplerCreateInfo {
+			min_filter = .LINEAR,
+			mag_filter = .LINEAR,
+			mipmap_mode = .LINEAR,
+			address_mode_u = .CLAMP_TO_EDGE,
+			address_mode_v = .CLAMP_TO_EDGE,
+			address_mode_w = .CLAMP_TO_EDGE,
+		},
+	)
+	if core_2d.sampler == nil {
+		log.errorf("Could not create GPU sampler: %s", sdl.GetError())
+		return core_2d, false
+	}
+
+	log.debug("Done creating core 2D subsystem")
+	return core_2d, true
+}
+
+// Release every GPU resource owned by `core`. Each raw SDL handle is nil-checked and reset to
+// nil after release, so the struct never holds dangling handles and releasing them a second
+// time is skipped. The three Buffer fields are handed to destroy_buffer unconditionally.
+// NOTE(review): whether destroy_buffer itself tolerates an already-destroyed Buffer is not
+// visible from here — confirm before relying on repeated destroy_core_2d calls.
+//INTERNAL
+destroy_core_2d :: proc(device: ^sdl.GPUDevice, core: ^Core_2D) {
+	destroy_buffer(device, &core.vertex_buffer)
+	destroy_buffer(device, &core.index_buffer)
+	destroy_buffer(device, &core.primitive_buffer)
+	if core.unit_quad_buffer != nil {
+		sdl.ReleaseGPUBuffer(device, core.unit_quad_buffer)
+		core.unit_quad_buffer = nil
+	}
+	if core.white_texture != nil {
+		sdl.ReleaseGPUTexture(device, core.white_texture)
+		core.white_texture = nil
+	}
+	if core.sampler != nil {
+		sdl.ReleaseGPUSampler(device, core.sampler)
+		core.sampler = nil
+	}
+	if core.sdl_pipeline != nil {
+		sdl.ReleaseGPUGraphicsPipeline(device, core.sdl_pipeline)
+		core.sdl_pipeline = nil
+	}
+}
+
+// ---------------------------------------------------------------------------------------------------------------------
+// ----- Upload and render ------------
+// ---------------------------------------------------------------------------------------------------------------------
+
+//----- Vertex uniforms ----------------------------------
+
+// Submission mode pushed to the vertex shader in Vertex_Uniforms_2D.mode.
+//INTERNAL
+Core_2D_Mode :: enum u32 {
+	Tessellated = 0,
+	SDF         = 1,
+}
+
+// Layout of the vertex-stage uniform block pushed by push_globals (uniform slot 0).
+//INTERNAL
+Vertex_Uniforms_2D :: struct {
+	projection: matrix[4, 4]f32,
+	scale:      f32,
+	mode:       Core_2D_Mode,
+}
+
+// Push projection, dpi scale, and rendering mode as a single uniform block (slot 0).
+// `width`/`height` define the orthographic projection rectangle: (0, 0) is the top-left
+// corner and (width, height) the bottom-right.
+//INTERNAL
+push_globals :: proc(
+	cmd_buffer: ^sdl.GPUCommandBuffer,
+	width: f32,
+	height: f32,
+	mode: Core_2D_Mode = .Tessellated,
+) {
+	globals := Vertex_Uniforms_2D {
+		projection = ortho_rh(
+			left = 0.0,
+			top = 0.0,
+			right = width,
+			bottom = height,
+			near = -1.0,
+			far = 1.0,
+		),
+		scale = GLOB.dpi_scaling,
+		mode = mode,
+	}
+
+	sdl.PushGPUVertexUniformData(cmd_buffer, 0, &globals, size_of(Vertex_Uniforms_2D))
+}
+
+//----- Per-frame upload ----------------------------------
+
+// Copy this frame's CPU-side staging arrays (shape + text vertices, text indices, SDF
+// primitives) into their GPU buffers through each buffer's transfer buffer, recording the
+// copies on `pass`. Each section is skipped when its staging array is empty. A failed map is
+// treated as fatal (log.panicf).
+// NOTE(review): both the map and the upload pass `cycle = false`; this presumably relies on
+// the previous frame's copy having completed before the transfer buffer is mapped again —
+// confirm against the frame synchronization in the caller.
+//INTERNAL
+upload :: proc(device: ^sdl.GPUDevice, pass: ^sdl.GPUCopyPass) {
+	// Upload vertices (shapes then text into one buffer)
+	shape_vert_count := u32(len(GLOB.tmp_shape_verts))
+	text_vert_count := u32(len(GLOB.tmp_text_verts))
+	total_vert_count := shape_vert_count + text_vert_count
+
+	if total_vert_count > 0 {
+		total_vert_size := total_vert_count * size_of(Vertex_2D)
+		shape_vert_size := shape_vert_count * size_of(Vertex_2D)
+		text_vert_size := text_vert_count * size_of(Vertex_2D)
+
+		grow_buffer_if_needed(
+			device,
+			&GLOB.core_2d.vertex_buffer,
+			total_vert_size,
+			sdl.GPUBufferUsageFlags{.VERTEX},
+		)
+
+		vert_array := sdl.MapGPUTransferBuffer(device, GLOB.core_2d.vertex_buffer.transfer, false)
+		if vert_array == nil {
+			log.panicf("Failed to map vertex transfer buffer: %s", sdl.GetError())
+		}
+		if shape_vert_size > 0 {
+			mem.copy(vert_array, raw_data(GLOB.tmp_shape_verts), int(shape_vert_size))
+		}
+		if text_vert_size > 0 {
+			// Text vertices are written directly after the shape vertices.
+			mem.copy(
+				rawptr(uintptr(vert_array) + uintptr(shape_vert_size)),
+				raw_data(GLOB.tmp_text_verts),
+				int(text_vert_size),
+			)
+		}
+		sdl.UnmapGPUTransferBuffer(device, GLOB.core_2d.vertex_buffer.transfer)
+
+		sdl.UploadToGPUBuffer(
+			pass,
+			sdl.GPUTransferBufferLocation{transfer_buffer = GLOB.core_2d.vertex_buffer.transfer},
+			sdl.GPUBufferRegion{buffer = GLOB.core_2d.vertex_buffer.gpu, offset = 0, size = total_vert_size},
+			false,
+		)
+	}
+
+	// Upload text indices
+	index_count := u32(len(GLOB.tmp_text_indices))
+	if index_count > 0 {
+		index_size := index_count * size_of(c.int)
+
+		grow_buffer_if_needed(device, &GLOB.core_2d.index_buffer, index_size, sdl.GPUBufferUsageFlags{.INDEX})
+
+		idx_array := sdl.MapGPUTransferBuffer(device, GLOB.core_2d.index_buffer.transfer, false)
+		if idx_array == nil {
+			log.panicf("Failed to map index transfer buffer: %s", sdl.GetError())
+		}
+		mem.copy(idx_array, raw_data(GLOB.tmp_text_indices), int(index_size))
+		sdl.UnmapGPUTransferBuffer(device, GLOB.core_2d.index_buffer.transfer)
+
+		sdl.UploadToGPUBuffer(
+			pass,
+			sdl.GPUTransferBufferLocation{transfer_buffer = GLOB.core_2d.index_buffer.transfer},
+			sdl.GPUBufferRegion{buffer = GLOB.core_2d.index_buffer.gpu, offset = 0, size = index_size},
+			false,
+		)
+	}
+
+	// Upload SDF primitives
+	prim_count := u32(len(GLOB.tmp_primitives))
+	if prim_count > 0 {
+		prim_size := prim_count * size_of(Core_2D_Primitive)
+
+		grow_buffer_if_needed(
+			device,
+			&GLOB.core_2d.primitive_buffer,
+			prim_size,
+			sdl.GPUBufferUsageFlags{.GRAPHICS_STORAGE_READ},
+		)
+
+		prim_array := sdl.MapGPUTransferBuffer(device, GLOB.core_2d.primitive_buffer.transfer, false)
+		if prim_array == nil {
+			log.panicf("Failed to map primitive transfer buffer: %s", sdl.GetError())
+		}
+		mem.copy(prim_array, raw_data(GLOB.tmp_primitives), int(prim_size))
+		sdl.UnmapGPUTransferBuffer(device, GLOB.core_2d.primitive_buffer.transfer)
+
+		sdl.UploadToGPUBuffer(
+			pass,
+			sdl.GPUTransferBufferLocation{transfer_buffer = GLOB.core_2d.primitive_buffer.transfer},
+			sdl.GPUBufferRegion{buffer = GLOB.core_2d.primitive_buffer.gpu, offset = 0, size = prim_size},
+			false,
+		)
+	}
+}
+
+//----- Layer dispatch ----------------------------------
+
+// Render one layer into `render_texture`.
+//   - Empty layer: only makes sure the target has been cleared once.
+//   - No backdrop in the layer: the whole sub-batch range is drawn in a single pass.
+//   - Otherwise: Pass A (everything before the first backdrop), the backdrop bracket, then
+//     Pass B (everything from the first backdrop on; backdrops themselves are skipped there).
+// `device` and `window` are not used by this proc.
+//INTERNAL
+draw_layer :: proc(
+	device: ^sdl.GPUDevice,
+	window: ^sdl.Window,
+	cmd_buffer: ^sdl.GPUCommandBuffer,
+	render_texture: ^sdl.GPUTexture,
+	swapchain_width: u32,
+	swapchain_height: u32,
+	clear_color: [4]f32,
+	layer: ^Layer,
+) {
+	layer_start_abs := int(layer.sub_batch_start)
+
+	if layer.sub_batch_len == 0 {
+		// An empty range makes render_layer_sub_batch_range issue the clear-only pass when the
+		// target has not been cleared yet, and do nothing otherwise — exactly what an empty
+		// layer needs, without duplicating the clear pass here.
+		render_layer_sub_batch_range(
+			cmd_buffer,
+			render_texture,
+			swapchain_width,
+			swapchain_height,
+			clear_color,
+			layer,
+			layer_start_abs,
+			layer_start_abs,
+		)
+		return
+	}
+
+	bracket_start_abs := find_first_backdrop_in_layer(layer)
+	layer_end_abs := int(layer.sub_batch_start + layer.sub_batch_len)
+
+	if bracket_start_abs < 0 {
+		// Fast path: no backdrop in this layer; render the whole sub-batch range in one pass.
+		render_layer_sub_batch_range(
+			cmd_buffer,
+			render_texture,
+			swapchain_width,
+			swapchain_height,
+			clear_color,
+			layer,
+			layer_start_abs,
+			layer_end_abs,
+		)
+		return
+	}
+
+	// Bracketed layer: Pass A → backdrop bracket → Pass B.
+	// See README.md § "Backdrop pipeline" for the full ordering semantics.
+	render_layer_sub_batch_range(
+		cmd_buffer,
+		render_texture,
+		swapchain_width,
+		swapchain_height,
+		clear_color,
+		layer,
+		layer_start_abs,
+		bracket_start_abs,
+	)
+
+	run_backdrop_bracket(cmd_buffer, layer, swapchain_width, swapchain_height)
+
+	// Pass B: render the [bracket_start_abs, layer_end_abs) range. .Backdrop sub-batches in
+	// this range are dispatched by the bracket above and ignored here (the .Backdrop case in
+	// the inner switch is a no-op). LOAD is implied because Pass A or the bracket's
+	// composite has already touched render_texture.
+	render_layer_sub_batch_range(
+		cmd_buffer,
+		render_texture,
+		swapchain_width,
+		swapchain_height,
+		clear_color,
+		layer,
+		bracket_start_abs,
+		layer_end_abs,
+	)
+}
+
+// Render a sub-range of a layer's sub-batches in a single render pass. Iterates the layer's
+// scissors and walks each scissor's sub-batches, dispatching by kind. The `range_start_abs`
+// and `range_end_abs` parameters are absolute indices into GLOB.tmp_sub_batches; only sub-
+// batches within `[range_start_abs, range_end_abs)` are drawn.
+//
+// .Backdrop sub-batches in the range are always silently skipped — they are dispatched by
+// run_backdrop_bracket, not here. The empty .Backdrop case in the inner switch enforces this.
+//
+// Render-pass setup mirrors the original draw_layer: clear-or-load based on GLOB.cleared,
+// pipeline + storage + index buffer bound up front, then per-batch state tracking. After this
+// proc returns, GLOB.cleared is guaranteed true.
+//
+// If the range itself is empty (range_start_abs >= range_end_abs), this proc still honors the
+// no-clear-yet contract by issuing a clear-only pass when needed; otherwise it returns without
+// opening a render pass. A non-empty range always opens a render pass, even when every
+// sub-batch in it is a .Backdrop and no draw call ends up being issued.
+//INTERNAL +render_layer_sub_batch_range :: proc( + cmd_buffer: ^sdl.GPUCommandBuffer, + render_texture: ^sdl.GPUTexture, + swapchain_width: u32, + swapchain_height: u32, + clear_color: [4]f32, + layer: ^Layer, + range_start_abs: int, + range_end_abs: int, +) { + if range_start_abs >= range_end_abs { + // Empty range. If we still owe a clear, do a clear-only pass; otherwise nothing to do. + if !GLOB.cleared { + pass := sdl.BeginGPURenderPass( + cmd_buffer, + &sdl.GPUColorTargetInfo { + texture = render_texture, + clear_color = sdl.FColor{clear_color[0], clear_color[1], clear_color[2], clear_color[3]}, + load_op = .CLEAR, + store_op = .STORE, + }, + 1, + nil, + ) + sdl.EndGPURenderPass(pass) + GLOB.cleared = true + } + return + } + + render_pass := sdl.BeginGPURenderPass( + cmd_buffer, + &sdl.GPUColorTargetInfo { + texture = render_texture, + clear_color = sdl.FColor{clear_color[0], clear_color[1], clear_color[2], clear_color[3]}, + load_op = GLOB.cleared ? .LOAD : .CLEAR, + store_op = .STORE, + }, + 1, + nil, + ) + GLOB.cleared = true + + sdl.BindGPUGraphicsPipeline(render_pass, GLOB.core_2d.sdl_pipeline) + + // Bind storage buffer (read by vertex shader in SDF mode) + sdl.BindGPUVertexStorageBuffers(render_pass, 0, ([^]^sdl.GPUBuffer)(&GLOB.core_2d.primitive_buffer.gpu), 1) + + // Always bind index buffer — harmless if no indexed draws are issued + sdl.BindGPUIndexBuffer( + render_pass, + sdl.GPUBufferBinding{buffer = GLOB.core_2d.index_buffer.gpu, offset = 0}, + ._32BIT, + ) + + // Shorthand aliases for frequently-used pipeline resources + main_vert_buf := GLOB.core_2d.vertex_buffer.gpu + unit_quad := GLOB.core_2d.unit_quad_buffer + white_texture := GLOB.core_2d.white_texture + sampler := GLOB.core_2d.sampler + width := f32(swapchain_width) + height := f32(swapchain_height) + + // Initial GPU state: tessellated mode, main vertex buffer, no atlas bound yet + push_globals(cmd_buffer, width, height, .Tessellated) + sdl.BindGPUVertexBuffers(render_pass, 0, 
&sdl.GPUBufferBinding{buffer = main_vert_buf, offset = 0}, 1) + + current_mode: Core_2D_Mode = .Tessellated + current_vert_buf := main_vert_buf + current_atlas: ^sdl.GPUTexture + current_sampler := sampler + + // Text vertices live after shape vertices in the GPU vertex buffer + text_vertex_gpu_base := u32(len(GLOB.tmp_shape_verts)) + + for &scissor in GLOB.scissors[layer.scissor_start:][:layer.scissor_len] { + // Intersect this scissor's sub-batch span with the requested range. + scissor_start := int(scissor.sub_batch_start) + scissor_end := scissor_start + int(scissor.sub_batch_len) + effective_start := max(scissor_start, range_start_abs) + effective_end := min(scissor_end, range_end_abs) + if effective_start >= effective_end do continue + + sdl.SetGPUScissor(render_pass, scissor.bounds) + + for abs_idx in effective_start ..< effective_end { + batch := &GLOB.tmp_sub_batches[abs_idx] + switch batch.kind { + case .Tessellated: + if current_mode != .Tessellated { + push_globals(cmd_buffer, width, height, .Tessellated) + current_mode = .Tessellated + } + if current_vert_buf != main_vert_buf { + sdl.BindGPUVertexBuffers(render_pass, 0, &sdl.GPUBufferBinding{buffer = main_vert_buf, offset = 0}, 1) + current_vert_buf = main_vert_buf + } + // Determine texture and sampler for this batch + batch_texture: ^sdl.GPUTexture = white_texture + batch_sampler: ^sdl.GPUSampler = sampler + if batch.texture_id != INVALID_TEXTURE { + if bound_texture := texture_gpu_handle(batch.texture_id); bound_texture != nil { + batch_texture = bound_texture + } + batch_sampler = get_sampler(batch.sampler) + } + if current_atlas != batch_texture || current_sampler != batch_sampler { + sdl.BindGPUFragmentSamplers( + render_pass, + 0, + &sdl.GPUTextureSamplerBinding{texture = batch_texture, sampler = batch_sampler}, + 1, + ) + current_atlas = batch_texture + current_sampler = batch_sampler + } + sdl.DrawGPUPrimitives(render_pass, batch.count, 1, batch.offset, 0) + + case .Text: + if current_mode != 
.Tessellated { + push_globals(cmd_buffer, width, height, .Tessellated) + current_mode = .Tessellated + } + if current_vert_buf != main_vert_buf { + sdl.BindGPUVertexBuffers(render_pass, 0, &sdl.GPUBufferBinding{buffer = main_vert_buf, offset = 0}, 1) + current_vert_buf = main_vert_buf + } + text_batch := &GLOB.tmp_text_batches[batch.offset] + if current_atlas != text_batch.atlas_texture { + sdl.BindGPUFragmentSamplers( + render_pass, + 0, + &sdl.GPUTextureSamplerBinding{texture = text_batch.atlas_texture, sampler = sampler}, + 1, + ) + current_atlas = text_batch.atlas_texture + } + sdl.DrawGPUIndexedPrimitives( + render_pass, + text_batch.index_count, + 1, + text_batch.index_start, + i32(text_vertex_gpu_base + text_batch.vertex_start), + 0, + ) + + case .SDF: + if current_mode != .SDF { + push_globals(cmd_buffer, width, height, .SDF) + current_mode = .SDF + } + if current_vert_buf != unit_quad { + sdl.BindGPUVertexBuffers(render_pass, 0, &sdl.GPUBufferBinding{buffer = unit_quad, offset = 0}, 1) + current_vert_buf = unit_quad + } + // Determine texture and sampler for this batch + batch_texture: ^sdl.GPUTexture = white_texture + batch_sampler: ^sdl.GPUSampler = sampler + if batch.texture_id != INVALID_TEXTURE { + if bound_texture := texture_gpu_handle(batch.texture_id); bound_texture != nil { + batch_texture = bound_texture + } + batch_sampler = get_sampler(batch.sampler) + } + if current_atlas != batch_texture || current_sampler != batch_sampler { + sdl.BindGPUFragmentSamplers( + render_pass, + 0, + &sdl.GPUTextureSamplerBinding{texture = batch_texture, sampler = batch_sampler}, + 1, + ) + current_atlas = batch_texture + current_sampler = batch_sampler + } + sdl.DrawGPUPrimitives(render_pass, 6, batch.count, 0, batch.offset) + + case .Backdrop: + // Always a no-op here. 
// Queue raw tessellated geometry (colored triangles) on `layer`.
// The vertices are appended to GLOB.tmp_shape_verts and registered as a .Tessellated
// sub-batch on the last scissor of the layer's scissor range. An empty slice is a no-op.
// TODO: Should probably be renamed to better match tessellated naming conventions in the library.
prepare_shape :: proc(layer: ^Layer, vertices: []Vertex_2D) {
	vertex_count := len(vertices)
	if vertex_count == 0 {
		return
	}

	// Capture the start offset before the append grows the array.
	first_vertex := u32(len(GLOB.tmp_shape_verts))
	append(&GLOB.tmp_shape_verts, ..vertices)

	active_scissor := &GLOB.scissors[layer.scissor_start + layer.scissor_len - 1]
	append_or_extend_sub_batch(active_scissor, layer, .Tessellated, first_vertex, u32(vertex_count))
}

// Queue a single SDF primitive on `layer`. The caller must supply a fully built
// Core_2D_Primitive, which is the internal GPU-layout struct.
// The primitive is appended to GLOB.tmp_primitives and registered as a one-element
// .SDF sub-batch on the last scissor of the layer's scissor range.
//INTERNAL
prepare_sdf_primitive :: proc(layer: ^Layer, prim: Core_2D_Primitive) {
	// Index the primitive will occupy once appended.
	primitive_index := u32(len(GLOB.tmp_primitives))
	append(&GLOB.tmp_primitives, prim)

	active_scissor := &GLOB.scissors[layer.scissor_start + layer.scissor_len - 1]
	append_or_extend_sub_batch(active_scissor, layer, .SDF, primitive_index, 1)
}
// Queue a single SDF primitive together with an optional texture binding.
// Texture-aware sibling of `prepare_sdf_primitive`: `texture_id` and `sampler` are forwarded
// to the sub-batch so shape procs can route a texture without widening the public API.
//INTERNAL
prepare_sdf_primitive_ex :: proc(
	layer: ^Layer,
	prim: Core_2D_Primitive,
	texture_id: Texture_Id = INVALID_TEXTURE,
	sampler: Sampler_Preset = DFT_SAMPLER,
) {
	// Index the primitive will occupy once appended.
	primitive_index := u32(len(GLOB.tmp_primitives))
	append(&GLOB.tmp_primitives, prim)

	active_scissor := &GLOB.scissors[layer.scissor_start + layer.scissor_len - 1]
	append_or_extend_sub_batch(active_scissor, layer, .SDF, primitive_index, 1, texture_id, sampler)
}

// Queue a text element on `layer`.
// Walks the linked list of SDL_ttf draw-data chunks, copying vertices (with the text
// position baked in) and indices into the shared text arrays. Each chunk becomes its own
// Text_Batch and its own .Text sub-batch.
//INTERNAL
prepare_text :: proc(layer: ^Layer, text: Text) {
	chunk := sdl_ttf.GetGPUTextDrawData(text.sdl_text)
	if chunk == nil do return // nil is normal for empty text

	active_scissor := &GLOB.scissors[layer.scissor_start + layer.scissor_len - 1]

	// Snap the base position to whole physical pixels; fractional offsets cause atlas
	// sub-pixel sampling blur (and the off-by-one bottom-row clip that comes with it).
	origin_x := math.round(text.position[0] * GLOB.dpi_scaling)
	origin_y := math.round(text.position[1] * GLOB.dpi_scaling)

	// One premultiplied color shared by every glyph vertex.
	glyph_color := premultiply_color(text.color)

	for chunk != nil {
		first_vertex := u32(len(GLOB.tmp_text_verts))
		first_index := u32(len(GLOB.tmp_text_indices))

		// Vertices: the glyph-space Y is negated and the snapped base position is added.
		for i in 0 ..< chunk.num_vertices {
			glyph_pos := chunk.xy[i]
			glyph_uv := chunk.uv[i]
			vertex := Vertex_2D {
				position = {glyph_pos.x + origin_x, -glyph_pos.y + origin_y},
				uv       = {glyph_uv.x, glyph_uv.y},
				color    = glyph_color,
			}
			append(&GLOB.tmp_text_verts, vertex)
		}

		// Indices are copied verbatim; they stay relative to this chunk's first vertex.
		append(&GLOB.tmp_text_indices, ..chunk.indices[:chunk.num_indices])

		text_batch_index := u32(len(GLOB.tmp_text_batches))
		text_batch := Text_Batch {
			atlas_texture = chunk.atlas_texture,
			vertex_start  = first_vertex,
			vertex_count  = u32(chunk.num_vertices),
			index_start   = first_index,
			index_count   = u32(chunk.num_indices),
		}
		append(&GLOB.tmp_text_batches, text_batch)

		// Each atlas chunk is a separate sub-batch (different atlas textures can't coalesce)
		append_or_extend_sub_batch(active_scissor, layer, .Text, text_batch_index, 1)

		chunk = chunk.next
	}
}

// Queue a text element whose vertices are run through a 2D affine transform.
// Used by the high-level `text` proc when rotation or a non-zero origin is specified.
// NOTE: `transform` must already be in physical (DPI-scaled) pixel space — the caller
// pre-scales pos and origin by GLOB.dpi_scaling before building it.
//INTERNAL
prepare_text_transformed :: proc(layer: ^Layer, text: Text, transform: Transform_2D) {
	chunk := sdl_ttf.GetGPUTextDrawData(text.sdl_text)
	if chunk == nil do return

	active_scissor := &GLOB.scissors[layer.scissor_start + layer.scissor_len - 1]

	// One premultiplied color shared by every glyph vertex.
	glyph_color := premultiply_color(text.color)

	for chunk != nil {
		first_vertex := u32(len(GLOB.tmp_text_verts))
		first_index := u32(len(GLOB.tmp_text_indices))

		for i in 0 ..< chunk.num_vertices {
			glyph_pos := chunk.xy[i]
			glyph_uv := chunk.uv[i]
			// SDL_ttf gives glyph positions in physical pixels relative to the text origin,
			// and the transform is in the same space, so it is applied directly — no
			// per-vertex DPI divide/multiply.
			vertex := Vertex_2D {
				position = apply_transform(transform, {glyph_pos.x, -glyph_pos.y}),
				uv       = {glyph_uv.x, glyph_uv.y},
				color    = glyph_color,
			}
			append(&GLOB.tmp_text_verts, vertex)
		}

		append(&GLOB.tmp_text_indices, ..chunk.indices[:chunk.num_indices])

		text_batch_index := u32(len(GLOB.tmp_text_batches))
		text_batch := Text_Batch {
			atlas_texture = chunk.atlas_texture,
			vertex_start  = first_vertex,
			vertex_count  = u32(chunk.num_vertices),
			index_start   = first_index,
			index_count   = u32(chunk.num_indices),
		}
		append(&GLOB.tmp_text_batches, text_batch)

		append_or_extend_sub_batch(active_scissor, layer, .Text, text_batch_index, 1)

		chunk = chunk.next
	}
}

// ---------------------------------------------------------------------------------------------------------------------
// ----- Primitive builders ------------
// ---------------------------------------------------------------------------------------------------------------------

//----- Internal helpers ----------------------------------

// Replace zero-initialized Texture_Fill fields with usable defaults.
// Odin zero-initializes structs, and an all-zero Color or Rectangle is not a useful tint
// or UV rect, so those two fields fall back to DFT_TINT / DFT_UV_RECT when they are zero.
// The sampler is passed through unchanged.
//INTERNAL
resolve_texture_defaults :: #force_inline proc(
	tf: Texture_Fill,
) -> (
	tint: Color,
	uv: Rectangle,
	sampler: Sampler_Preset,
) {
	sampler = tf.sampler
	uv = tf.uv_rect if tf.uv_rect != Rectangle{} else DFT_UV_RECT
	tint = tf.tint if tf.tint != Color{} else DFT_TINT
	return
}
// Compute the visual center of a center-parametrized shape under Convention B origin
// semantics: `center` is where the origin point lands in world space, and the visual
// center sits at the rotated negative origin relative to that landing point.
//   visual_center = center + R(θ) · (-origin)
// When θ=0: visual_center = center - origin (pure positioning shift).
// When origin={0,0}: visual_center = center (no change).
//INTERNAL
compute_pivot_center :: proc(center: Vec2, origin: Vec2, sin_angle, cos_angle: f32) -> Vec2 {
	if origin.x == 0 && origin.y == 0 {
		return center
	}

	// Rotate the negated origin by the supplied sin/cos pair.
	neg_x := -origin.x
	neg_y := -origin.y
	offset := Vec2{cos_angle * neg_x - sin_angle * neg_y, sin_angle * neg_x + cos_angle * neg_y}
	return center + offset
}

// Compute the AABB half-extents of a rectangle with half-size (half_width, half_height)
// after rotating it by the angle described by the given cos/sin.
//INTERNAL
rotated_aabb_half_extents :: proc(half_width, half_height, cos_angle, sin_angle: f32) -> [2]f32 {
	abs_cos, abs_sin := abs(cos_angle), abs(sin_angle)
	extent_x := half_width * abs_cos + half_height * abs_sin
	extent_y := half_width * abs_sin + half_height * abs_cos
	return {extent_x, extent_y}
}

// Pack sin/cos into the Core_2D_Primitive.rotation_sc field as two f16 values
// (sin first, cos second, in the argument order of pack_f16_pair).
//INTERNAL
pack_rotation_sc :: #force_inline proc(sin_angle, cos_angle: f32) -> u32 {
	sin_half := f16(sin_angle)
	cos_half := f16(cos_angle)
	return pack_f16_pair(sin_half, cos_half)
}
//----- Shape builders ----------------------------------

// Build an RRect Core_2D_Primitive from rectangle geometry.
// Fills in `bounds` (logical pixels, padded by half the feather), `rotation_sc`, and the
// RRect params (physical pixels: half-size, per-corner radii, half-feather).
// The caller sets color, flags, and uv fields on the returned primitive before submitting.
//INTERNAL
build_rrect_primitive :: proc(
	rect: Rectangle,
	radii: Rectangle_Radii,
	origin: Vec2,
	rotation: f32,
	feather_px: f32,
) -> Core_2D_Primitive {
	dpi_scale := GLOB.dpi_scaling
	half_feather := feather_px * 0.5
	// feather_px is physical; bounds are logical, so the padding is divided by the DPI scale.
	padding := half_feather / dpi_scale

	half_width := rect.width * 0.5
	half_height := rect.height * 0.5

	// No corner radius may exceed half of the shorter side.
	radius_limit := min(rect.width, rect.height) * 0.5
	top_left := clamp(radii.top_left, 0, radius_limit)
	top_right := clamp(radii.top_right, 0, radius_limit)
	bottom_right := clamp(radii.bottom_right, 0, radius_limit)
	bottom_left := clamp(radii.bottom_left, 0, radius_limit)

	// Untransformed case: plain rectangle center.
	center_x := rect.x + half_width - origin.x
	center_y := rect.y + half_height - origin.y
	sin_angle, cos_angle: f32 = 0, 1
	is_rotated := false

	if needs_transform(origin, rotation) {
		sin_angle, cos_angle = math.sincos(math.to_radians(rotation))
		is_rotated = rotation != 0
		// NOTE(review): the pivot passed here is rect.xy + origin, while the untransformed
		// center above subtracts origin. Whether both agree with the documented anchor
		// semantics depends on build_pivot_rotation_sc — confirm against that helper.
		pivot_transform := build_pivot_rotation_sc(
			{rect.x + origin.x, rect.y + origin.y},
			origin,
			cos_angle,
			sin_angle,
		)
		moved_center := apply_transform(pivot_transform, {half_width, half_height})
		center_x = moved_center.x
		center_y = moved_center.y
	}

	// A rotated rectangle needs a larger axis-aligned quad to cover it.
	extent := [2]f32{half_width, half_height}
	if is_rotated {
		extent = rotated_aabb_half_extents(half_width, half_height, cos_angle, sin_angle)
	}

	prim := Core_2D_Primitive {
		bounds = {
			center_x - extent.x - padding,
			center_y - extent.y - padding,
			center_x + extent.x + padding,
			center_y + extent.y + padding,
		},
	}
	// rotation_sc stays 0 for unrotated shapes; apply_brush_and_outline keys .Rotated off it.
	if is_rotated {
		prim.rotation_sc = pack_rotation_sc(sin_angle, cos_angle)
	}

	prim.params.rrect = RRect_Params {
		half_size    = {half_width * dpi_scale, half_height * dpi_scale},
		// Radii are packed in the order: bottom-right, top-right, bottom-left, top-left.
		radii        = {
			bottom_right * dpi_scale,
			top_right * dpi_scale,
			bottom_left * dpi_scale,
			top_left * dpi_scale,
		},
		half_feather = half_feather,
	}
	return prim
}

// Build an RRect Core_2D_Primitive for a circle (a square RRect whose four radii equal
// its half-size). `rotation` only affects where the center lands when `origin` is
// non-zero; no rotation_sc is written.
// The caller sets color, flags, and uv fields on the returned primitive before submitting.
//INTERNAL
build_circle_primitive :: proc(
	center: Vec2,
	radius: f32,
	origin: Vec2,
	rotation: f32,
	feather_px: f32,
) -> Core_2D_Primitive {
	dpi_scale := GLOB.dpi_scaling
	half_feather := feather_px * 0.5
	padding := half_feather / dpi_scale

	// Convention B: shift the visual center by the rotated negative origin.
	visual_center := center
	if origin != {0, 0} {
		sin_rot, cos_rot := math.sincos(math.to_radians(rotation))
		visual_center = compute_pivot_center(center, origin, sin_rot, cos_rot)
	}

	physical_radius := radius * dpi_scale

	prim := Core_2D_Primitive {
		bounds = {
			visual_center.x - radius - padding,
			visual_center.y - radius - padding,
			visual_center.x + radius + padding,
			visual_center.y + radius + padding,
		},
	}
	prim.params.rrect = RRect_Params {
		half_size    = {physical_radius, physical_radius},
		radii        = {physical_radius, physical_radius, physical_radius, physical_radius},
		half_feather = half_feather,
	}
	return prim
}
// Build an Ellipse Core_2D_Primitive from ellipse geometry.
// Fills in `bounds` (logical pixels, padded by half the feather), `rotation_sc`, and the
// Ellipse params (physical-pixel radii plus half-feather).
// The caller sets color, flags, and uv fields on the returned primitive before submitting.
//INTERNAL
build_ellipse_primitive :: proc(
	center: Vec2,
	radius_horizontal, radius_vertical: f32,
	origin: Vec2,
	rotation: f32,
	feather_px: f32,
) -> Core_2D_Primitive {
	dpi_scale := GLOB.dpi_scaling
	half_feather := feather_px * 0.5
	// feather_px is physical; bounds are logical, so the padding is divided by the DPI scale.
	padding := half_feather / dpi_scale

	visual_center := center
	sin_angle, cos_angle: f32 = 0, 1
	is_rotated := false

	if needs_transform(origin, rotation) {
		sin_angle, cos_angle = math.sincos(math.to_radians(rotation))
		visual_center = compute_pivot_center(center, origin, sin_angle, cos_angle)
		is_rotated = rotation != 0
	}

	// A rotated ellipse is bounded by the AABB of its rotated bounding rectangle.
	extent := [2]f32{radius_horizontal, radius_vertical}
	if is_rotated {
		extent = rotated_aabb_half_extents(radius_horizontal, radius_vertical, cos_angle, sin_angle)
	}

	prim := Core_2D_Primitive {
		bounds = {
			visual_center.x - extent.x - padding,
			visual_center.y - extent.y - padding,
			visual_center.x + extent.x + padding,
			visual_center.y + extent.y + padding,
		},
	}
	// rotation_sc stays 0 for unrotated shapes; apply_brush_and_outline keys .Rotated off it.
	if is_rotated {
		prim.rotation_sc = pack_rotation_sc(sin_angle, cos_angle)
	}

	prim.params.ellipse = Ellipse_Params {
		radii        = {radius_horizontal * dpi_scale, radius_vertical * dpi_scale},
		half_feather = half_feather,
	}
	return prim
}
// Build an NGon Core_2D_Primitive with bounds, params, and rotation computed from polygon geometry.
//
// `radius` is the circumradius (the polygon is inscribed in a circle of that radius); the
// value packed for the shader is the apothem, radius * cos(π / sides), in physical pixels.
// `bounds` use the circumradius, which covers the polygon at any rotation, so no AABB
// expansion is needed.
//
// Origin handling follows Convention B exactly like the circle / ellipse / ring builders:
// the origin offset is applied whenever `origin` is non-zero, including when
// `rotation == 0` (where it is a pure positioning shift of -origin).
//
// `sides` is not validated here; the public `polygon` proc rejects sides < 3 before calling.
// The caller sets color, flags, and uv fields on the returned primitive before submitting.
//INTERNAL
build_polygon_primitive :: proc(
	center: Vec2,
	sides: int,
	radius: f32,
	origin: Vec2,
	rotation: f32,
	feather_px: f32,
) -> Core_2D_Primitive {
	half_feather := feather_px * 0.5
	// feather_px is physical; bounds are logical, so the padding is divided by the DPI scale.
	padding := half_feather / GLOB.dpi_scaling
	dpi_scale := GLOB.dpi_scaling

	// One sincos serves both the pivot shift and the packed rotation.
	sin_rot, cos_rot := math.sincos(math.to_radians(rotation))

	// compute_pivot_center returns `center` unchanged for a zero origin, so it is safe to
	// call unconditionally. It must NOT be gated on rotation != 0: a non-zero origin
	// shifts the shape even when it is not rotated.
	actual_center := compute_pivot_center(center, origin, sin_rot, cos_rot)

	prim := Core_2D_Primitive {
		bounds = {
			actual_center.x - radius - padding,
			actual_center.y - radius - padding,
			actual_center.x + radius + padding,
			actual_center.y + radius + padding,
		},
		// Left at 0 for unrotated shapes; apply_brush_and_outline keys .Rotated off it.
		rotation_sc = rotation != 0 ? pack_rotation_sc(sin_rot, cos_rot) : 0,
	}
	prim.params.ngon = NGon_Params {
		radius       = radius * math.cos(math.PI / f32(sides)) * dpi_scale,
		sides        = f32(sides),
		half_feather = half_feather,
	}
	return prim
}
// Build a Ring_Arc Core_2D_Primitive with bounds and params computed from ring/arc geometry.
// Pre-computes the angular boundary normals on the CPU so the fragment shader needs
// no per-pixel sin/cos. The radial SDF uses max(inner-r, r-outer) which correctly
// handles pie slices (inner_radius = 0) and full rings.
//
// Angles are in degrees. `rotation` is added to both arc angles (and moves the center
// when `origin` is non-zero); no rotation_sc is written.
//
// Returns the primitive plus the arc flags the caller must merge into the shape flags:
//   {}            = full ring (span within 0.001 rad of 2π, or larger)
//   {.Arc_Narrow} = span ≤ π (shader intersects the two half-planes)
//   {.Arc_Wide}   = span > π (shader unions the two half-planes)
// The caller sets color, flags, and uv fields on the returned primitive before submitting.
//INTERNAL
build_ring_arc_primitive :: proc(
	center: Vec2,
	inner_radius, outer_radius: f32,
	start_angle: f32,
	end_angle: f32,
	origin: Vec2,
	rotation: f32,
	feather_px: f32,
) -> (
	Core_2D_Primitive,
	Shape_Flags,
) {
	half_feather := feather_px * 0.5
	// feather_px is physical; bounds are logical, so the padding is divided by the DPI scale.
	padding := half_feather / GLOB.dpi_scaling
	dpi_scale := GLOB.dpi_scaling

	actual_center := center
	rotation_offset: f32 = 0
	if needs_transform(origin, rotation) {
		sin_a, cos_a := math.sincos(math.to_radians(rotation))
		actual_center = compute_pivot_center(center, origin, sin_a, cos_a)
		rotation_offset = math.to_radians(rotation)
	}

	start_rad := math.to_radians(start_angle) + rotation_offset
	end_rad := math.to_radians(end_angle) + rotation_offset

	// Wrap a negative span into [0, 2π). A single `+= 2π` is not enough when the span is
	// below -2π (e.g. start = 720°, end = 0°): it would stay negative and be classified
	// as .Arc_Narrow regardless of the real wrapped span. math.mod keeps the sign of the
	// dividend, so one conditional add afterwards completes the wrap. Results are
	// identical to the single-add form for every span ≥ -2π. Non-negative spans are left
	// untouched so that spans ≥ 2π still mean "full ring".
	arc_span := end_rad - start_rad
	if arc_span < 0 {
		arc_span = math.mod(arc_span, 2 * math.PI)
		if arc_span < 0 {
			arc_span += 2 * math.PI
		}
	}

	// Pre-compute edge normals and arc flags on CPU — no per-pixel trig needed.
	arc_flags: Shape_Flags = {}
	normal_start: [2]f32 = {}
	normal_end: [2]f32 = {}

	if arc_span < 2 * math.PI - 0.001 {
		sin_start, cos_start := math.sincos(start_rad)
		sin_end, cos_end := math.sincos(end_rad)
		normal_start = {sin_start, -cos_start}
		normal_end = {-sin_end, cos_end}
		arc_flags = arc_span <= math.PI ? {.Arc_Narrow} : {.Arc_Wide}
	}

	prim := Core_2D_Primitive {
		bounds = {
			actual_center.x - outer_radius - padding,
			actual_center.y - outer_radius - padding,
			actual_center.x + outer_radius + padding,
			actual_center.y + outer_radius + padding,
		},
	}
	prim.params.ring_arc = Ring_Arc_Params {
		inner_radius = inner_radius * dpi_scale,
		outer_radius = outer_radius * dpi_scale,
		normal_start = normal_start,
		normal_end   = normal_end,
		half_feather = half_feather,
	}
	return prim, arc_flags
}

//----- Brush and outline ----------------------------------

// Apply brush fill and outline to a primitive, then submit it.
//
// Writes the fill (color / gradient / texture tint + uv rect) and outline fields into
// `prim`, derives the shape flags, packs `kind` + flags into `prim.flags`, and hands a
// copy of the primitive to prepare_sdf_primitive_ex together with the texture id and
// sampler (INVALID_TEXTURE / DFT_SAMPLER unless the brush is a Texture_Fill).
//
// `prim` is mutated in place: when an outline is requested its bounds are grown by
// `outline_width` on every side, so pass a freshly built primitive.
// `outline_width` is in logical pixels, matching the rest of the public API; it is
// converted to physical pixels here for GPU packing. Widths <= 0 mean "no outline".
// `extra_flags` seeds the flag set (used by `ring` to pass the arc flags).
//INTERNAL
apply_brush_and_outline :: proc(
	layer: ^Layer,
	prim: ^Core_2D_Primitive,
	kind: Shape_Kind,
	brush: Brush,
	outline_color: Color,
	outline_width: f32,
	extra_flags: Shape_Flags = {},
) {
	flags: Shape_Flags = extra_flags

	// Fill — determined by the Brush variant.
	texture_id := INVALID_TEXTURE
	sampler := DFT_SAMPLER

	switch b in brush {
	case Color:
		prim.color = b
	case Linear_Gradient:
		flags += {.Gradient}
		prim.color = b.start_color
		prim.effects.gradient_color = b.end_color
		// Gradient direction is packed as (cos, sin) of the angle given in degrees.
		rad := math.to_radians(b.angle)
		sin_a, cos_a := math.sincos(rad)
		prim.effects.gradient_dir_sc = pack_f16_pair(f16(cos_a), f16(sin_a))
	case Radial_Gradient:
		flags += {.Gradient, .Gradient_Radial}
		prim.color = b.inner_color
		prim.effects.gradient_color = b.outer_color
	case Texture_Fill:
		flags += {.Textured}
		tint, uv, sam := resolve_texture_defaults(b)
		prim.color = tint
		prim.uv_rect = {uv.x, uv.y, uv.width, uv.height}
		texture_id = b.id
		sampler = sam
	}

	// Outline — orthogonal to all Brush variants.
	if outline_width > 0 {
		flags += {.Outline}
		prim.effects.outline_color = outline_color
		prim.effects.outline_packed = pack_f16_pair(f16(outline_width * GLOB.dpi_scaling), 0)
		// Expand bounds to contain the outline (bounds are in logical pixels)
		prim.bounds[0] -= outline_width
		prim.bounds[1] -= outline_width
		prim.bounds[2] += outline_width
		prim.bounds[3] += outline_width
	}

	// Set .Rotated flag if rotation_sc was populated by the build proc
	if prim.rotation_sc != 0 {
		flags += {.Rotated}
	}

	prim.flags = pack_kind_flags(kind, flags)
	prepare_sdf_primitive_ex(layer, prim^, texture_id, sampler)
}

// ---------------------------------------------------------------------------------------------------------------------
// ----- Public draw procs ------------
// ---------------------------------------------------------------------------------------------------------------------

// Draw a filled rectangle via SDF with optional per-corner rounding radii.
// Use `uniform_radii(rect, roundness)` to compute uniform radii from a 0–1 fraction.
// An outline is drawn only when `outline_width` > 0.
//
// Origin semantics:
//   `origin` is a local offset from the rect's top-left corner that selects both the positioning
//   anchor and the rotation pivot. `rect.x, rect.y` specifies where that anchor point lands in
//   world space. When `origin = {0, 0}` (default), `rect.x, rect.y` is the top-left corner.
//   Rotation always occurs around the anchor point.
rectangle :: proc(
	layer: ^Layer,
	rect: Rectangle,
	brush: Brush,
	outline_color: Color = {},
	outline_width: f32 = 0,
	radii: Rectangle_Radii = {},
	origin: Vec2 = {},
	rotation: f32 = 0,
	feather_px: f32 = DFT_FEATHER_PX,
) {
	prim := build_rrect_primitive(rect, radii, origin, rotation, feather_px)
	apply_brush_and_outline(layer, &prim, .RRect, brush, outline_color, outline_width)
}
// Draw a filled circle via SDF (emitted as a fully-rounded RRect).
// An outline is drawn only when `outline_width` > 0.
//
// Origin semantics (Convention B):
//   `origin` is a local offset from the shape's center that selects both the positioning anchor
//   and the rotation pivot. The `center` parameter specifies where that anchor point lands in
//   world space. When `origin = {0, 0}` (default), `center` is the visual center.
//   When `origin = {r, 0}`, the point `r` pixels to the right of the shape center lands at
//   `center`, shifting the shape left by `r`.
circle :: proc(
	layer: ^Layer,
	center: Vec2,
	radius: f32,
	brush: Brush,
	outline_color: Color = {},
	outline_width: f32 = 0,
	origin: Vec2 = {},
	rotation: f32 = 0,
	feather_px: f32 = DFT_FEATHER_PX,
) {
	// A circle is submitted with the RRect shape kind.
	circle_prim := build_circle_primitive(center, radius, origin, rotation, feather_px)
	apply_brush_and_outline(layer, &circle_prim, .RRect, brush, outline_color, outline_width)
}

// Draw a filled ellipse via SDF, with horizontal and vertical radii given separately.
// An outline is drawn only when `outline_width` > 0.
// Origin semantics: see `circle`.
ellipse :: proc(
	layer: ^Layer,
	center: Vec2,
	radius_horizontal, radius_vertical: f32,
	brush: Brush,
	outline_color: Color = {},
	outline_width: f32 = 0,
	origin: Vec2 = {},
	rotation: f32 = 0,
	feather_px: f32 = DFT_FEATHER_PX,
) {
	ellipse_prim := build_ellipse_primitive(
		center,
		radius_horizontal,
		radius_vertical,
		origin,
		rotation,
		feather_px,
	)
	apply_brush_and_outline(layer, &ellipse_prim, .Ellipse, brush, outline_color, outline_width)
}
// Draw a filled regular polygon via SDF.
// `sides` must be >= 3; smaller values draw nothing. The polygon is inscribed in a circle
// of the given `radius`.
// Origin semantics: see `circle`.
polygon :: proc(
	layer: ^Layer,
	center: Vec2,
	sides: int,
	radius: f32,
	brush: Brush,
	outline_color: Color = {},
	outline_width: f32 = 0,
	origin: Vec2 = {},
	rotation: f32 = 0,
	feather_px: f32 = DFT_FEATHER_PX,
) {
	// Fewer than three sides does not describe a polygon.
	if sides < 3 {
		return
	}

	ngon_prim := build_polygon_primitive(center, sides, radius, origin, rotation, feather_px)
	apply_brush_and_outline(layer, &ngon_prim, .NGon, brush, outline_color, outline_width)
}

// Draw a ring, arc, or pie slice via SDF.
// Full ring by default. Pass start_angle/end_angle (degrees) for partial arcs.
// Use inner_radius = 0 for pie slices (sectors).
// Origin semantics: see `circle`.
ring :: proc(
	layer: ^Layer,
	center: Vec2,
	inner_radius, outer_radius: f32,
	brush: Brush,
	outline_color: Color = {},
	outline_width: f32 = 0,
	start_angle: f32 = 0,
	end_angle: f32 = DFT_CIRC_END_ANGLE,
	origin: Vec2 = {},
	rotation: f32 = 0,
	feather_px: f32 = DFT_FEATHER_PX,
) {
	// The builder also reports which arc flags (none / narrow / wide) the shader needs.
	ring_prim, ring_flags := build_ring_arc_primitive(
		center,
		inner_radius,
		outer_radius,
		start_angle,
		end_angle,
		origin,
		rotation,
		feather_px,
	)
	apply_brush_and_outline(layer, &ring_prim, .Ring_Arc, brush, outline_color, outline_width, ring_flags)
}
// Draw a line segment via SDF (emitted as a rotated capsule-shaped RRect).
// Round caps come from corner radii equal to half the thickness; the capsule's long axis
// is extended by that cap radius at each end. Segments shorter than 0.0001 draw nothing.
line :: proc(
	layer: ^Layer,
	start_position, end_position: Vec2,
	brush: Brush,
	thickness: f32 = DFT_STROKE_THICKNESS,
	outline_color: Color = {},
	outline_width: f32 = 0,
	feather_px: f32 = DFT_FEATHER_PX,
) {
	delta_x := end_position.x - start_position.x
	delta_y := end_position.y - start_position.y
	seg_length := math.sqrt(delta_x * delta_x + delta_y * delta_y)
	if seg_length < 0.0001 {
		return // degenerate segment: no direction to orient the capsule along
	}

	// Orientation of the segment.
	sin_angle, cos_angle := math.sincos(math.atan2(delta_y, delta_x))

	// The capsule is centered on the segment midpoint.
	mid_x := (start_position.x + end_position.x) * 0.5
	mid_y := (start_position.y + end_position.y) * 0.5

	half_thickness := thickness * 0.5
	cap_radius := half_thickness
	half_extent_long := seg_length * 0.5 + cap_radius

	dpi_scale := GLOB.dpi_scaling
	half_feather := feather_px * 0.5
	// feather_px is physical; bounds are logical, so the padding is divided by the DPI scale.
	padding := half_feather / dpi_scale

	// Axis-aligned quad that covers the rotated capsule.
	aabb_half := rotated_aabb_half_extents(half_extent_long, half_thickness, cos_angle, sin_angle)

	physical_cap := cap_radius * dpi_scale

	capsule := Core_2D_Primitive {
		bounds = {
			mid_x - aabb_half.x - padding,
			mid_y - aabb_half.y - padding,
			mid_x + aabb_half.x + padding,
			mid_y + aabb_half.y + padding,
		},
		rotation_sc = pack_rotation_sc(sin_angle, cos_angle),
	}
	capsule.params.rrect = RRect_Params {
		half_size    = {half_extent_long * dpi_scale, half_thickness * dpi_scale},
		radii        = {physical_cap, physical_cap, physical_cap, physical_cap},
		half_feather = half_feather,
	}
	apply_brush_and_outline(layer, &capsule, .RRect, brush, outline_color, outline_width)
}
// Draw a line strip via decomposed SDF line segments: one `line` call per consecutive
// pair of points, each with the same brush, thickness, and outline. Fewer than two
// points draws nothing.
line_strip :: proc(
	layer: ^Layer,
	points: []Vec2,
	brush: Brush,
	thickness: f32 = DFT_STROKE_THICKNESS,
	outline_color: Color = {},
	outline_width: f32 = 0,
	feather_px: f32 = DFT_FEATHER_PX,
) {
	point_count := len(points)
	if point_count < 2 {
		return
	}

	// Walk the segment end points; the start point is the previous element.
	for end_idx in 1 ..< point_count {
		segment_start := points[end_idx - 1]
		segment_end := points[end_idx]
		line(layer, segment_start, segment_end, brush, thickness, outline_color, outline_width, feather_px)
	}
}
+BG_PAGE :: draw.Color{0x31, 0x31, 0x31, 0xff} + +// Cards, panels, drawers, input fields, code blocks, table rows. +// Slightly lighter than the page so raised surfaces read clearly +// without shadows. +BG_SURFACE :: draw.Color{0x3c, 0x38, 0x36, 0xff} + +// Selected rows, active nav items, hover states. One step lighter +// than BG_SURFACE. +BG_ACTIVE :: draw.Color{0x50, 0x49, 0x45, 0xff} + +// Disabled buttons / inputs background. Pairs with FG_MUTED text +// only — the contrast is intentionally low. +BG_DISABLED :: draw.Color{0x66, 0x5c, 0x54, 0xff} + +// Borders, dividers, rules, input outlines. Never use as a text +// surface — it has no fg-pair guarantee. +BG_BORDER :: draw.Color{0x7c, 0x6f, 0x64, 0xff} + + +// ============================================================ +// BASE FOREGROUNDS — warm cream / ivory, never pure white +// Five-step ramp from brightest (heading) to most muted. +// ============================================================ + +// Hero text, page headings, display titles. Brightest fg. +FG_HEADING :: draw.Color{0xfb, 0xf1, 0xc7, 0xff} + +// Primary body text, default readable content. +FG_BODY :: draw.Color{0xf2, 0xe2, 0xba, 0xff} + +// Labels, secondary descriptions, table data. +FG_SECONDARY :: draw.Color{0xe0, 0xd0, 0xa8, 0xff} + +// Captions, metadata, timestamps, placeholders. +FG_CAPTION :: draw.Color{0xce, 0xbd, 0x9e, 0xff} + +// Disabled text, token labels, subtle UI annotations. +FG_MUTED :: draw.Color{0xb8, 0xa9, 0x8e, 0xff} + + +// ============================================================ +// ACCENT — GOLD (signature color, Art Deco) +// The defining accent of the system. Use sparingly: borders, +// highlights, focus rings, primary interactive states. +// ============================================================ + +// Primary interactive, focus rings, headline interactive accent. +GOLD_BRIGHT :: draw.Color{0xfa, 0xbd, 0x2f, 0xff} + +// Borders, decorative rules, default Art Deco ornament color. 
+GOLD_DIM :: draw.Color{0xd7, 0x99, 0x21, 0xff}
+
+// Dimmer gold contexts: hover states, pressed accents.
+GOLD_MUTED :: draw.Color{0xb5, 0x76, 0x14, 0xff}
+
+// Pure CRT amber, kept apart from the gold ramp. Reserved for
+// terminal-style glow / phosphor references.
+AMBER :: draw.Color{0xff, 0xb0, 0x00, 0xff}
+
+
+// ============================================================
+// ACCENT — RED (danger, errors, critical alerts)
+// ============================================================
+
+RED_BRIGHT :: draw.Color{0xfb, 0x49, 0x34, 0xff}
+RED_DIM    :: draw.Color{0xcc, 0x24, 0x1d, 0xff}
+RED_MUTED  :: draw.Color{0x9d, 0x00, 0x06, 0xff}
+
+
+// ============================================================
+// ACCENT — GREEN (success, safe, complete)
+// ============================================================
+
+GREEN_BRIGHT :: draw.Color{0xb8, 0xbb, 0x26, 0xff}
+GREEN_DIM    :: draw.Color{0x98, 0x97, 0x1a, 0xff}
+GREEN_MUTED  :: draw.Color{0x79, 0x74, 0x0e, 0xff}
+
+
+// ============================================================
+// ACCENT — BLUE / TEAL (info, links, cool technical elements)
+// ============================================================
+
+BLUE_BRIGHT :: draw.Color{0x83, 0xa5, 0x98, 0xff}
+BLUE_DIM    :: draw.Color{0x45, 0x85, 0x88, 0xff}
+BLUE_MUTED  :: draw.Color{0x07, 0x66, 0x78, 0xff}
+
+
+// ============================================================
+// ACCENT — ORANGE (warnings, in-progress, hot paths)
+// ============================================================
+
+ORANGE_BRIGHT :: draw.Color{0xfe, 0x80, 0x19, 0xff}
+ORANGE_DIM    :: draw.Color{0xd6, 0x5d, 0x0e, 0xff}
+ORANGE_MUTED  :: draw.Color{0xaf, 0x3a, 0x03, 0xff}
+
+
+// ============================================================
+// ACCENT — AQUA (cool secondary accent, fresh/active states)
+// ============================================================
+
+AQUA_BRIGHT :: draw.Color{0x8e, 0xc0, 0x7c, 0xff}
+AQUA_DIM    :: draw.Color{0x68, 0x9d, 0x6a, 0xff}
+AQUA_MUTED  :: draw.Color{0x42, 0x7b, 0x58, 0xff}
+
+
+// ============================================================
+// ACCENT — PURPLE (rare, for categorical / data-vis variety)
+// ============================================================
+
+PURPLE_BRIGHT :: draw.Color{0xd3, 0x86, 0x9b, 0xff}
+PURPLE_DIM    :: draw.Color{0xb1, 0x62, 0x86, 0xff}
+PURPLE_MUTED  :: draw.Color{0x8f, 0x3f, 0x71, 0xff}
+
+
+// ============================================================
+// SEMANTIC COLOR ROLES
+// Intent-named aliases onto the accent ramps. Product code
+// should reach for these so the meaning travels with the value.
+// ============================================================
+
+// Brand interactive color: buttons, key links, focus ring.
+COLOR_PRIMARY     :: GOLD_BRIGHT
+COLOR_PRIMARY_DIM :: GOLD_DIM
+
+// Errors, destructive actions, critical states.
+COLOR_DANGER     :: RED_BRIGHT
+COLOR_DANGER_DIM :: RED_DIM
+
+// Completion, safe state, successful operation.
+COLOR_SUCCESS     :: GREEN_BRIGHT
+COLOR_SUCCESS_DIM :: GREEN_DIM
+
+// Caution, in-progress work, non-fatal anomalies.
+COLOR_WARNING     :: ORANGE_BRIGHT
+COLOR_WARNING_DIM :: ORANGE_DIM
+
+// Neutral status, passive notices, informational content.
+COLOR_INFO     :: BLUE_BRIGHT
+COLOR_INFO_DIM :: BLUE_DIM
+
+// Hyperlinks: blue at rest, flipping to gold on hover.
+COLOR_LINK       :: BLUE_BRIGHT
+COLOR_LINK_HOVER :: GOLD_BRIGHT
+
+// Focus ring color for keyboard / programmatic focus.
+COLOR_FOCUS :: GOLD_BRIGHT
+
+
+// ============================================================
+// SURFACE ROLES
+// Semantic aliases for the bg ramp by usage role.
+// ============================================================
+
+SURFACE_PAGE :: BG_PAGE // root canvas
+SURFACE_RAISED :: BG_SURFACE // cards, panels, inputs
+SURFACE_OVERLAY :: BG_SHELL // modals, popovers, deep chrome
+SURFACE_HOVER :: BG_ACTIVE // hovered raised surfaces
+// Pressed/active raised surfaces. Uses BG_ACTIVE (the same step
+// CARD_BG_PRESS uses) so a pressed surface is distinguishable
+// from a resting SURFACE_RAISED one.
+SURFACE_ACTIVE :: BG_ACTIVE
+
+
+// ============================================================
+// BORDER ROLES
+// Cybersteel borders are 1px solid, always crisp, always visible.
+// Color carries the meaning; weight rarely changes.
+// ============================================================
+
+BORDER :: BG_BORDER // structural borders, default
+BORDER_SUBTLE :: BG_DISABLED // very faint separators
+BORDER_ACCENT :: GOLD_DIM // decorative / active edge
+BORDER_FOCUS :: GOLD_BRIGHT // focus rings
+BORDER_DANGER :: RED_DIM // destructive states
+BORDER_SUCCESS :: GREEN_DIM // success states
+
+
+// ============================================================
+// TRANSLUCENT ACCENT TINTS
+// Used for hover fills behind ghost buttons and for warm
+// gradient overlays. Alpha encodes the tint strength; the RGB
+// channels repeat the matching opaque accent.
+// ============================================================
+
+// 20% gold tint behind a hovered secondary button (GOLD_DIM rgb).
+TINT_GOLD_HOVER :: draw.Color{0xd7, 0x99, 0x21, 0x33} // ~20% alpha
+
+// 20% red tint behind a hovered danger ghost button (RED_DIM rgb).
+TINT_DANGER_HOVER :: draw.Color{0xcc, 0x24, 0x1d, 0x33} // ~20% alpha
+
+// 20% green tint behind a hovered success ghost button (GREEN_DIM rgb).
+TINT_SUCCESS_HOVER :: draw.Color{0x98, 0x97, 0x1a, 0x33} // ~20% alpha
+
+// 8% gold tint — top of the diagonal "gold fade" feature
+// section overlay.
+TINT_GOLD_FADE :: draw.Color{0xfa, 0xbd, 0x2f, 0x14} // ~8% alpha
+
+// 6% amber tint — top of the vertical "amber fade" overlay.
+TINT_AMBER_FADE :: draw.Color{0xff, 0xb0, 0x00, 0x0f} // ~6% alpha
+
+// 4% gold tint — corner of card gradient.
+TINT_GOLD_CARD :: draw.Color{0xfa, 0xbd, 0x2f, 0x0a} // ~4% alpha
+
+// 3% black tint — scanline overlay stripe color.
+TINT_SCANLINE :: draw.Color{0x00, 0x00, 0x00, 0x08} // ~3% alpha
+
+
+// ============================================================
+// SHADOWS
+// Cybersteel is FLAT: there are no drop shadows, and elevation
+// comes from bg + border alone. The one shadow that is allowed
+// is a 1px gold ring marking focus / active state. It is spelled
+// out here so callers never invent ad-hoc shadow values.
+// ============================================================
+
+// The only permitted shadow: a 1px inset gold ring used as the
+// focus or selected-state outline. Width first, then color.
+SHADOW_GOLD_RING_WIDTH :: 1
+SHADOW_GOLD_RING_COLOR :: GOLD_DIM
+
+
+// ============================================================
+// SPACING SCALE (8px base grid)
+// Every value is a multiple of 4px; the main scale steps in
+// multiples of 8px. Names describe the scope of the gap rather
+// than its size — choose by intent, not by pixel count.
+// ============================================================
+
+// Micro nudges: badge/tag inner padding, icon-label gap, border offsets.
+SPACE_CHIP :: 4
+
+// Tight row spacing, inline element gaps, chip/pill padding, icon inset.
+SPACE_ELEMENT :: 8
+
+// Button vertical padding, input inset, list row gap, label-to-field gap.
+SPACE_COMPONENT :: 12
+
+// The default gap: card inset, input horizontal padding, form field gap.
+SPACE_GROUP :: 16
+
+// Compact panel inset, grouped nav items, related form section spacing.
+SPACE_CLUSTER :: 20
+
+// Sidebar / panel inset, modal body padding, drawer inset, section
+// subheader gap.
+SPACE_PANEL :: 24
+
+// Card grid gutter, toolbar height, gap between distinct content blocks.
+SPACE_BLOCK :: 32
+
+// Dialog padding, major content group spacing, page sub-section gap.
+SPACE_CONTENT :: 40
+
+// Page section breaks, feature group dividers, hero subheading gap.
+SPACE_SECTION :: 48
+
+// Hero vertical padding, layout area spacing, large feature gaps.
+SPACE_REGION :: 64
+
+// Full-width section vertical rhythm, page-scale layout spacing.
+SPACE_ZONE :: 80
+
+// Maximum layout gutter: page margins, full-bleed hero top padding.
+SPACE_CANVAS :: 96
+
+
+// ============================================================
+// CORNER RADIUS
+// Cybersteel does not round its corners like a toy. 0–4px is the
+// preferred range; larger radii exist only for chips/pills.
+// ============================================================
+
+RADIUS_NONE :: 0   // sharp corners — preferred default for chrome
+RADIUS_SM   :: 4   // micro-rounding for inline code, small badges
+RADIUS_MD   :: 6   // default for cards, buttons, inputs
+RADIUS_LG   :: 10  // rare — used only for prominent containers
+RADIUS_PILL :: 999 // fully-rounded chips, status pills, tags
+
+
+// ============================================================
+// BORDER WIDTH
+// 1px solid is the standard. Heavier weights are only used for
+// the Art Deco hairline accent on pre/code blocks.
+// ============================================================
+
+// The standard weight, used everywhere — always crisp, always visible.
+BORDER_WIDTH_DEFAULT :: 1
+
+// Accent edge on `pre` blocks (left side, gold) and similar
+// emphasized rule treatments.
+BORDER_WIDTH_ACCENT :: 2
+
+
+// ============================================================
+// MOTION — TRANSITION DURATIONS
+// Quick and deliberate: no bounce, no spring, no elastic easing.
+// Every UI state change finishes well inside a quarter-second,
+// and every animation exists to explain causality, never to
+// decorate.
+// ============================================================
+
+// Press-in (entering the active/pressed state). Has to feel
+// instantaneous under the finger — a snap-down.
+TRANSITION_PRESS  :: 55 * time.Millisecond
+
+// Release from a pressed state, plus the slower hover-out cases.
+TRANSITION_UI     :: 180 * time.Millisecond
+
+// Color shift on hover enter / exit for buttons, cards, links.
+TRANSITION_HOVER  :: 150 * time.Millisecond
+
+// Fade-in for overlays, modals, popovers. A little longer so it
+// reads as "a layer changed" rather than "a control changed".
+TRANSITION_MODAL  :: 200 * time.Millisecond
+
+// Immediate-feedback transitions: caret moves, terminal output
+// ticks.
+TRANSITION_CURSOR :: 80 * time.Millisecond
+
+
+// ============================================================
+// MOTION — COMPONENT-LEVEL TIMINGS
+// Named durations for specific, known interactions. Reach for
+// one of these before picking a raw transition for a component.
+// ============================================================
+
+// Press fade shared by primary/secondary/danger/success buttons.
+BUTTON_PRESS_FADE_DUR   :: 55 * time.Millisecond
+
+// Fade used when a button is released or the pointer leaves it.
+BUTTON_RELEASE_FADE_DUR :: 180 * time.Millisecond
+
+// Card hover: border and background crossfade together.
+CARD_HOVER_FADE_DUR     :: 150 * time.Millisecond
+
+// Card press: border and background snap to the active look.
+CARD_PRESS_FADE_DUR     :: 55 * time.Millisecond
+
+// Entering a modal / overlay.
+MODAL_ENTER_DUR         :: 200 * time.Millisecond
+
+// Leaving a modal / overlay — mirrors the enter time for symmetry.
+MODAL_EXIT_DUR          :: 200 * time.Millisecond
+
+// Link color crossfade on hover.
+// NOTE(review): 180ms equals TRANSITION_UI, whereas TRANSITION_HOVER
+// (150ms) is the duration documented as covering links — confirm
+// which one is intended.
+LINK_HOVER_FADE_DUR     :: 180 * time.Millisecond
+
+// One frame of the terminal scanline flicker loop.
+SCANLINE_FLICKER_TICK   :: 80 * time.Millisecond
+
+
+// ============================================================
+// TYPOGRAPHY — FONT FAMILY NAMES
+// Sans: IBM Plex Sans
+// Mono: Lilex — IBM Plex Mono with programming ligatures.
+//       A drop-in replacement for Plex Mono: same skeleton and
+//       proportions, with =>, !=, >=, <=, etc. ligatures added.
+// By default Plex Sans fills the display, body, and condensed
+// roles. Lilex is reserved for code, terminal output, data
+// values, and full mono-mode surfaces.
+// ============================================================
+
+// Family names as plain strings.
+FONT_FAMILY_SANS :: "IBM Plex Sans"
+FONT_FAMILY_MONO :: "Lilex"
+
+// IBM Plex Sans raw font data: one constant per static face, loaded
+// with #load from the fonts/ directory. Trailing notes give the weight
+// and slant of each face.
+SANS_THIN_RAW :: #load("fonts/IBMPlexSans-Thin.ttf") // 100 Thin
+SANS_THIN_ITALIC_RAW :: #load("fonts/IBMPlexSans-ThinItalic.ttf") // 100 Thin, italic
+SANS_EXTRALIGHT_RAW :: #load("fonts/IBMPlexSans-ExtraLight.ttf") // 200 ExtraLight
+SANS_EXTRALIGHT_ITALIC_RAW :: #load("fonts/IBMPlexSans-ExtraLightItalic.ttf") // 200 ExtraLight, italic
+SANS_LIGHT_RAW :: #load("fonts/IBMPlexSans-Light.ttf") // 300 Light
+SANS_LIGHT_ITALIC_RAW :: #load("fonts/IBMPlexSans-LightItalic.ttf") // 300 Light, italic
+SANS_REGULAR_RAW :: #load("fonts/IBMPlexSans-Regular.ttf") // 400 Regular
+SANS_ITALIC_RAW :: #load("fonts/IBMPlexSans-Italic.ttf") // 400 Regular, italic
+SANS_MEDIUM_RAW :: #load("fonts/IBMPlexSans-Medium.ttf") // 500 Medium
+SANS_MEDIUM_ITALIC_RAW :: #load("fonts/IBMPlexSans-MediumItalic.ttf") // 500 Medium, italic
+SANS_SEMIBOLD_RAW :: #load("fonts/IBMPlexSans-SemiBold.ttf") // 600 SemiBold
+SANS_SEMIBOLD_ITALIC_RAW :: #load("fonts/IBMPlexSans-SemiBoldItalic.ttf") // 600 SemiBold, italic
+SANS_BOLD_RAW :: #load("fonts/IBMPlexSans-Bold.ttf") // 700 Bold
+SANS_BOLD_ITALIC_RAW :: #load("fonts/IBMPlexSans-BoldItalic.ttf") // 700 Bold, italic
+
+// Lilex raw font data: one constant per static face, loaded with
+// #load from the fonts/ directory. Trailing notes give the weight
+// and slant of each face.
+MONO_THIN_RAW :: #load("fonts/Lilex-Thin.ttf") // 100 Thin
+MONO_THIN_ITALIC_RAW :: #load("fonts/Lilex-ThinItalic.ttf") // 100 Thin, italic
+MONO_EXTRALIGHT_RAW :: #load("fonts/Lilex-ExtraLight.ttf") // 200 ExtraLight
+MONO_EXTRALIGHT_ITALIC_RAW :: #load("fonts/Lilex-ExtraLightItalic.ttf") // 200 ExtraLight, italic
+MONO_LIGHT_RAW :: #load("fonts/Lilex-Light.ttf") // 300 Light
+MONO_LIGHT_ITALIC_RAW :: #load("fonts/Lilex-LightItalic.ttf") // 300 Light, italic
+MONO_REGULAR_RAW :: #load("fonts/Lilex-Regular.ttf") // 400 Regular
+MONO_ITALIC_RAW :: #load("fonts/Lilex-Italic.ttf") // 400 Regular, italic
+MONO_MEDIUM_RAW :: #load("fonts/Lilex-Medium.ttf") // 500 Medium
+MONO_MEDIUM_ITALIC_RAW :: #load("fonts/Lilex-MediumItalic.ttf") // 500 Medium, italic
+MONO_SEMIBOLD_RAW :: #load("fonts/Lilex-SemiBold.ttf") // 600 SemiBold
+MONO_SEMIBOLD_ITALIC_RAW :: #load("fonts/Lilex-SemiBoldItalic.ttf") // 600 SemiBold, italic
+MONO_BOLD_RAW :: #load("fonts/Lilex-Bold.ttf") // 700 Bold
+MONO_BOLD_ITALIC_RAW :: #load("fonts/Lilex-BoldItalic.ttf") // 700 Bold, italic
+
+
+// ============================================================
+// TYPOGRAPHY — TYPE SCALE (1.25 modular ratio, base 16px)
+// Body text on web never drops below 14px; the print floor is
+// 12pt. Steps are whole pixel values, so the ratio between
+// neighbouring steps only approximates 1.25.
+// ============================================================
+
+TEXT_XS   :: 11 // fine print, status badges
+TEXT_SM   :: 13 // captions, secondary labels
+TEXT_BASE :: 15 // default body text
+TEXT_MD   :: 16 // slightly prominent body
+TEXT_LG   :: 18 // emphasized labels, subheadings
+TEXT_XL   :: 22 // H3 level
+TEXT_2XL  :: 28 // H2 level
+TEXT_3XL  :: 36 // H1 level
+TEXT_4XL  :: 48 // display / hero
+TEXT_5XL  :: 64 // hero display
+TEXT_6XL  :: 96 // top of the scale; masthead only
+
+
+// ============================================================
+// TYPOGRAPHY — FONT WEIGHTS
+// Limited to the STATIC weights that BOTH faces actually ship
+// from Google Fonts. IBM Plex Sans and Lilex share the same
+// seven static instances:
+//   100 Thin · 200 ExtraLight · 300 Light · 400 Regular ·
+//   500 Medium · 600 SemiBold · 700 Bold
+// Neither face has an 800 ExtraBold or a 900 Black. Never ask
+// for a weight outside this set — Google's API will fail or
+// substitute, and the design will drift.
+// ============================================================
+
+WEIGHT_THIN       :: 100
+WEIGHT_EXTRALIGHT :: 200
+WEIGHT_LIGHT      :: 300
+WEIGHT_REGULAR    :: 400
+WEIGHT_MEDIUM     :: 500
+WEIGHT_SEMIBOLD   :: 600
+WEIGHT_BOLD       :: 700
+
+
+// ============================================================
+// TYPOGRAPHY — LINE HEIGHTS (unitless multipliers)
+// Leading in pixels = multiplier × font size.
+// ============================================================
+
+LEADING_TIGHT  :: 1.15 // display headings
+LEADING_SNUG   :: 1.30 // subheadings
+LEADING_NORMAL :: 1.50 // default body prose
+LEADING_LOOSE  :: 1.70 // sparse density, long-form reading
+LEADING_MONO   :: 1.40 // code / terminal output
+
+
+// ============================================================
+// TYPOGRAPHY — LETTER SPACING (in EM units)
+// Pixel spacing = value × resolved font size.
+// ============================================================
+
+TRACKING_TIGHT  :: -0.02 // tightened display, large headings
+TRACKING_NORMAL :: 0.00  // body default
+TRACKING_WIDE   :: 0.05  // H1/H2 ALL CAPS, button labels
+TRACKING_WIDER  :: 0.10  // H5 caps, section headers
+TRACKING_WIDEST :: 0.20  // .label / .label-mono — ALL CAPS chip text
+
+
+// ============================================================
+// HEADING ROLES — paired size + tracking + casing intent
+// The casing notes are documentation only; the constants are
+// the numbers a renderer actually consumes.
+// ============================================================
+
+// H1 — masthead / page title. Title Case, ALL CAPS at display.
+H1_SIZE     :: TEXT_3XL
+H1_WEIGHT   :: WEIGHT_BOLD
+H1_TRACKING :: TRACKING_WIDE
+H1_LEADING  :: LEADING_TIGHT
+
+// H2 — major section heading. ALL CAPS.
+H2_SIZE     :: TEXT_2XL
+H2_WEIGHT   :: WEIGHT_BOLD
+H2_TRACKING :: TRACKING_WIDE
+H2_LEADING  :: LEADING_TIGHT
+
+// H3 — subsection heading. Sentence case, condensed semibold.
+H3_SIZE     :: TEXT_XL
+H3_WEIGHT   :: WEIGHT_SEMIBOLD
+H3_TRACKING :: TRACKING_NORMAL
+H3_LEADING  :: LEADING_TIGHT
+
+// H4 — minor subsection heading.
+H4_SIZE     :: TEXT_LG
+H4_WEIGHT   :: WEIGHT_SEMIBOLD
+H4_TRACKING :: TRACKING_NORMAL
+H4_LEADING  :: LEADING_SNUG
+
+// H5 — small-caps section header (drawn in FG_SECONDARY).
+H5_SIZE     :: TEXT_BASE
+H5_WEIGHT   :: WEIGHT_SEMIBOLD
+H5_TRACKING :: TRACKING_WIDER
+H5_LEADING  :: LEADING_SNUG
+
+// H6 — mono-caps eyebrow / overline (drawn in FG_CAPTION).
+H6_SIZE     :: TEXT_SM
+H6_WEIGHT   :: WEIGHT_REGULAR
+H6_TRACKING :: TRACKING_WIDEST
+H6_LEADING  :: LEADING_SNUG
+
+
+// ============================================================
+// LABEL ROLES — small caps annotation chips
+// ============================================================
+
+// .label — condensed sans, ALL CAPS, drawn in FG_CAPTION.
+LABEL_SIZE     :: TEXT_XS
+LABEL_WEIGHT   :: WEIGHT_SEMIBOLD
+LABEL_TRACKING :: TRACKING_WIDEST
+
+// .label-mono — mono, ALL CAPS, drawn in FG_MUTED.
+LABEL_MONO_SIZE     :: TEXT_XS
+LABEL_MONO_WEIGHT   :: WEIGHT_REGULAR
+LABEL_MONO_TRACKING :: TRACKING_WIDEST
+
+
+// ============================================================
+// FOCUS RING
+// A 1px solid gold outline, offset by 2px. Always crisp: no
+// blur, no glow, no box-shadow halo.
+// ============================================================
+
+FOCUS_RING_WIDTH  :: 1
+FOCUS_RING_OFFSET :: 2
+FOCUS_RING_COLOR  :: BORDER_FOCUS // GOLD_BRIGHT
+
+
+// ============================================================
+// COMPONENT — BUTTONS
+// Cybersteel buttons are uppercase, semibold→bold, and widely
+// tracked. "md" is the default size; sm and lg change only
+// padding and font size.
+// ============================================================
+
+// Default (md) metrics — padding is vertical / horizontal.
+BUTTON_PAD_Y       :: 8
+BUTTON_PAD_X       :: 18
+BUTTON_FONT_SIZE   :: 12
+BUTTON_FONT_WEIGHT :: WEIGHT_BOLD
+BUTTON_TRACKING    :: 0.07 // EM — ALL CAPS button label
+BUTTON_RADIUS      :: RADIUS_MD
+BUTTON_BORDER      :: BORDER_WIDTH_DEFAULT
+
+// Small (sm) button overrides.
+BUTTON_SM_PAD_Y     :: 5
+BUTTON_SM_PAD_X     :: 12
+BUTTON_SM_FONT_SIZE :: 10
+
+// Large (lg) button overrides.
+BUTTON_LG_PAD_Y     :: 11
+BUTTON_LG_PAD_X     :: 24
+BUTTON_LG_FONT_SIZE :: 14
+
+// Primary — solid gold fill carrying dark text. Hovering
+// brightens the gold; pressing swaps the fill to fg-heading
+// (cream).
+BUTTON_PRIMARY_BG           :: GOLD_DIM
+BUTTON_PRIMARY_FG           :: BG_SHELL
+BUTTON_PRIMARY_BORDER       :: GOLD_DIM
+BUTTON_PRIMARY_BG_HOVER     :: GOLD_BRIGHT
+BUTTON_PRIMARY_BORDER_HOVER :: GOLD_BRIGHT
+BUTTON_PRIMARY_BG_PRESS     :: FG_HEADING
+BUTTON_PRIMARY_FG_PRESS     :: BG_SHELL
+BUTTON_PRIMARY_BORDER_PRESS :: FG_HEADING
+
+// Secondary — transparent bg, structural border, hover gains
+// gold tint + gold-dim border, press fills with gold-bright.
+// Literals are typed draw.Color like every other color token in
+// this file, so all button colors share one type.
+BUTTON_SECONDARY_BG :: draw.Color{0, 0, 0, 0} // transparent
+BUTTON_SECONDARY_FG :: FG_SECONDARY
+BUTTON_SECONDARY_BORDER :: BG_BORDER
+BUTTON_SECONDARY_BG_HOVER :: TINT_GOLD_HOVER
+BUTTON_SECONDARY_BORDER_HOVER :: GOLD_DIM
+BUTTON_SECONDARY_FG_HOVER :: FG_BODY
+BUTTON_SECONDARY_BG_PRESS :: GOLD_BRIGHT
+// NOTE(review): pure white text on a GOLD_BRIGHT fill; the primary
+// button uses BG_SHELL on gold and the foreground ramp is described
+// as "never pure white" — confirm this is intended.
+BUTTON_SECONDARY_FG_PRESS :: draw.Color{0xff, 0xff, 0xff, 0xff}
+BUTTON_SECONDARY_BORDER_PRESS :: GOLD_BRIGHT
+
+// Ghost — fully transparent, no border. Hover lifts to BG_ACTIVE.
+// Literals are typed draw.Color like every other color token in
+// this file, so all button colors share one type.
+BUTTON_GHOST_BG :: draw.Color{0, 0, 0, 0} // transparent
+BUTTON_GHOST_FG :: FG_CAPTION
+BUTTON_GHOST_BORDER :: draw.Color{0, 0, 0, 0} // transparent — no visible border
+BUTTON_GHOST_BG_HOVER :: BG_ACTIVE
+BUTTON_GHOST_FG_HOVER :: FG_BODY
+BUTTON_GHOST_BG_PRESS :: GOLD_DIM
+// NOTE(review): pure white text on a gold fill; the foreground ramp
+// is described as "never pure white" — confirm this is intended.
+BUTTON_GHOST_FG_PRESS :: draw.Color{0xff, 0xff, 0xff, 0xff}
+
+// Danger — destructive ghost button: transparent at rest with a
+// red-dim border, red tint on hover, solid red-bright on press.
+// Literals are typed draw.Color like every other color token in
+// this file, so all button colors share one type.
+BUTTON_DANGER_BG :: draw.Color{0, 0, 0, 0} // transparent
+BUTTON_DANGER_FG :: RED_BRIGHT
+BUTTON_DANGER_BORDER :: RED_DIM
+BUTTON_DANGER_BG_HOVER :: TINT_DANGER_HOVER
+BUTTON_DANGER_BORDER_HOVER :: RED_BRIGHT
+BUTTON_DANGER_FG_HOVER :: FG_BODY
+BUTTON_DANGER_BG_PRESS :: RED_BRIGHT
+// NOTE(review): pure white text; the foreground ramp is described
+// as "never pure white" — confirm this is intended.
+BUTTON_DANGER_FG_PRESS :: draw.Color{0xff, 0xff, 0xff, 0xff}
+BUTTON_DANGER_BORDER_PRESS :: RED_BRIGHT
+
+// Success — confirming ghost button: transparent at rest with a
+// green-dim border, green tint on hover, solid green-bright on
+// press. Literals are typed draw.Color like every other color
+// token in this file, so all button colors share one type.
+BUTTON_SUCCESS_BG :: draw.Color{0, 0, 0, 0} // transparent
+BUTTON_SUCCESS_FG :: GREEN_BRIGHT
+BUTTON_SUCCESS_BORDER :: GREEN_DIM
+BUTTON_SUCCESS_BG_HOVER :: TINT_SUCCESS_HOVER
+BUTTON_SUCCESS_BORDER_HOVER :: GREEN_BRIGHT
+BUTTON_SUCCESS_FG_HOVER :: FG_BODY
+BUTTON_SUCCESS_BG_PRESS :: GREEN_BRIGHT
+// NOTE(review): pure white text on a GREEN_BRIGHT fill; the
+// foreground ramp is described as "never pure white" — confirm
+// this is intended.
+BUTTON_SUCCESS_FG_PRESS :: draw.Color{0xff, 0xff, 0xff, 0xff}
+BUTTON_SUCCESS_BORDER_PRESS :: GREEN_BRIGHT
+
+// Disabled — flat low-contrast surface, opacity-dimmed.
+// The fill is BG_DISABLED, the background token documented for
+// disabled buttons / inputs and meant to pair with FG_MUTED text.
+BUTTON_DISABLED_BG :: BG_DISABLED
+BUTTON_DISABLED_FG :: FG_MUTED
+BUTTON_DISABLED_BORDER :: BG_BORDER
+BUTTON_DISABLED_OPACITY :: 0.5
+
+
+// ============================================================
+// COMPONENT — CARDS
+// Flat, structural, mechanical. The background sits one step
+// above the page. The border is structural at rest and turns
+// gold-dim on hover/press. Corners use the default 6px radius
+// (RADIUS_MD).
+// ============================================================
+
+CARD_BG           :: BG_SURFACE
+CARD_BORDER       :: BG_BORDER
+CARD_BORDER_HOVER :: GOLD_DIM
+CARD_BG_PRESS     :: BG_ACTIVE
+CARD_BORDER_PRESS :: GOLD_DIM
+CARD_RADIUS       :: RADIUS_MD
+CARD_BORDER_WIDTH :: BORDER_WIDTH_DEFAULT
+CARD_PADDING      :: SPACE_GROUP // 16px default inset
+
+
+// ============================================================
+// COMPONENT — INPUTS
+// An input is a BG_SURFACE field with a structural border. On
+// focus the border is promoted to gold-bright, and the focus
+// ring follows.
+// ============================================================
+
+INPUT_BG            :: BG_SURFACE
+INPUT_FG            :: FG_BODY
+INPUT_PLACEHOLDER   :: FG_CAPTION
+INPUT_BORDER        :: BG_BORDER
+INPUT_BORDER_HOVER  :: GOLD_DIM
+INPUT_BORDER_FOCUS  :: GOLD_BRIGHT
+INPUT_BORDER_DANGER :: RED_DIM
+INPUT_RADIUS        :: RADIUS_MD
+INPUT_PAD_Y         :: SPACE_COMPONENT // 12
+INPUT_PAD_X         :: SPACE_GROUP // 16
+
+
+// ============================================================
+// COMPONENT — BADGES / STATUS PILLS
+// Small, widely tracked chips built from the type and spacing
+// scales above.
+// ============================================================
+
+BADGE_FONT_SIZE :: TEXT_XS
+BADGE_WEIGHT    :: WEIGHT_SEMIBOLD
+BADGE_TRACKING  :: TRACKING_WIDEST
+BADGE_PAD_Y     :: SPACE_CHIP // 4
+BADGE_PAD_X     :: SPACE_ELEMENT // 8
+BADGE_RADIUS    :: RADIUS_SM
+
+
+// ============================================================
+// COMPONENT — DECO RULE
+// The hairline Art Deco horizontal rule: a 1px gold-dim top
+// line over a 1px structural drop line, with panel-sized
+// margins above and below.
+// ============================================================
+
+DECO_RULE_TOP_WIDTH  :: 1
+DECO_RULE_TOP_COLOR  :: GOLD_DIM
+DECO_RULE_DROP_WIDTH :: 1
+DECO_RULE_DROP_COLOR :: BG_BORDER
+DECO_RULE_MARGIN_Y   :: SPACE_PANEL // 24
+
+
+// ============================================================
+// LAYOUT — FIXED CHROME WIDTHS
+// Sidebars have fixed widths and content is laid out on an 8 or
+// 12 column grid. Chrome never collapses responsively —
+// Cybersteel UIs run on real workstations.
+// ============================================================
+
+SIDEBAR_WIDTH_NARROW :: 240
+SIDEBAR_WIDTH_WIDE   :: 280
+
+GRID_COLUMNS_NARROW :: 8
+GRID_COLUMNS_WIDE   :: 12
+
+// Same value as SPACE_BLOCK, so the toolbar lines up with the
+// vertical rhythm.
+TOOLBAR_HEIGHT :: SPACE_BLOCK // 32
+
+
+// ============================================================
+// CODE BLOCKS — inline code and `pre` blocks
+// Mono type. A `pre` block is a BG_SHELL surface with a 1px
+// structural border and a 2px gold-dim accent down its left
+// edge.
+// ============================================================
+
+// Inline code spans.
+CODE_INLINE_BG     :: BG_SURFACE
+CODE_INLINE_FG     :: GOLD_BRIGHT
+CODE_INLINE_BORDER :: BG_BORDER
+CODE_INLINE_PAD_Y  :: 2
+CODE_INLINE_PAD_X  :: 6
+CODE_INLINE_RADIUS :: RADIUS_SM
+
+// Block-level `pre` surfaces.
+PRE_BG                :: BG_SHELL
+PRE_FG                :: FG_BODY
+PRE_BORDER            :: BG_BORDER
+PRE_BORDER_LEFT_COLOR :: GOLD_DIM
+PRE_BORDER_LEFT_WIDTH :: BORDER_WIDTH_ACCENT // 2
+PRE_PAD_Y             :: SPACE_GROUP // 16
+PRE_PAD_X             :: SPACE_PANEL // 24
+
+
+// ============================================================
+// SCANLINE OVERLAY (opt-in, terminal surfaces only)
+// A repeating stripe pattern at very low opacity: 2px of
+// transparent gap followed by 2px of black at 3%
+// (TINT_SCANLINE).
+// ============================================================
+
+SCANLINE_STRIPE_PX :: 2
+SCANLINE_GAP_PX    :: 2
+SCANLINE_COLOR     :: TINT_SCANLINE
diff --git a/draw/cybersteel/fonts/IBMPlexSans-Bold.ttf b/draw/cybersteel/fonts/IBMPlexSans-Bold.ttf
new file mode 100644
index 0000000..258c10a
Binary files /dev/null and b/draw/cybersteel/fonts/IBMPlexSans-Bold.ttf differ
diff --git a/draw/cybersteel/fonts/IBMPlexSans-BoldItalic.ttf b/draw/cybersteel/fonts/IBMPlexSans-BoldItalic.ttf
new file mode 100644
index 0000000..fabdca0
Binary files /dev/null and b/draw/cybersteel/fonts/IBMPlexSans-BoldItalic.ttf differ
diff --git a/draw/cybersteel/fonts/IBMPlexSans-ExtraLight.ttf b/draw/cybersteel/fonts/IBMPlexSans-ExtraLight.ttf
new file mode 100644
index 0000000..46f52c4
Binary files /dev/null and b/draw/cybersteel/fonts/IBMPlexSans-ExtraLight.ttf differ
diff --git a/draw/cybersteel/fonts/IBMPlexSans-ExtraLightItalic.ttf b/draw/cybersteel/fonts/IBMPlexSans-ExtraLightItalic.ttf
new file mode 100644
index 0000000..6fac7fa
Binary files /dev/null and b/draw/cybersteel/fonts/IBMPlexSans-ExtraLightItalic.ttf differ
diff --git a/draw/cybersteel/fonts/IBMPlexSans-Italic.ttf b/draw/cybersteel/fonts/IBMPlexSans-Italic.ttf
new file mode 100644
index 0000000..bf43956
Binary files /dev/null and b/draw/cybersteel/fonts/IBMPlexSans-Italic.ttf differ
diff --git a/draw/cybersteel/fonts/IBMPlexSans-Light.ttf b/draw/cybersteel/fonts/IBMPlexSans-Light.ttf
new file mode 100644
index 0000000..56e7db7
Binary files /dev/null and b/draw/cybersteel/fonts/IBMPlexSans-Light.ttf differ
diff --git a/draw/cybersteel/fonts/IBMPlexSans-LightItalic.ttf b/draw/cybersteel/fonts/IBMPlexSans-LightItalic.ttf
new file mode 100644
index 0000000..1e2e86a
Binary files /dev/null and b/draw/cybersteel/fonts/IBMPlexSans-LightItalic.ttf differ
diff --git a/draw/cybersteel/fonts/IBMPlexSans-Medium.ttf b/draw/cybersteel/fonts/IBMPlexSans-Medium.ttf
new file mode 100644
index 0000000..fb75072
Binary files /dev/null and b/draw/cybersteel/fonts/IBMPlexSans-Medium.ttf differ
diff --git a/draw/cybersteel/fonts/IBMPlexSans-MediumItalic.ttf b/draw/cybersteel/fonts/IBMPlexSans-MediumItalic.ttf
new file mode 100644
index 0000000..1b059be
Binary files /dev/null and b/draw/cybersteel/fonts/IBMPlexSans-MediumItalic.ttf differ
diff --git a/draw/cybersteel/fonts/IBMPlexSans-Regular.ttf b/draw/cybersteel/fonts/IBMPlexSans-Regular.ttf
new file mode 100644
index 0000000..5387ad4
Binary files /dev/null and b/draw/cybersteel/fonts/IBMPlexSans-Regular.ttf differ
diff --git a/draw/cybersteel/fonts/IBMPlexSans-SemiBold.ttf b/draw/cybersteel/fonts/IBMPlexSans-SemiBold.ttf
new file mode 100644
index 0000000..a63f1c5
Binary files /dev/null and b/draw/cybersteel/fonts/IBMPlexSans-SemiBold.ttf differ
diff --git a/draw/cybersteel/fonts/IBMPlexSans-SemiBoldItalic.ttf b/draw/cybersteel/fonts/IBMPlexSans-SemiBoldItalic.ttf
new file mode 100644
index 0000000..981d514
Binary files /dev/null and b/draw/cybersteel/fonts/IBMPlexSans-SemiBoldItalic.ttf differ
diff --git a/draw/cybersteel/fonts/IBMPlexSans-Thin.ttf b/draw/cybersteel/fonts/IBMPlexSans-Thin.ttf
new file mode 100644
index 0000000..918d4a0
Binary files /dev/null and b/draw/cybersteel/fonts/IBMPlexSans-Thin.ttf differ
diff --git a/draw/cybersteel/fonts/IBMPlexSans-ThinItalic.ttf b/draw/cybersteel/fonts/IBMPlexSans-ThinItalic.ttf
new file mode 100644
index 0000000..f7d1dab
Binary files /dev/null and b/draw/cybersteel/fonts/IBMPlexSans-ThinItalic.ttf differ
diff --git a/draw/cybersteel/fonts/Lilex-Bold.ttf b/draw/cybersteel/fonts/Lilex-Bold.ttf
new file mode 100644
index 0000000..ed1764b
Binary files /dev/null and b/draw/cybersteel/fonts/Lilex-Bold.ttf differ
diff --git a/draw/cybersteel/fonts/Lilex-BoldItalic.ttf b/draw/cybersteel/fonts/Lilex-BoldItalic.ttf
new file mode 100644
index 0000000..870a9f5
Binary files /dev/null and b/draw/cybersteel/fonts/Lilex-BoldItalic.ttf differ
diff --git a/draw/cybersteel/fonts/Lilex-ExtraLight.ttf b/draw/cybersteel/fonts/Lilex-ExtraLight.ttf
new file mode 100644
index 0000000..b66881c
Binary files /dev/null and b/draw/cybersteel/fonts/Lilex-ExtraLight.ttf differ
diff --git a/draw/cybersteel/fonts/Lilex-ExtraLightItalic.ttf b/draw/cybersteel/fonts/Lilex-ExtraLightItalic.ttf
new file mode 100644
index 0000000..49f9737
Binary files /dev/null and b/draw/cybersteel/fonts/Lilex-ExtraLightItalic.ttf differ
diff --git a/draw/cybersteel/fonts/Lilex-Italic.ttf b/draw/cybersteel/fonts/Lilex-Italic.ttf
new file mode 100644
index 0000000..2df4b49
Binary files /dev/null and b/draw/cybersteel/fonts/Lilex-Italic.ttf differ
diff --git a/draw/cybersteel/fonts/Lilex-Light.ttf b/draw/cybersteel/fonts/Lilex-Light.ttf
new file mode 100644
index 0000000..972d226
Binary files /dev/null and b/draw/cybersteel/fonts/Lilex-Light.ttf differ
diff --git a/draw/cybersteel/fonts/Lilex-LightItalic.ttf b/draw/cybersteel/fonts/Lilex-LightItalic.ttf
new file mode 100644
index 0000000..8edaceb
Binary files /dev/null and b/draw/cybersteel/fonts/Lilex-LightItalic.ttf differ
diff --git a/draw/cybersteel/fonts/Lilex-Medium.ttf b/draw/cybersteel/fonts/Lilex-Medium.ttf
new file mode 100644
index 0000000..6d81f5c
Binary files /dev/null and b/draw/cybersteel/fonts/Lilex-Medium.ttf differ
diff --git a/draw/cybersteel/fonts/Lilex-MediumItalic.ttf b/draw/cybersteel/fonts/Lilex-MediumItalic.ttf
new file mode 100644
index 0000000..55ed794
Binary files /dev/null and b/draw/cybersteel/fonts/Lilex-MediumItalic.ttf differ
diff --git a/draw/cybersteel/fonts/Lilex-Regular.ttf b/draw/cybersteel/fonts/Lilex-Regular.ttf
new file mode 100644
index 0000000..389c874
Binary files /dev/null and b/draw/cybersteel/fonts/Lilex-Regular.ttf differ
diff --git a/draw/cybersteel/fonts/Lilex-SemiBold.ttf b/draw/cybersteel/fonts/Lilex-SemiBold.ttf
new file mode 100644
index 0000000..828fb79
Binary files /dev/null and b/draw/cybersteel/fonts/Lilex-SemiBold.ttf differ
diff --git a/draw/cybersteel/fonts/Lilex-SemiBoldItalic.ttf b/draw/cybersteel/fonts/Lilex-SemiBoldItalic.ttf
new file mode 100644
index 0000000..02b36ee
Binary files /dev/null and b/draw/cybersteel/fonts/Lilex-SemiBoldItalic.ttf differ
diff --git a/draw/cybersteel/fonts/Lilex-Thin.ttf b/draw/cybersteel/fonts/Lilex-Thin.ttf
new file mode 100644
index 0000000..a6a4dd2
Binary files /dev/null and b/draw/cybersteel/fonts/Lilex-Thin.ttf differ
diff --git a/draw/cybersteel/fonts/Lilex-ThinItalic.ttf b/draw/cybersteel/fonts/Lilex-ThinItalic.ttf
new file mode 100644
index 0000000..58d145c
Binary files /dev/null and b/draw/cybersteel/fonts/Lilex-ThinItalic.ttf differ
diff --git a/draw/draw.odin b/draw/draw.odin
index fab8c54..1011ab9 100644
--- a/draw/draw.odin
+++ b/draw/draw.odin
@@ -4,33 +4,49 @@ import "base:runtime"
 import "core:c"
 import "core:log"
 import "core:math"
-
 import "core:strings"
 import sdl "vendor:sdl3"
 import sdl_ttf "vendor:sdl3/ttf"
 
 import clay "../vendor/clay"
 
+// ---------------------------------------------------------------------------------------------------------------------
+// ----- Shader format ------------
+// ---------------------------------------------------------------------------------------------------------------------
+
+//INTERNAL (each constant in the when-block below)
 when ODIN_OS == .Darwin {
 	PLATFORM_SHADER_FORMAT_FLAG :: sdl.GPUShaderFormatFlag.MSL
 	SHADER_ENTRY :: cstring("main0")
 	BASE_VERT_2D_RAW :: #load("shaders/generated/base_2d.vert.metal")
 	BASE_FRAG_2D_RAW :: #load("shaders/generated/base_2d.frag.metal")
+	BACKDROP_FULLSCREEN_VERT_RAW :: #load("shaders/generated/backdrop_fullscreen.vert.metal") // generated MSL, Darwin branch
+	BACKDROP_DOWNSAMPLE_FRAG_RAW :: #load("shaders/generated/backdrop_downsample.frag.metal") // generated MSL, Darwin branch
+	BACKDROP_BLUR_VERT_RAW :: #load("shaders/generated/backdrop_blur.vert.metal") // generated MSL, Darwin branch
+	BACKDROP_BLUR_FRAG_RAW :: #load("shaders/generated/backdrop_blur.frag.metal") // generated MSL, Darwin branch
 } else {
 	PLATFORM_SHADER_FORMAT_FLAG :: sdl.GPUShaderFormatFlag.SPIRV
 	SHADER_ENTRY :: cstring("main")
 	BASE_VERT_2D_RAW :: #load("shaders/generated/base_2d.vert.spv")
 	BASE_FRAG_2D_RAW :: #load("shaders/generated/base_2d.frag.spv")
+	BACKDROP_FULLSCREEN_VERT_RAW :: #load("shaders/generated/backdrop_fullscreen.vert.spv") // generated SPIR-V, non-Darwin branch
+	BACKDROP_DOWNSAMPLE_FRAG_RAW :: #load("shaders/generated/backdrop_downsample.frag.spv") // generated SPIR-V, non-Darwin branch
+	BACKDROP_BLUR_VERT_RAW :: #load("shaders/generated/backdrop_blur.vert.spv") // generated SPIR-V, non-Darwin branch
+	BACKDROP_BLUR_FRAG_RAW :: #load("shaders/generated/backdrop_blur.frag.spv") // generated SPIR-V, non-Darwin branch
 }
+
 PLATFORM_SHADER_FORMAT :: sdl.GPUShaderFormat{PLATFORM_SHADER_FORMAT_FLAG}
 
-BUFFER_INIT_SIZE :: 256
-INITIAL_LAYER_SIZE :: 5
-INITIAL_SCISSOR_SIZE :: 10
+// ---------------------------------------------------------------------------------------------------------------------
+// ----- Defaults and config ------------
+// ---------------------------------------------------------------------------------------------------------------------
 
-// Sentinel value: when passed as msaa_samples, `init` will use the maximum MSAA sample count
-// supported by the GPU for the swapchain format.
-MSAA_MAX :: sdl.GPUSampleCount(0xFF)
+//INTERNAL
+BUFFER_INIT_SIZE :: 256 // presumably an initial buffer capacity; unit (elements vs bytes) not visible here — TODO confirm at use sites
+//INTERNAL
+INITIAL_LAYER_SIZE :: 5 // presumably the initial capacity reserved for draw layers — TODO confirm at use sites
+//INTERNAL
+INITIAL_SCISSOR_SIZE :: 10 // presumably the initial capacity reserved for scissor entries — TODO confirm at use sites
 
 // ----- Default parameter values -----
 // Named constants for non-zero default procedure parameters. Centralizes magic numbers
@@ -39,73 +55,76 @@ DFT_FEATHER_PX :: 1 // Total AA feather width in physical pixels (half on each s
 DFT_STROKE_THICKNESS :: 1 // Default line/stroke thickness in logical pixels.
 DFT_FONT_SIZE :: 44 // Default font size in points for text rendering.
 DFT_CIRC_END_ANGLE :: 360 // Full-circle end angle in degrees (ring/arc).
-DFT_UV_RECT :: Rectangle{0, 0, 1, 1} // Full-texture UV rect (rectangle_texture).
-DFT_TINT :: WHITE // Default texture tint (rectangle_texture, clay_image).
+DFT_UV_RECT :: Rectangle{0, 0, 1, 1} // Full-texture UV rect (Texture_Fill default).
+DFT_TINT :: WHITE // Default texture tint (Texture_Fill, clay_image).
 DFT_TEXT_COLOR :: BLACK // Default text color.
 DFT_CLEAR_COLOR :: BLACK // Default clear color for end().
 DFT_SAMPLER :: Sampler_Preset.Linear_Clamp // Default texture sampler preset.
 
+// ---------------------------------------------------------------------------------------------------------------------
+// ----- Global state ------------
+// ---------------------------------------------------------------------------------------------------------------------
+
+//INTERNAL
 GLOB: Global
 
+//INTERNAL
 Global :: struct {
 	// -- Per-frame staging (hottest — touched by every prepare/upload/clear cycle) --
-	tmp_shape_verts:          [dynamic]Vertex, // Tessellated shape vertices staged for GPU upload.
-	tmp_text_verts:           [dynamic]Vertex, // Text vertices staged for GPU upload.
-	tmp_text_indices:         [dynamic]c.int, // Text index buffer staged for GPU upload.
-	tmp_text_batches:         [dynamic]TextBatch, // Text atlas batch metadata for indexed drawing.
-	tmp_primitives:           [dynamic]Primitive, // SDF primitives staged for GPU storage buffer upload.
-	tmp_sub_batches:          [dynamic]Sub_Batch, // Sub-batch records that drive draw call dispatch.
-	tmp_uncached_text:        [dynamic]^sdl_ttf.Text, // Uncached TTF_Text objects destroyed after end() submits.
-	layers:                   [dynamic]Layer, // Draw layers, each with its own scissor stack.
-	scissors:                 [dynamic]Scissor, // Scissor rects that clip drawing within each layer.
+	tmp_shape_verts:              [dynamic]Vertex_2D, // Tessellated shape vertices staged for GPU upload.
+	tmp_text_verts:               [dynamic]Vertex_2D, // Text vertices staged for GPU upload.
+	tmp_text_indices:             [dynamic]c.int, // Text index buffer staged for GPU upload.
+	tmp_text_batches:             [dynamic]Text_Batch, // Text atlas batch metadata for indexed drawing.
+	tmp_primitives:               [dynamic]Core_2D_Primitive, // SDF primitives staged for GPU storage buffer upload (core 2D subsystem).
+	tmp_sub_batches:              [dynamic]Sub_Batch, // Sub-batch records that drive draw call dispatch.
+	tmp_uncached_text:            [dynamic]^sdl_ttf.Text, // Uncached TTF_Text objects destroyed after end() submits.
+	tmp_gaussian_blur_primitives: [dynamic]Gaussian_Blur_Primitive, // Gaussian blur primitives staged for GPU storage buffer upload.
+	layers:                       [dynamic]Layer, // Draw layers, each with its own scissor stack.
+	scissors:                     [dynamic]Scissor, // Scissor rects that clip drawing within each layer.
 
 	// -- Per-frame scalars (accessed during prepare and draw_layer) --
-	curr_layer_index:         uint, // Index of the currently active layer.
-	dpi_scaling:              f32, // Window DPI scale factor applied to all pixel coordinates.
-	clay_z_index:             i16, // Tracks z-index for layer splitting during Clay batch processing.
-	cleared:                  bool, // Whether the render target has been cleared this frame.
+	curr_layer_index:             uint, // Index of the currently active layer.
+	dpi_scaling:                  f32, // Window DPI scale factor applied to all pixel coordinates.
+	clay_z_index:                 i16, // Tracks z-index for layer splitting during Clay batch processing.
+	cleared:                      bool, // Whether the render target has been cleared this frame.
 
-	// -- Pipeline (accessed every draw_layer call) --
-	pipeline_2d_base:         Pipeline_2D_Base, // The unified 2D GPU pipeline (shaders, buffers, samplers).
-	device:                   ^sdl.GPUDevice, // GPU device handle, stored at init.
-	samplers:                 [SAMPLER_PRESET_COUNT]^sdl.GPUSampler, // Lazily-created sampler objects, one per Sampler_Preset.
+	// -- Subsystems (accessed every draw_layer call) --
+	core_2d:                      Core_2D, // The unified 2D GPU pipeline (shaders, buffers, samplers).
+	backdrop:                     Backdrop, // Frosted-glass backdrop blur subsystem (downsample + blur PSOs, working textures).
+	device:                       ^sdl.GPUDevice, // GPU device handle, stored at init.
+	samplers:                     [SAMPLER_PRESET_COUNT]^sdl.GPUSampler, // Lazily-created sampler objects, one per Sampler_Preset.
 
 	// -- Deferred release (processed once per frame at frame boundary) --
-	pending_texture_releases: [dynamic]Texture_Id, // Deferred GPU texture releases, processed next frame.
-	pending_text_releases:    [dynamic]^sdl_ttf.Text, // Deferred TTF_Text destroys, processed next frame.
+	pending_texture_releases:     [dynamic]Texture_Id, // Deferred GPU texture releases, processed next frame.
+	pending_text_releases:        [dynamic]^sdl_ttf.Text, // Deferred TTF_Text destroys, processed next frame.
 
 	// -- Textures (registration is occasional, binding is per draw call) --
-	texture_slots:            [dynamic]Texture_Slot, // Registered texture slots indexed by Texture_Id.
-	texture_free_list:        [dynamic]u32, // Recycled slot indices available for reuse.
-
-	// -- MSAA (once per frame in end()) --
-	msaa_texture:             ^sdl.GPUTexture, // Intermediate render target for multi-sample resolve.
-	msaa_width:               u32, // Cached width to detect when MSAA texture needs recreation.
-	msaa_height:              u32, // Cached height to detect when MSAA texture needs recreation.
-	sample_count:             sdl.GPUSampleCount, // Sample count chosen at init (._1 means MSAA disabled).
+	texture_slots:                [dynamic]Texture_Slot, // Registered texture slots indexed by Texture_Id.
+	texture_free_list:            [dynamic]u32, // Recycled slot indices available for reuse.
 
 	// -- Clay (once per frame in prepare_clay_batch) --
-	clay_memory:              [^]u8, // Raw memory block backing Clay's internal arena.
+	clay_memory:                  [^]u8, // Raw memory block backing Clay's internal arena.
 
 	// -- Text (occasional — font registration and text cache lookups) --
-	text_cache:               Text_Cache, // Font registry, SDL_ttf engine, and cached TTF_Text objects.
+	text_cache:                   Text_Cache, // Font registry, SDL_ttf engine, and cached TTF_Text objects.
 
 	// -- Resize tracking (cold — checked once per frame in resize_global) --
-	max_layers:               int, // High-water marks for dynamic array shrink heuristic.
-	max_scissors:             int,
-	max_shape_verts:          int,
-	max_text_verts:           int,
-	max_text_indices:         int,
-	max_text_batches:         int,
-	max_primitives:           int,
-	max_sub_batches:          int,
+	max_layers:                   int, // High-water marks for dynamic array shrink heuristic.
+	max_scissors:                 int,
+	max_shape_verts:              int,
+	max_text_verts:               int,
+	max_text_indices:             int,
+	max_text_batches:             int,
+	max_primitives:               int,
+	max_sub_batches:              int,
+	max_gaussian_blur_primitives: int,
 
 	// -- Init-only (coldest — set once at init, never written again) --
-	odin_context:             runtime.Context, // Odin context captured at init for use in callbacks.
+	odin_context:                 runtime.Context, // Odin context captured at init for use in callbacks.
 }
 
 // ---------------------------------------------------------------------------------------------------------------------
-// ----- Core types --------------------
+// ----- Core types ------------
 // ---------------------------------------------------------------------------------------------------------------------
 
 // A 2D position in world space. Non-distinct alias for [2]f32 — bare literals like {100, 200}
@@ -128,8 +147,8 @@ Vec2 :: [2]f32
 // transparent. This matches the GPU-side layout: the shader unpacks via unpackUnorm4x8 which
 // reads the bytes in memory order as R, G, B, A and normalizes each to [0, 1].
 //
-// When used in the Primitive struct (Primitive.color), the 4 bytes are stored as a u32 in
-// native byte order and unpacked by the shader.
+// When used in the Core_2D_Primitive or Gaussian_Blur_Primitive structs (e.g. .color), the 4 bytes
+// are stored as a u32 in native byte order and unpacked by the shader.
 Color :: [4]u8
 
 BLACK :: Color{0, 0, 0, 255}
@@ -139,6 +158,13 @@ GREEN :: Color{0, 255, 0, 255}
 BLUE :: Color{0, 0, 255, 255}
 BLANK :: Color{0, 0, 0, 0}
 
+Rectangle :: struct {
+	x:      f32,
+	y:      f32,
+	width:  f32,
+	height: f32,
+}
+
 // Per-corner rounding radii for rectangles, specified clockwise from top-left.
 // All values are in logical pixels (pre-DPI-scaling).
 Rectangle_Radii :: struct {
@@ -149,29 +175,42 @@ Rectangle_Radii :: struct {
 }
 
 // A linear gradient between two colors along an arbitrary angle.
-// The `end_color` is the color at the end of the gradient direction; the shape's fill `color`
-// parameter acts as the start color. `angle` is in degrees: 0 = left-to-right, 90 = top-to-bottom.
+// `angle` is in degrees: 0 = left-to-right, 90 = top-to-bottom.
 Linear_Gradient :: struct {
-	end_color: Color,
-	angle:     f32,
+	start_color: Color,
+	end_color:   Color,
+	angle:       f32,
 }
 
 // A radial gradient between two colors from center to edge.
-// The `outer_color` is the color at the shape's edge; the shape's fill `color` parameter
-// acts as the inner (center) color.
 Radial_Gradient :: struct {
+	inner_color: Color,
 	outer_color: Color,
 }
 
-// Tagged union for specifying a gradient on any shape. Defaults to `nil` (no gradient).
-// When a gradient is active, the shape's `color` parameter becomes the start/inner color,
-// and the gradient struct carries the end/outer color plus any type-specific parameters.
-//
-// Gradient and Textured are mutually exclusive on the same primitive. If a shape uses
-// `rectangle_texture`, gradients are not applicable — use the tint color instead.
-Gradient :: union {
+// Sample a registered texture as the shape's fill source.
+// `tint` modulates the sampled texels per-pixel (constant multiply); WHITE passes through
+// unchanged. Translucent tints fade the texture; non-white tints recolor it.
+// Zero-initialized fields are treated as defaults by the shape procs:
+//   tint    == Color{}      → WHITE         (BLANK is also Color{}, so it maps to WHITE too)
+//   uv_rect == Rectangle{}  → {0, 0, 1, 1}  (full texture)
+//   sampler == zero value   → .Linear_Clamp (enum value 0)
+Texture_Fill :: struct {
+	id:      Texture_Id, // Registered texture to sample.
+	tint:    Color, // Multiplied with the sampled texels; Color{} is treated as WHITE.
+	uv_rect: Rectangle, // UV rect within the texture; Rectangle{} is treated as the full texture.
+	sampler: Sampler_Preset, // Sampler preset for this fill; the zero value is .Linear_Clamp.
+}
+
+// Mutually exclusive fill sources for shape procs. Each shape proc accepts a Brush
+// as its third positional parameter. Texture and gradient are mutually exclusive at
+// the GPU level (they share the worst-case register path); outline is orthogonal and
+// composes with any Brush variant.
+Brush :: union {
+	Color,
 	Linear_Gradient,
 	Radial_Gradient,
+	Texture_Fill,
 }
 
 // Convert clay.Color ([4]c.float in 0–255 range) to Color.
@@ -188,7 +227,7 @@ color_to_f32 :: proc(color: Color) -> [4]f32 {
 // Pre-multiply RGB channels by alpha. The tessellated vertex path and text path require
 // premultiplied colors because the blend state is ONE, ONE_MINUS_SRC_ALPHA and the
 // tessellated fragment shader passes vertex color through without further modification.
-// Users who construct Vertex structs manually for prepare_shape must premultiply their colors.
+// Users who construct Vertex_2D structs manually for prepare_shape must premultiply their colors.
 premultiply_color :: #force_inline proc(color: Color) -> Color {
 	a := u32(color[3])
 	return Color {
@@ -199,25 +238,31 @@ premultiply_color :: #force_inline proc(color: Color) -> Color {
 	}
 }
 
-Rectangle :: struct {
-	x:      f32,
-	y:      f32,
-	width:  f32,
-	height: f32,
-}
+// ---------------------------------------------------------------------------------------------------------------------
+// ----- Frame layout types ------------
+// ---------------------------------------------------------------------------------------------------------------------
 
+//INTERNAL
 Sub_Batch_Kind :: enum u8 {
-	Tessellated, // non-indexed, white texture or user texture, mode 0
-	Text, // indexed, atlas texture, mode 0
-	SDF, // instanced unit quad, white texture or user texture, mode 1
+	Tessellated, // non-indexed, white texture or user texture, Core_2D_Mode.Tessellated
+	Text, // indexed, atlas texture, Core_2D_Mode.Tessellated
+	SDF, // instanced unit quad, Core_2D_Mode.SDF
+	// instanced unit quad, backdrop subsystem V-composite (indexes Gaussian_Blur_Primitive).
+	// Bracket-scheduled per layer; see README.md § "Backdrop pipeline" for ordering semantics.
+	Backdrop,
 }
 
+//INTERNAL
 Sub_Batch :: struct {
-	kind:       Sub_Batch_Kind,
-	offset:     u32, // Tessellated: vertex offset; Text: text_batch index; SDF: primitive index
-	count:      u32, // Tessellated: vertex count; Text: always 1; SDF: primitive count
-	texture_id: Texture_Id,
-	sampler:    Sampler_Preset,
+	kind:           Sub_Batch_Kind,
+	offset:         u32, // Tessellated: vertex offset; Text: text_batch index; SDF/Backdrop: primitive index
+	count:          u32, // Tessellated: vertex count; Text: always 1; SDF/Backdrop: primitive count
+	texture_id:     Texture_Id,
+	sampler:        Sampler_Preset,
+	// Backdrop only — Gaussian std-dev in logical pixels. Named with the
+	// distribution prefix because future kinds may want different sigma
+	// shapes (e.g. drop-shadow penumbra) without overloading this field.
+	gaussian_sigma: f32,
 }
 
 Layer :: struct {
@@ -228,70 +273,85 @@ Layer :: struct {
 	scissor_len:     u32,
 }
 
+//INTERNAL
 Scissor :: struct {
 	bounds:          sdl.Rect,
 	sub_batch_start: u32,
 	sub_batch_len:   u32,
 }
 
-Init_Options :: struct {
-	// MSAA sample count. Default is ._1 (no MSAA). SDF rendering does not benefit from MSAA
-	// because SDF fragments compute coverage analytically via `smoothstep`. MSAA helps for
-	// text glyph edges and tessellated user geometry. Set to ._4 or ._8 for text-heavy UIs,
-	// or use `MSAA_MAX` to request the highest sample count the GPU supports for the swapchain
-	// format.
-	msaa_samples: sdl.GPUSampleCount,
-}
+// ---------------------------------------------------------------------------------------------------------------------
+// ----- Lifecycle ------------
+// ---------------------------------------------------------------------------------------------------------------------
 
 // Initialize the renderer. Returns false if GPU pipeline or text engine creation fails.
+//
+// MSAA is intentionally NOT supported. SDF text and shapes compute coverage analytically via
+// `smoothstep`, so they don't benefit from multisampling. Tessellated user geometry submitted
+// via `prepare_shape` is not anti-aliased — if you need AA on tessellated content, render it
+// to your own offscreen target and submit it as a texture. RAD Debugger and the SBC target
+// (Mali Valhall, where MSAA's per-tile bandwidth multiplier is expensive) drove this decision.
 @(require_results)
 init :: proc(
 	device: ^sdl.GPUDevice,
 	window: ^sdl.Window,
-	options: Init_Options = {},
 	allocator := context.allocator,
 	odin_context := context,
 ) -> (
 	ok: bool,
 ) {
 	min_memory_size: c.size_t = cast(c.size_t)clay.MinMemorySize()
-	resolved_sample_count := options.msaa_samples
-	if resolved_sample_count == MSAA_MAX {
-		resolved_sample_count = max_sample_count(device, window)
+
+	core, core_ok := create_core_2d(device, window)
+	if !core_ok {
+		return false
 	}
 
-	pipeline, pipeline_ok := create_pipeline_2d_base(device, window, resolved_sample_count)
-	if !pipeline_ok {
+	backdrop, backdrop_ok := create_backdrop(device, window)
+	if !backdrop_ok {
+		destroy_core_2d(device, &core)
 		return false
 	}
 
 	text_cache, text_ok := init_text_cache(device, allocator)
 	if !text_ok {
-		destroy_pipeline_2d_base(device, &pipeline)
+		destroy_backdrop(device, &backdrop)
+		destroy_core_2d(device, &core)
 		return false
 	}
 
 	GLOB = Global {
-		layers                   = make([dynamic]Layer, 0, INITIAL_LAYER_SIZE, allocator = allocator),
-		scissors                 = make([dynamic]Scissor, 0, INITIAL_SCISSOR_SIZE, allocator = allocator),
-		tmp_shape_verts          = make([dynamic]Vertex, 0, BUFFER_INIT_SIZE, allocator = allocator),
-		tmp_text_verts           = make([dynamic]Vertex, 0, BUFFER_INIT_SIZE, allocator = allocator),
-		tmp_text_indices         = make([dynamic]c.int, 0, BUFFER_INIT_SIZE, allocator = allocator),
-		tmp_text_batches         = make([dynamic]TextBatch, 0, BUFFER_INIT_SIZE, allocator = allocator),
-		tmp_primitives           = make([dynamic]Primitive, 0, BUFFER_INIT_SIZE, allocator = allocator),
-		tmp_sub_batches          = make([dynamic]Sub_Batch, 0, BUFFER_INIT_SIZE, allocator = allocator),
-		tmp_uncached_text        = make([dynamic]^sdl_ttf.Text, 0, 16, allocator = allocator),
-		device                   = device,
-		texture_slots            = make([dynamic]Texture_Slot, 0, 16, allocator = allocator),
-		texture_free_list        = make([dynamic]u32, 0, 16, allocator = allocator),
-		pending_texture_releases = make([dynamic]Texture_Id, 0, 16, allocator = allocator),
-		pending_text_releases    = make([dynamic]^sdl_ttf.Text, 0, 16, allocator = allocator),
-		odin_context             = odin_context,
-		dpi_scaling              = sdl.GetWindowDisplayScale(window),
-		clay_memory              = make([^]u8, min_memory_size, allocator = allocator),
-		sample_count             = resolved_sample_count,
-		pipeline_2d_base         = pipeline,
-		text_cache               = text_cache,
+		layers                       = make([dynamic]Layer, 0, INITIAL_LAYER_SIZE, allocator = allocator),
+		scissors                     = make([dynamic]Scissor, 0, INITIAL_SCISSOR_SIZE, allocator = allocator),
+		tmp_shape_verts              = make([dynamic]Vertex_2D, 0, BUFFER_INIT_SIZE, allocator = allocator),
+		tmp_text_verts               = make([dynamic]Vertex_2D, 0, BUFFER_INIT_SIZE, allocator = allocator),
+		tmp_text_indices             = make([dynamic]c.int, 0, BUFFER_INIT_SIZE, allocator = allocator),
+		tmp_text_batches             = make([dynamic]Text_Batch, 0, BUFFER_INIT_SIZE, allocator = allocator),
+		tmp_primitives               = make(
+			[dynamic]Core_2D_Primitive,
+			0,
+			BUFFER_INIT_SIZE,
+			allocator = allocator,
+		),
+		tmp_sub_batches              = make([dynamic]Sub_Batch, 0, BUFFER_INIT_SIZE, allocator = allocator),
+		tmp_uncached_text            = make([dynamic]^sdl_ttf.Text, 0, 16, allocator = allocator),
+		tmp_gaussian_blur_primitives = make(
+			[dynamic]Gaussian_Blur_Primitive,
+			0,
+			BUFFER_INIT_SIZE,
+			allocator = allocator,
+		),
+		device                       = device,
+		texture_slots                = make([dynamic]Texture_Slot, 0, 16, allocator = allocator),
+		texture_free_list            = make([dynamic]u32, 0, 16, allocator = allocator),
+		pending_texture_releases     = make([dynamic]Texture_Id, 0, 16, allocator = allocator),
+		pending_text_releases        = make([dynamic]^sdl_ttf.Text, 0, 16, allocator = allocator),
+		odin_context                 = odin_context,
+		dpi_scaling                  = sdl.GetWindowDisplayScale(window),
+		clay_memory                  = make([^]u8, min_memory_size, allocator = allocator),
+		core_2d                      = core,
+		backdrop                     = backdrop,
+		text_cache                   = text_cache,
 	}
 
 	// Reserve slot 0 for INVALID_TEXTURE
@@ -325,6 +385,8 @@ resize_global :: proc() {
 	shrink(&GLOB.tmp_primitives, GLOB.max_primitives)
 	if len(GLOB.tmp_sub_batches) > GLOB.max_sub_batches do GLOB.max_sub_batches = len(GLOB.tmp_sub_batches)
 	shrink(&GLOB.tmp_sub_batches, GLOB.max_sub_batches)
+	if len(GLOB.tmp_gaussian_blur_primitives) > GLOB.max_gaussian_blur_primitives do GLOB.max_gaussian_blur_primitives = len(GLOB.tmp_gaussian_blur_primitives)
+	shrink(&GLOB.tmp_gaussian_blur_primitives, GLOB.max_gaussian_blur_primitives)
 }
 
 destroy :: proc(device: ^sdl.GPUDevice, allocator := context.allocator) {
@@ -336,22 +398,21 @@ destroy :: proc(device: ^sdl.GPUDevice, allocator := context.allocator) {
 	delete(GLOB.tmp_text_batches)
 	delete(GLOB.tmp_primitives)
 	delete(GLOB.tmp_sub_batches)
+	delete(GLOB.tmp_gaussian_blur_primitives)
 	for ttf_text in GLOB.tmp_uncached_text do sdl_ttf.DestroyText(ttf_text)
 	delete(GLOB.tmp_uncached_text)
 	free(GLOB.clay_memory, allocator)
-	if GLOB.msaa_texture != nil {
-		sdl.ReleaseGPUTexture(device, GLOB.msaa_texture)
-	}
 	process_pending_texture_releases()
 	destroy_all_textures()
 	destroy_sampler_pool()
 	for ttf_text in GLOB.pending_text_releases do sdl_ttf.DestroyText(ttf_text)
 	delete(GLOB.pending_text_releases)
-	destroy_pipeline_2d_base(device, &GLOB.pipeline_2d_base)
+	destroy_backdrop(device, &GLOB.backdrop)
+	destroy_core_2d(device, &GLOB.core_2d)
 	destroy_text_cache()
 }
 
-// Internal
+//INTERNAL
 clear_global :: proc() {
 	// Process deferred texture releases from the previous frame
 	process_pending_texture_releases()
@@ -373,32 +434,11 @@ clear_global :: proc() {
 	clear(&GLOB.tmp_text_batches)
 	clear(&GLOB.tmp_primitives)
 	clear(&GLOB.tmp_sub_batches)
+	clear(&GLOB.tmp_gaussian_blur_primitives)
 }
 
 // ---------------------------------------------------------------------------------------------------------------------
-// ----- Text measurement (Clay) -------
-// ---------------------------------------------------------------------------------------------------------------------
-
-@(private = "file")
-measure_text_clay :: proc "c" (
-	text: clay.StringSlice,
-	config: ^clay.TextElementConfig,
-	user_data: rawptr,
-) -> clay.Dimensions {
-	context = GLOB.odin_context
-	text := string(text.chars[:text.length])
-	c_text := strings.clone_to_cstring(text, context.temp_allocator)
-	defer delete(c_text, context.temp_allocator)
-	width, height: c.int
-	if !sdl_ttf.GetStringSize(get_font(config.fontId, config.fontSize), c_text, 0, &width, &height) {
-		log.panicf("Failed to measure text: %s", sdl.GetError())
-	}
-
-	return clay.Dimensions{width = f32(width) / GLOB.dpi_scaling, height = f32(height) / GLOB.dpi_scaling}
-}
-
-// ---------------------------------------------------------------------------------------------------------------------
-// ----- Frame lifecycle ---------------
+// ----- Frame ------------
 // ---------------------------------------------------------------------------------------------------------------------
 
 // Sets up renderer to begin upload to the GPU. Returns starting `Layer` to begin processing primitives for.
@@ -450,135 +490,97 @@ new_layer :: proc(prev_layer: ^Layer, bounds: Rectangle) -> ^Layer {
 	return &GLOB.layers[GLOB.curr_layer_index]
 }
 
-// ---------------------------------------------------------------------------------------------------------------------
-// ----- Built-in primitive processing --
-// ---------------------------------------------------------------------------------------------------------------------
-
-// Submit shape vertices (colored triangles) to the given layer for rendering.
-// TODO: Should probably be renamed to better match tesselated naming conventions in the library.
-prepare_shape :: proc(layer: ^Layer, vertices: []Vertex) {
-	if len(vertices) == 0 do return
-	offset := u32(len(GLOB.tmp_shape_verts))
-	append(&GLOB.tmp_shape_verts, ..vertices)
-	scissor := &GLOB.scissors[layer.scissor_start + layer.scissor_len - 1]
-	append_or_extend_sub_batch(scissor, layer, .Tessellated, offset, u32(len(vertices)))
-}
-
-// Submit an SDF primitive to the given layer for rendering.
-prepare_sdf_primitive :: proc(layer: ^Layer, prim: Primitive) {
-	offset := u32(len(GLOB.tmp_primitives))
-	append(&GLOB.tmp_primitives, prim)
-	scissor := &GLOB.scissors[layer.scissor_start + layer.scissor_len - 1]
-	append_or_extend_sub_batch(scissor, layer, .SDF, offset, 1)
-}
-
-// Submit a text element to the given layer for rendering.
-// Copies SDL_ttf vertices directly (with baked position) and copies indices for indexed drawing.
-prepare_text :: proc(layer: ^Layer, text: Text) {
-	data := sdl_ttf.GetGPUTextDrawData(text.sdl_text)
-	if data == nil {
-		return // nil is normal for empty text
+// Render primitives. clear_color is the background fill before any layers are drawn.
+end :: proc(device: ^sdl.GPUDevice, window: ^sdl.Window, clear_color: Color = DFT_CLEAR_COLOR) {
+	cmd_buffer := sdl.AcquireGPUCommandBuffer(device)
+	if cmd_buffer == nil {
+		log.panicf("Failed to acquire GPU command buffer: %s", sdl.GetError())
 	}
 
-	scissor := &GLOB.scissors[layer.scissor_start + layer.scissor_len - 1]
+	// Pre-scan: if any layer this frame has a backdrop sub-batch, route the entire frame to
+	// source_texture so the bracket can sample the pre-bracket framebuffer without a
+	// mid-frame texture copy. Frames without any backdrop render straight to the swapchain
+	// texture and never touch the backdrop pipeline's working textures.
+	has_backdrop := frame_has_backdrop()
 
-	// Snap base position to integer physical pixels to avoid atlas sub-pixel
-	// sampling blur (and the off-by-one bottom-row clip that comes with it).
-	base_x := math.round(text.position[0] * GLOB.dpi_scaling)
-	base_y := math.round(text.position[1] * GLOB.dpi_scaling)
+	// Upload primitives to GPU (vertices, indices, SDF prims, and backdrop prims share one
+	// copy pass so we pay the BeginGPUCopyPass / EndGPUCopyPass cost once per frame).
+	copy_pass := sdl.BeginGPUCopyPass(cmd_buffer)
+	upload(device, copy_pass)
+	if has_backdrop {
+		upload_backdrop_primitives(device, copy_pass)
+	}
+	sdl.EndGPUCopyPass(copy_pass)
 
-	// Premultiply text color once — reused across all glyph vertices.
-	pm_color := premultiply_color(text.color)
+	swapchain_texture: ^sdl.GPUTexture
+	width, height: u32
+	if !sdl.WaitAndAcquireGPUSwapchainTexture(cmd_buffer, window, &swapchain_texture, &width, &height) {
+		log.panicf("Failed to acquire swapchain texture: %s", sdl.GetError())
+	}
 
-	for data != nil {
-		vertex_start := u32(len(GLOB.tmp_text_verts))
-		index_start := u32(len(GLOB.tmp_text_indices))
-
-		// Copy vertices with baked position offset
-		for i in 0 ..< data.num_vertices {
-			pos := data.xy[i]
-			uv := data.uv[i]
-			append(
-				&GLOB.tmp_text_verts,
-				Vertex{position = {pos.x + base_x, -pos.y + base_y}, uv = {uv.x, uv.y}, color = pm_color},
-			)
+	if swapchain_texture == nil {
+		// Window is minimized or not visible — submit and skip this frame
+		if !sdl.SubmitGPUCommandBuffer(cmd_buffer) {
+			log.panicf("Failed to submit GPU command buffer (minimized window): %s", sdl.GetError())
 		}
-
-		// Copy indices directly
-		append(&GLOB.tmp_text_indices, ..data.indices[:data.num_indices])
-
-		batch_idx := u32(len(GLOB.tmp_text_batches))
-		append(
-			&GLOB.tmp_text_batches,
-			TextBatch {
-				atlas_texture = data.atlas_texture,
-				vertex_start = vertex_start,
-				vertex_count = u32(data.num_vertices),
-				index_start = index_start,
-				index_count = u32(data.num_indices),
-			},
-		)
-
-		// Each atlas chunk is a separate sub-batch (different atlas textures can't coalesce)
-		append_or_extend_sub_batch(scissor, layer, .Text, batch_idx, 1)
-
-		data = data.next
-	}
-}
-
-// Submit a text element with a 2D affine transform applied to vertices.
-// Used by the high-level `text` proc when rotation or a non-zero origin is specified.
-// NOTE: xform must be in physical (DPI-scaled) pixel space — the caller pre-scales
-// pos and origin by GLOB.dpi_scaling before building the transform.
-prepare_text_transformed :: proc(layer: ^Layer, text: Text, transform: Transform_2D) {
-	data := sdl_ttf.GetGPUTextDrawData(text.sdl_text)
-	if data == nil {
 		return
 	}
 
-	scissor := &GLOB.scissors[layer.scissor_start + layer.scissor_len - 1]
+	render_texture := swapchain_texture
+	if has_backdrop {
+		ensure_backdrop_textures(device, sdl.GetGPUSwapchainTextureFormat(device, window), width, height)
+		render_texture = GLOB.backdrop.source_texture
+	}
 
-	// Premultiply text color once — reused across all glyph vertices.
-	pm_color := premultiply_color(text.color)
+	// Premultiply clear color: the blend state is ONE, ONE_MINUS_SRC_ALPHA (premultiplied),
+	// so the clear color must also be premultiplied for correct background compositing.
+	clear_color_straight := color_to_f32(clear_color)
+	clear_alpha := clear_color_straight[3]
+	clear_color_f32 := [4]f32 {
+		clear_color_straight[0] * clear_alpha,
+		clear_color_straight[1] * clear_alpha,
+		clear_color_straight[2] * clear_alpha,
+		clear_alpha,
+	}
 
-	for data != nil {
-		vertex_start := u32(len(GLOB.tmp_text_verts))
-		index_start := u32(len(GLOB.tmp_text_indices))
+	// Draw layers. One render pass per layer; sub-batches draw in submission order within each scissor.
+	for &layer in GLOB.layers {
+		draw_layer(device, window, cmd_buffer, render_texture, width, height, clear_color_f32, &layer)
+	}
 
-		for i in 0 ..< data.num_vertices {
-			pos := data.xy[i]
-			uv := data.uv[i]
-			// SDL_ttf gives glyph positions in physical pixels relative to text origin.
-			// The transform is already in physical-pixel space (caller pre-scaled),
-			// so we apply directly — no per-vertex DPI divide/multiply.
-			append(
-				&GLOB.tmp_text_verts,
-				Vertex{position = apply_transform(transform, {pos.x, -pos.y}), uv = {uv.x, uv.y}, color = pm_color},
-			)
-		}
-
-		append(&GLOB.tmp_text_indices, ..data.indices[:data.num_indices])
-
-		batch_idx := u32(len(GLOB.tmp_text_batches))
-		append(
-			&GLOB.tmp_text_batches,
-			TextBatch {
-				atlas_texture = data.atlas_texture,
-				vertex_start = vertex_start,
-				vertex_count = u32(data.num_vertices),
-				index_start = index_start,
-				index_count = u32(data.num_indices),
-			},
+	// When we rendered into source_texture, copy it to the swapchain. Single
+	// CopyGPUTextureToTexture call per frame, only when backdrop content was present.
+	if has_backdrop {
+		copy_pass := sdl.BeginGPUCopyPass(cmd_buffer)
+		sdl.CopyGPUTextureToTexture(
+			copy_pass,
+			sdl.GPUTextureLocation{texture = GLOB.backdrop.source_texture},
+			sdl.GPUTextureLocation{texture = swapchain_texture},
+			width,
+			height,
+			1,
+			false,
 		)
+		sdl.EndGPUCopyPass(copy_pass)
+	}
 
-		append_or_extend_sub_batch(scissor, layer, .Text, batch_idx, 1)
-
-		data = data.next
+	if !sdl.SubmitGPUCommandBuffer(cmd_buffer) {
+		log.panicf("Failed to submit GPU command buffer: %s", sdl.GetError())
 	}
 }
 
+// ---------------------------------------------------------------------------------------------------------------------
+// ----- Sub-batch dispatch ------------
+// ---------------------------------------------------------------------------------------------------------------------
+
 // Append a new sub-batch or extend the last one if same kind and contiguous.
-@(private)
+//
+// `gaussian_sigma` is only consulted for kind == .Backdrop; two .Backdrop sub-batches with
+// different sigmas cannot coalesce because they require separate H+V blur passes in the
+// bracket scheduler. Float equality is intentional — user-supplied literal sigmas (e.g.
+// `sigma = 12`) produce bit-identical floats, and the worst case for two sigmas that differ
+// only by a ulp is one extra pass pair (correct, just slightly suboptimal).
+//INTERNAL
 append_or_extend_sub_batch :: proc(
 	scissor: ^Scissor,
 	layer: ^Layer,
@@ -587,6 +589,7 @@ append_or_extend_sub_batch :: proc(
 	count: u32,
 	texture_id: Texture_Id = INVALID_TEXTURE,
 	sampler: Sampler_Preset = DFT_SAMPLER,
+	gaussian_sigma: f32 = 0,
 ) {
 	if scissor.sub_batch_len > 0 {
 		last := &GLOB.tmp_sub_batches[scissor.sub_batch_start + scissor.sub_batch_len - 1]
@@ -594,21 +597,29 @@ append_or_extend_sub_batch :: proc(
 		   kind != .Text &&
 		   last.offset + last.count == offset &&
 		   last.texture_id == texture_id &&
-		   last.sampler == sampler {
+		   last.sampler == sampler &&
+		   (kind != .Backdrop || last.gaussian_sigma == gaussian_sigma) {
 			last.count += count
 			return
 		}
 	}
 	append(
 		&GLOB.tmp_sub_batches,
-		Sub_Batch{kind = kind, offset = offset, count = count, texture_id = texture_id, sampler = sampler},
+		Sub_Batch {
+			kind = kind,
+			offset = offset,
+			count = count,
+			texture_id = texture_id,
+			sampler = sampler,
+			gaussian_sigma = gaussian_sigma,
+		},
 	)
 	scissor.sub_batch_len += 1
 	layer.sub_batch_len += 1
 }
 
 // ---------------------------------------------------------------------------------------------------------------------
-// ----- Clay ------------------------
+// ----- Clay ------------
 // ---------------------------------------------------------------------------------------------------------------------
 
 @(private = "file")
@@ -617,6 +628,24 @@ clay_error_handler :: proc "c" (errorData: clay.ErrorData) {
 	log.error("Clay error:", errorData.errorType, errorData.errorText)
 }
 
+@(private = "file")
+measure_text_clay :: proc "c" (
+	text: clay.StringSlice,
+	config: ^clay.TextElementConfig,
+	user_data: rawptr,
+) -> clay.Dimensions {
+	context = GLOB.odin_context // "c" procedures carry no implicit Odin context
+	// SDL_ttf measures a cstring, so copy Clay's length-delimited slice into a terminated temp string.
+	terminated := strings.clone_to_cstring(string(text.chars[:text.length]), context.temp_allocator)
+	defer delete(terminated, context.temp_allocator)
+	px_w, px_h: c.int
+	ok := sdl_ttf.GetStringSize(get_font(config.fontId, config.fontSize), terminated, 0, &px_w, &px_h)
+	if !ok do log.panicf("Failed to measure text: %s", sdl.GetError())
+
+	// Scale the measured size down by the DPI factor before handing it back to Clay.
+	return clay.Dimensions{width = f32(px_w) / GLOB.dpi_scaling, height = f32(px_h) / GLOB.dpi_scaling}
+}
+
 // Called for each Clay `RenderCommandType.Custom` render command that
 // `prepare_clay_batch` encounters.
 //
@@ -710,7 +739,7 @@ prepare_clay_batch :: proc(
 
 			// Background color behind the image (Clay allows it)
 			bg := color_from_clay(render_data.backgroundColor)
-			if bg[3] > 0 {
+			if bg.a > 0 {
 				rectangle(layer, bounds, bg, radii = radii)
 			}
 
@@ -718,7 +747,12 @@ prepare_clay_batch :: proc(
 			uv, sampler, inner := fit_params(img_data.fit, bounds, img_data.texture_id)
 
 			// Draw the image
-			rectangle_texture(layer, inner, img_data.texture_id, img_data.tint, uv, sampler, radii)
+			rectangle(
+				layer,
+				inner,
+				Texture_Fill{id = img_data.texture_id, tint = img_data.tint, uv_rect = uv, sampler = sampler},
+				radii = radii,
+			)
 		case clay.RenderCommandType.ScissorStart:
 			if bounds.width == 0 || bounds.height == 0 do continue
 
@@ -780,177 +814,18 @@ prepare_clay_batch :: proc(
 	}
 }
 
-// Render primitives. clear_color is the background fill before any layers are drawn.
-end :: proc(device: ^sdl.GPUDevice, window: ^sdl.Window, clear_color: Color = DFT_CLEAR_COLOR) {
-	cmd_buffer := sdl.AcquireGPUCommandBuffer(device)
-	if cmd_buffer == nil {
-		log.panicf("Failed to acquire GPU command buffer: %s", sdl.GetError())
-	}
-
-	// Upload primitives to GPU
-	copy_pass := sdl.BeginGPUCopyPass(cmd_buffer)
-	upload(device, copy_pass)
-	sdl.EndGPUCopyPass(copy_pass)
-
-	swapchain_texture: ^sdl.GPUTexture
-	width, height: u32
-	if !sdl.WaitAndAcquireGPUSwapchainTexture(cmd_buffer, window, &swapchain_texture, &width, &height) {
-		log.panicf("Failed to acquire swapchain texture: %s", sdl.GetError())
-	}
-
-	if swapchain_texture == nil {
-		// Window is minimized or not visible — submit and skip this frame
-		if !sdl.SubmitGPUCommandBuffer(cmd_buffer) {
-			log.panicf("Failed to submit GPU command buffer (minimized window): %s", sdl.GetError())
-		}
-		return
-	}
-
-	use_msaa := GLOB.sample_count != ._1
-	render_texture := swapchain_texture
-
-	if use_msaa {
-		ensure_msaa_texture(device, sdl.GetGPUSwapchainTextureFormat(device, window), width, height)
-		render_texture = GLOB.msaa_texture
-	}
-
-	// Premultiply clear color: the blend state is ONE, ONE_MINUS_SRC_ALPHA (premultiplied),
-	// so the clear color must also be premultiplied for correct background compositing.
-	clear_color_straight := color_to_f32(clear_color)
-	clear_alpha := clear_color_straight[3]
-	clear_color_f32 := [4]f32 {
-		clear_color_straight[0] * clear_alpha,
-		clear_color_straight[1] * clear_alpha,
-		clear_color_straight[2] * clear_alpha,
-		clear_alpha,
-	}
-
-	// Draw layers. One render pass per layer; sub-batches draw in submission order within each scissor.
-	for &layer, index in GLOB.layers {
-		log.debug("Drawing layer", index)
-		draw_layer(device, window, cmd_buffer, render_texture, width, height, clear_color_f32, &layer)
-	}
-
-	// Resolve MSAA render texture to the swapchain.
-	if use_msaa {
-		resolve_pass := sdl.BeginGPURenderPass(
-			cmd_buffer,
-			&sdl.GPUColorTargetInfo {
-				texture = render_texture,
-				load_op = .LOAD,
-				store_op = .RESOLVE,
-				resolve_texture = swapchain_texture,
-			},
-			1,
-			nil,
-		)
-		sdl.EndGPURenderPass(resolve_pass)
-	}
-
-	if !sdl.SubmitGPUCommandBuffer(cmd_buffer) {
-		log.panicf("Failed to submit GPU command buffer: %s", sdl.GetError())
-	}
-}
-
 // ---------------------------------------------------------------------------------------------------------------------
-// ----- MSAA --------------------------
-// ---------------------------------------------------------------------------------------------------------------------
-
-// Query the highest MSAA sample count supported by the GPU for the swapchain format.
-max_sample_count :: proc(device: ^sdl.GPUDevice, window: ^sdl.Window) -> sdl.GPUSampleCount {
-	format := sdl.GetGPUSwapchainTextureFormat(device, window)
-	counts := [?]sdl.GPUSampleCount{._8, ._4, ._2}
-	for count in counts {
-		if sdl.GPUTextureSupportsSampleCount(device, format, count) do return count
-	}
-	return ._1
-}
-
-@(private = "file")
-ensure_msaa_texture :: proc(device: ^sdl.GPUDevice, format: sdl.GPUTextureFormat, width, height: u32) {
-	if GLOB.msaa_texture != nil && GLOB.msaa_width == width && GLOB.msaa_height == height {
-		return
-	}
-	if GLOB.msaa_texture != nil {
-		sdl.ReleaseGPUTexture(device, GLOB.msaa_texture)
-	}
-	GLOB.msaa_texture = sdl.CreateGPUTexture(
-		device,
-		sdl.GPUTextureCreateInfo {
-			type = .D2,
-			format = format,
-			usage = {.COLOR_TARGET},
-			width = width,
-			height = height,
-			layer_count_or_depth = 1,
-			num_levels = 1,
-			sample_count = GLOB.sample_count,
-		},
-	)
-	if GLOB.msaa_texture == nil {
-		log.panicf("Failed to create MSAA texture (%dx%d): %s", width, height, sdl.GetError())
-	}
-	GLOB.msaa_width = width
-	GLOB.msaa_height = height
-}
-
-// ---------------------------------------------------------------------------------------------------------------------
-// ----- Utility -----------------------
-// ---------------------------------------------------------------------------------------------------------------------
-
-ortho_rh :: proc(left: f32, right: f32, bottom: f32, top: f32, near: f32, far: f32) -> matrix[4, 4]f32 {
-	return matrix[4, 4]f32{
-		2.0 / (right - left), 0.0, 0.0, -(right + left) / (right - left),
-		0.0, 2.0 / (top - bottom), 0.0, -(top + bottom) / (top - bottom),
-		0.0, 0.0, -2.0 / (far - near), -(far + near) / (far - near),
-		0.0, 0.0, 0.0, 1.0,
-	}
-}
-
-Draw_Mode :: enum u32 {
-	Tessellated = 0,
-	SDF         = 1,
-}
-
-Vertex_Uniforms :: struct {
-	projection: matrix[4, 4]f32,
-	scale:      f32,
-	mode:       Draw_Mode,
-}
-
-// Push projection, dpi scale, and rendering mode as a single uniform block (slot 0).
-push_globals :: proc(
-	cmd_buffer: ^sdl.GPUCommandBuffer,
-	width: f32,
-	height: f32,
-	mode: Draw_Mode = .Tessellated,
-) {
-	globals := Vertex_Uniforms {
-		projection = ortho_rh(
-			left = 0.0,
-			top = 0.0,
-			right = f32(width),
-			bottom = f32(height),
-			near = -1.0,
-			far = 1.0,
-		),
-		scale      = GLOB.dpi_scaling,
-		mode       = mode,
-	}
-
-	sdl.PushGPUVertexUniformData(cmd_buffer, 0, &globals, size_of(Vertex_Uniforms))
-}
-
-// ---------------------------------------------------------------------------------------------------------------------
-// ----- Buffer ------------------------
+// ----- Buffer ------------
 // ---------------------------------------------------------------------------------------------------------------------
 
+//INTERNAL
 Buffer :: struct {
 	gpu:      ^sdl.GPUBuffer,
 	transfer: ^sdl.GPUTransferBuffer,
 	size:     u32,
 }
 
+//INTERNAL
 @(require_results)
 create_buffer :: proc(
 	device: ^sdl.GPUDevice,
@@ -977,6 +852,7 @@ create_buffer :: proc(
 	return Buffer{gpu, transfer, size}, true
 }
 
+//INTERNAL
 grow_buffer_if_needed :: proc(
 	device: ^sdl.GPUDevice,
 	buffer: ^Buffer,
@@ -1001,15 +877,26 @@ grow_buffer_if_needed :: proc(
 	}
 }
 
+//INTERNAL
 destroy_buffer :: proc(device: ^sdl.GPUDevice, buffer: ^Buffer) {
 	sdl.ReleaseGPUBuffer(device, buffer.gpu)
 	sdl.ReleaseGPUTransferBuffer(device, buffer.transfer)
 }
 
 // ---------------------------------------------------------------------------------------------------------------------
-// ----- Transform ------------------------
+// ----- Math ------------
 // ---------------------------------------------------------------------------------------------------------------------
 
+//INTERNAL
+ortho_rh :: proc(left: f32, right: f32, bottom: f32, top: f32, near: f32, far: f32) -> matrix[4, 4]f32 {
+	span_x, span_y, span_z := right - left, top - bottom, far - near // extents of the view volume
+	return matrix[4, 4]f32{
+		2.0 / span_x, 0.0, 0.0, -(right + left) / span_x,
+		0.0, 2.0 / span_y, 0.0, -(top + bottom) / span_y,
+		0.0, 0.0, -2.0 / span_z, -(far + near) / span_z,
+		0.0, 0.0, 0.0, 1.0}
+}
+
 // 2x3 affine transform for 2D pivot-rotation.
 // Used internally by rotation-aware drawing procs.
 Transform_2D :: struct {
@@ -1071,9 +958,114 @@ needs_transform :: #force_inline proc(origin: Vec2, rotation: f32) -> bool {
 }
 
 // ---------------------------------------------------------------------------------------------------------------------
-// ----- Procedure Groups ------------------------
+// ----- Anchors ------------
 // ---------------------------------------------------------------------------------------------------------------------
 
+// Anchor helpers return Vec2 pixel offsets for use as the `origin` parameter of draw calls.
+// The results compose with ordinary vector +/- arithmetic.
+//
+// Text anchor helpers are in text.odin (they depend on measure_text / SDL_ttf).
+
+// Same radius on every corner: `roundness` clamped to [0, 1] (0 = sharp, 1 = stadium or circle),
+// applied to half of the rectangle's shorter side.
+uniform_radii :: #force_inline proc(rect: Rectangle, roundness: f32) -> Rectangle_Radii {
+	corner_radius := min(rect.width, rect.height) * clamp(roundness, 0, 1) * 0.5
+	return Rectangle_Radii{corner_radius, corner_radius, corner_radius, corner_radius}
+}
+
+//----- Rectangle anchors (origin measured from rectangle's top-left) ----------------------------------
+
+center_of_rectangle :: #force_inline proc(rectangle: Rectangle) -> Vec2 {
+	return Vec2{0.5 * rectangle.width, 0.5 * rectangle.height} // halfway along both axes
+}
+
+top_left_of_rectangle :: #force_inline proc(rectangle: Rectangle) -> Vec2 {
+	return Vec2{} // zero offset: the top-left corner is the reference point itself
+}
+
+top_of_rectangle :: #force_inline proc(rectangle: Rectangle) -> Vec2 {
+	return Vec2{0.5 * rectangle.width, 0} // middle of the top edge
+}
+
+top_right_of_rectangle :: #force_inline proc(rectangle: Rectangle) -> Vec2 {
+	return Vec2{rectangle.width, 0} // far end of the top edge
+}
+
+left_of_rectangle :: #force_inline proc(rectangle: Rectangle) -> Vec2 {
+	return Vec2{0, 0.5 * rectangle.height} // middle of the left edge
+}
+
+right_of_rectangle :: #force_inline proc(rectangle: Rectangle) -> Vec2 {
+	return Vec2{rectangle.width, 0.5 * rectangle.height} // middle of the right edge
+}
+
+bottom_left_of_rectangle :: #force_inline proc(rectangle: Rectangle) -> Vec2 {
+	return Vec2{0, rectangle.height} // start of the bottom edge
+}
+
+bottom_of_rectangle :: #force_inline proc(rectangle: Rectangle) -> Vec2 {
+	return Vec2{0.5 * rectangle.width, rectangle.height} // middle of the bottom edge
+}
+
+bottom_right_of_rectangle :: #force_inline proc(rectangle: Rectangle) -> Vec2 {
+	return Vec2{rectangle.width, rectangle.height} // full extent on both axes
+}
+
+//----- Triangle anchors (origin measured from AABB top-left) ----------------------------------
+
+center_of_triangle :: #force_inline proc(v1, v2, v3: Vec2) -> Vec2 {
+	centroid := (v1 + v2 + v3) / 3
+	return centroid - Vec2{min(v1.x, v2.x, v3.x), min(v1.y, v2.y, v3.y)} // relative to AABB top-left
+}
+
+top_left_of_triangle :: #force_inline proc(v1, v2, v3: Vec2) -> Vec2 {
+	return Vec2{} // zero offset: the AABB top-left is the reference point itself
+}
+
+top_of_triangle :: #force_inline proc(v1, v2, v3: Vec2) -> Vec2 {
+	// Halfway across the AABB on x; y stays on the top edge.
+	aabb_width := max(v1.x, v2.x, v3.x) - min(v1.x, v2.x, v3.x)
+	return Vec2{aabb_width * 0.5, 0}
+}
+
+top_right_of_triangle :: #force_inline proc(v1, v2, v3: Vec2) -> Vec2 {
+	// Full AABB width on x; y stays on the top edge.
+	aabb_width := max(v1.x, v2.x, v3.x) - min(v1.x, v2.x, v3.x)
+	return Vec2{aabb_width, 0}
+}
+
+left_of_triangle :: #force_inline proc(v1, v2, v3: Vec2) -> Vec2 {
+	// x stays on the left edge; halfway down the AABB on y.
+	aabb_height := max(v1.y, v2.y, v3.y) - min(v1.y, v2.y, v3.y)
+	return Vec2{0, aabb_height * 0.5}
+}
+
+right_of_triangle :: #force_inline proc(v1, v2, v3: Vec2) -> Vec2 {
+	aabb_width := max(v1.x, v2.x, v3.x) - min(v1.x, v2.x, v3.x)
+	aabb_height := max(v1.y, v2.y, v3.y) - min(v1.y, v2.y, v3.y)
+	return Vec2{aabb_width, aabb_height * 0.5} // middle of the AABB's right edge
+}
+
+bottom_left_of_triangle :: #force_inline proc(v1, v2, v3: Vec2) -> Vec2 {
+	// x stays on the left edge; full AABB height on y.
+	aabb_height := max(v1.y, v2.y, v3.y) - min(v1.y, v2.y, v3.y)
+	return Vec2{0, aabb_height}
+}
+
+bottom_of_triangle :: #force_inline proc(v1, v2, v3: Vec2) -> Vec2 {
+	aabb_width := max(v1.x, v2.x, v3.x) - min(v1.x, v2.x, v3.x)
+	aabb_height := max(v1.y, v2.y, v3.y) - min(v1.y, v2.y, v3.y)
+	return Vec2{aabb_width * 0.5, aabb_height} // middle of the AABB's bottom edge
+}
+
+bottom_right_of_triangle :: #force_inline proc(v1, v2, v3: Vec2) -> Vec2 {
+	aabb_width := max(v1.x, v2.x, v3.x) - min(v1.x, v2.x, v3.x)
+	aabb_height := max(v1.y, v2.y, v3.y) - min(v1.y, v2.y, v3.y)
+	return Vec2{aabb_width, aabb_height} // full AABB extent on both axes
+}
+
+//----- Procedure groups ----------------------------------
+
 center_of :: proc {
 	center_of_rectangle,
 	center_of_triangle,
diff --git a/draw/draw_qr/draw_qr.odin b/draw/draw_qr/draw_qr.odin
index d7f8586..f9e9e9d 100644
--- a/draw/draw_qr/draw_qr.odin
+++ b/draw/draw_qr/draw_qr.odin
@@ -20,7 +20,7 @@ texture_size :: #force_inline proc(qrcode_buf: []u8) -> int {
 //
 // Returns ok=false when:
 //   - qrcode_buf is invalid (qrcode.get_size returns 0).
-//   - texture_buf is smaller than to_texture_size(qrcode_buf).
+//   - texture_buf is smaller than texture_size(qrcode_buf).
 @(require_results)
 to_texture :: proc(
 	qrcode_buf: []u8,
diff --git a/draw/examples/backdrop.odin b/draw/examples/backdrop.odin
new file mode 100644
index 0000000..2d3a6fb
--- /dev/null
+++ b/draw/examples/backdrop.odin
@@ -0,0 +1,382 @@
+package examples
+
+import "core:fmt"
+import "core:math"
+import "core:os"
+import sdl "vendor:sdl3"
+
+import "../../draw"
+import cyber "../cybersteel"
+
+// Backdrop example.
+//
+// Exercises the bracket scheduler end-to-end. The demo is structured as three zones in one
+// window so we can stress-test the cases that matter:
+//
+//   Zone 1 (top, base layer): animated colorful background + two side-by-side frosted panels
+//                             with DIFFERENT sigmas and DIFFERENT tints. Tests sigma grouping
+//                             and per-primitive tint.
+//
+//   Zone 2 (bottom-left, second layer): a small frosted panel in a NEW layer; its bracket sees
+//                                       Zone 1's full content (base layer's bracket output is
+//                                       carried forward via source_texture). Tests multi-layer
+//                                       backdrop sampling.
+//
+//   Zone 3 (bottom-right, third layer): edge cases. A sigma=0 "mirror" panel (no blur), two
+//                                       same-sigma panels side by side (tests sub-batch
+//                                       coalescing via append_or_extend_sub_batch), and text
+//                                       drawn AFTER a backdrop (tests Pass B rendering).
+//
+// Animation: a rotating gradient plus a few orbiting circles in Zone 1. Motion is the
+// only way to visually confirm the blur is Gaussian; a static panel can't tell you whether the
+// kernel coefficients are right.
+gaussian_blur :: proc() {
+	if !sdl.Init({.VIDEO}) do os.exit(1)
+	window := sdl.CreateWindow("Backdrop blur", 800, 600, {.HIGH_PIXEL_DENSITY})
+	gpu := sdl.CreateGPUDevice(draw.PLATFORM_SHADER_FORMAT, true, nil)
+	if !sdl.ClaimWindowForGPUDevice(gpu, window) do os.exit(1)
+	if !draw.init(gpu, window) do os.exit(1)
+	defer draw.destroy(gpu) // tear the renderer down on quit, same as gaussian_blur_debug
+	PLEX_SANS_REGULAR = draw.register_font(cyber.SANS_REGULAR_RAW)
+
+	WINDOW_W :: f32(800)
+	WINDOW_H :: f32(600)
+	FONT_SIZE :: u16(14)
+
+	t: f32 = 0
+
+	for {
+		defer free_all(context.temp_allocator)
+		ev: sdl.Event
+		for sdl.PollEvent(&ev) {
+			if ev.type == .QUIT do return
+		}
+		t += 1
+
+		base_layer := draw.begin({width = WINDOW_W, height = WINDOW_H})
+
+		//----- Background fill ----------------------------------
+		draw.rectangle(base_layer, {0, 0, WINDOW_W, WINDOW_H}, draw.Color{20, 20, 28, 255})
+
+		//----- Zone 1: animated background for the top frosted panels ----------------------------------
+
+		// A wide gradient strip spans Zone 1. Its gradient angle advances every frame, so the
+		// color ramp itself rotates visibly even though the rectangle stays put.
+		stripe_angle := t * 0.4
+		draw.rectangle(
+			base_layer,
+			{20, 20, WINDOW_W - 40, 240},
+			draw.Linear_Gradient {
+				start_color = {255, 80, 60, 255},
+				end_color = {60, 120, 255, 255},
+				angle = stripe_angle,
+			},
+		)
+
+		// Five orbiting circles inside Zone 1's strip. The blur should smooth their hard edges
+		// and the gradient behind them into a continuous wash.
+		for i in 0 ..< 5 {
+			phase := f32(i) * 1.2 + t * 0.04
+			cx := 100 + f32(i) * 140 + math.cos(phase) * 30
+			cy := 140 + math.sin(phase) * 50
+			circle_color := draw.Color {
+				u8(clamp(120 + math.cos(phase) * 100, 0, 255)),
+				u8(clamp(180 + math.sin(phase * 1.3) * 60, 0, 255)),
+				u8(clamp(220 - math.sin(phase) * 80, 0, 255)),
+				255,
+			}
+			draw.circle(base_layer, {cx, cy}, 22, circle_color)
+		}
+
+		// Bright accent rectangles to give the blur some sharp edges to munch on.
+		draw.rectangle(base_layer, {200, 60, 60, 12}, draw.Color{255, 255, 200, 255})
+		draw.rectangle(base_layer, {500, 200, 80, 16}, draw.Color{200, 255, 200, 255})
+
+		//----- Zone 1 frosted panels: different sigmas, different tints --------------------------------
+
+		// Panel A: heavy blur (sigma = 30), cool blue-grey tint. Both panels share rounded corners.
+		panel_radii := draw.Rectangle_Radii{16, 16, 16, 16}
+
+		draw.gaussian_blur(
+			base_layer,
+			{60, 80, 320, 140},
+			gaussian_sigma = 30,
+			tint = draw.Color{170, 200, 240, 200}, // cool blue, strong mix
+			radii = panel_radii,
+		)
+		draw.text(
+			base_layer,
+			"sigma = 30, cool tint",
+			{72, 90},
+			PLEX_SANS_REGULAR,
+			FONT_SIZE,
+			color = draw.Color{30, 35, 50, 255},
+		)
+
+		// Panel B: lighter blur, warm amber tint. sigma=6.
+		draw.gaussian_blur(
+			base_layer,
+			{420, 80, 320, 140},
+			gaussian_sigma = 6,
+			tint = draw.Color{255, 220, 160, 200}, // warm amber, strong mix
+			radii = panel_radii,
+		)
+		draw.text(
+			base_layer,
+			"sigma = 6, warm tint",
+			{432, 90},
+			PLEX_SANS_REGULAR,
+			FONT_SIZE,
+			color = draw.Color{60, 40, 20, 255},
+		)
+
+		// Pass-B verification: a rectangle drawn AFTER the backdrops in the same layer.
+		// Per the bracket scheduling model, this should render ON TOP of both panels above.
+		// If you see this stripe behind the panels instead of in front, something is wrong with
+		// the Pass B post-bracket path.
+		draw.rectangle(base_layer, {WINDOW_W * 0.5 - 4, 70, 8, 160}, draw.Color{255, 255, 255, 230})
+
+		//----- Zone 2: second layer with its own backdrop --------------------------------
+		// Zone 2's panel is in a NEW layer. Its bracket samples source_texture as it stands
+		// after the base layer fully finished (including the base layer's bracket V-composite
+		// output). So this panel sees Zone 1's frosted panels through its own blur.
+
+		zone2 := draw.new_layer(base_layer, {0, 280, WINDOW_W * 0.55, WINDOW_H - 280})
+
+		// Pass A content for zone2: a translucent darker overlay to make the panel pop.
+		draw.rectangle(zone2, {20, 300, WINDOW_W * 0.55 - 40, WINDOW_H - 320}, draw.Color{0, 0, 0, 80})
+
+		// Animated horizontal stripe that slides up and down in Zone 2 so this layer's panel
+		// has its own motion to smooth, not just what the base layer carries forward.
+		stripe_y := 320 + (math.sin(t * 0.05) * 0.5 + 0.5) * 200
+		draw.rectangle(zone2, {30, stripe_y, WINDOW_W * 0.55 - 60, 18}, draw.Color{255, 100, 200, 200})
+
+		// Zone 2's frosted panel.
+		draw.gaussian_blur(
+			zone2,
+			{60, 360, WINDOW_W * 0.55 - 120, 160},
+			gaussian_sigma = 10,
+			tint = draw.WHITE, // pure blur (white tint with any alpha is a no-op)
+			radii = draw.Rectangle_Radii{24, 24, 24, 24},
+		)
+		draw.text(
+			zone2,
+			"Layer 2 backdrop",
+			{72, 372},
+			PLEX_SANS_REGULAR,
+			FONT_SIZE,
+			color = draw.Color{30, 30, 30, 255},
+		)
+		draw.text(
+			zone2,
+			"sigma = 10",
+			{72, 392},
+			PLEX_SANS_REGULAR,
+			FONT_SIZE,
+			color = draw.Color{60, 60, 60, 255},
+		)
+
+		//----- Zone 3: edge cases --------------------------------
+		// Zone 3 lives in a third layer (created from zone2) so it gets a fresh source snapshot
+		// too, the same way Zone 2 does relative to the base layer.
+		zone3 := draw.new_layer(zone2, {WINDOW_W * 0.55, 280, WINDOW_W * 0.45, WINDOW_H - 280})
+
+		// Animated background patch for Zone 3 so its mirror panel has something to reflect.
+		for i in 0 ..< 4 {
+			phase := f32(i) * 1.5 + t * 0.06
+			y := 310 + f32(i) * 60 + math.sin(phase) * 8
+			draw.rectangle(
+				zone3,
+				{WINDOW_W * 0.55 + 20, y, WINDOW_W * 0.45 - 40, 14},
+				draw.Color {
+					u8(clamp(200 + math.cos(phase) * 50, 0, 255)),
+					u8(clamp(150 + math.sin(phase) * 80, 0, 255)),
+					u8(clamp(220 - math.cos(phase * 1.7) * 60, 0, 255)),
+					255,
+				},
+			)
+		}
+
+		// Edge case 1: sigma = 0 "mirror" — sharp framebuffer sample, no blur. Should reproduce
+		// the underlying pixels exactly through the SDF mask. Tinted slightly so it's visible.
+		draw.gaussian_blur(
+			zone3,
+			{WINDOW_W * 0.55 + 30, 310, 150, 70},
+			gaussian_sigma = 0,
+			tint = draw.WHITE, // pure mirror (no blur, no tint)
+			radii = draw.Rectangle_Radii{12, 12, 12, 12},
+		)
+		draw.text(
+			zone3,
+			"sigma=0 (mirror)",
+			{WINDOW_W * 0.55 + 38, 318},
+			PLEX_SANS_REGULAR,
+			FONT_SIZE,
+			color = draw.Color{20, 20, 20, 255},
+		)
+
+		// Edge case 2: two same-sigma panels submitted contiguously. The sub-batch coalescer
+		// should merge these into a single instanced V-composite draw. Visually, both should
+		// look identical (modulo position) — same blur radius, same tint.
+		draw.gaussian_blur(
+			zone3,
+			{WINDOW_W * 0.55 + 30, 400, 150, 70},
+			gaussian_sigma = 8,
+			tint = draw.Color{160, 255, 160, 200}, // green tint, strong mix
+			radii = draw.Rectangle_Radii{12, 12, 12, 12},
+		)
+		draw.gaussian_blur(
+			zone3,
+			{WINDOW_W * 0.55 + 200, 400, 150, 70},
+			gaussian_sigma = 8,
+			tint = draw.Color{160, 255, 160, 200}, // identical: tests sub-batch coalescing
+			radii = draw.Rectangle_Radii{12, 12, 12, 12},
+		)
+		draw.text(
+			zone3,
+			"sigma=8 (coalesced pair)",
+			{WINDOW_W * 0.55 + 38, 408},
+			PLEX_SANS_REGULAR,
+			FONT_SIZE,
+			color = draw.Color{20, 40, 20, 255},
+		)
+
+		// Edge case 3: text drawn AFTER a backdrop in the same layer. Tests Pass B over a fresh
+		// V-composite output. The text should render sharply, just below the green panels above.
+		draw.text(
+			zone3,
+			"Pass B text overlay",
+			{WINDOW_W * 0.55 + 38, 480},
+			PLEX_SANS_REGULAR,
+			FONT_SIZE,
+			color = draw.WHITE,
+		)
+
+		draw.end(gpu, window, draw.Color{15, 15, 22, 255})
+	}
+}
+
+// Backdrop diagnostic example.
+//
+// Minimal isolation harness for debugging the blur. ONE panel, ONE sigma, NO animation. The
+// fixed background gives the eye a stable reference: the blur should smooth a *known* set of
+// hard edges, and any artifacts (crisp circles, ghost mirrors, no apparent change with sigma)
+// stand out clearly.
+//
+// Controls:
+//   UP / DOWN arrow  : adjust sigma by ±1 (never below 0)
+//   LEFT / RIGHT arrow : adjust sigma by ±5 (never below 0)
+//   SPACE            : reset to sigma=10
+//   T                : toggle the test rectangle on top of the panel
+//
+// Sigma is printed to the title bar and drawn in the window, so you can correlate visual
+// behavior with the numeric value as you adjust it.
+gaussian_blur_debug :: proc() {
+	if !sdl.Init({.VIDEO}) do os.exit(1)
+	window := sdl.CreateWindow("Backdrop debug", 800, 600, {.HIGH_PIXEL_DENSITY})
+	gpu := sdl.CreateGPUDevice(draw.PLATFORM_SHADER_FORMAT, true, nil)
+	if !sdl.ClaimWindowForGPUDevice(gpu, window) do os.exit(1)
+	if !draw.init(gpu, window) do os.exit(1)
+	defer draw.destroy(gpu)
+	PLEX_SANS_REGULAR = draw.register_font(cyber.SANS_REGULAR_RAW)
+
+	WINDOW_W :: f32(800)
+	WINDOW_H :: f32(600)
+	FONT_SIZE :: u16(14)
+
+	sigma: f32 = 10
+	show_test_rect := true
+
+	for {
+		defer free_all(context.temp_allocator) // releases this frame's ctprintf/tprintf strings
+		ev: sdl.Event
+		for sdl.PollEvent(&ev) {
+			if ev.type == .QUIT do return
+			if ev.type == .KEY_DOWN {
+				#partial switch ev.key.scancode { // any other key is ignored
+				case .UP: sigma += 1
+				case .DOWN: sigma = max(sigma - 1, 0) // sigma never goes below 0
+				case .RIGHT: sigma += 5
+				case .LEFT: sigma = max(sigma - 5, 0) // sigma never goes below 0
+				case .SPACE: sigma = 10
+				case .T: show_test_rect = !show_test_rect
+				}
+			}
+		}
+
+		// Update title with current sigma so we can correlate visuals to numbers.
+		title := fmt.ctprintf("Backdrop debug | sigma = %.1f", sigma)
+		sdl.SetWindowTitle(window, title)
+
+		base_layer := draw.begin({width = WINDOW_W, height = WINDOW_H})
+
+		// Background: deliberately high-contrast static content. The eye can verify whether
+		// hard edges (the black grid lines, the crisp circles, the fine vertical bars) get
+		// smoothed by the panel. NOTHING animates here — every difference between frames is
+		// caused by user input (sigma change), not by the demo itself.
+		draw.rectangle(base_layer, {0, 0, WINDOW_W, WINDOW_H}, draw.Color{255, 255, 255, 255})
+
+		// Black grid: 8x6 cells with thin lines. Each grid cell is 100x100 logical px.
+		for x: f32 = 0; x <= WINDOW_W; x += 100 {
+			draw.rectangle(base_layer, {x - 1, 0, 2, WINDOW_H}, draw.BLACK)
+		}
+		for y: f32 = 0; y <= WINDOW_H; y += 100 {
+			draw.rectangle(base_layer, {0, y - 1, WINDOW_W, 2}, draw.BLACK)
+		}
+
+		// A row of small bright circles across the middle. Their crisp edges are the most
+		// sensitive blur indicator.
+		for i in 0 ..< 8 {
+			cx := f32(i) * 100 + 50
+			color := draw.Color{u8((i * 32) & 0xff), u8((i * 64) & 0xff), u8(255 - (i * 32) & 0xff), 255}
+			draw.circle(base_layer, {cx, 350}, 25, color)
+		}
+
+		// Vertical fine-detail stripes on the left edge. At any meaningful sigma these should
+		// merge into a flat color through the panel.
+		for i in 0 ..< 20 {
+			x := 30 + f32(i) * 6
+			color := draw.RED if i % 2 == 0 else draw.BLUE
+			draw.rectangle(base_layer, {x, 200, 4, 200}, color)
+		}
+
+		// THE PANEL UNDER TEST. Square, centered, large enough to cover multiple grid cells and
+		// the circle row. Square shape makes any horizontal-vs-vertical asymmetry purely
+		// renderer-driven (geometry can't introduce it).
+		panel := draw.Rectangle{250, 150, 300, 300}
+		draw.gaussian_blur(
+			base_layer,
+			panel,
+			gaussian_sigma = sigma,
+			tint = draw.WHITE,
+			radii = draw.Rectangle_Radii{20, 20, 20, 20},
+		)
+
+		// Pass B test: a bright rectangle drawn AFTER the backdrop in the same layer. Should
+		// always render on top of the panel. If the panel ever shows a "ghost" of this rect
+		// inside its blur, the V-composite is sampling the wrong texture state.
+		if show_test_rect {
+			draw.rectangle(base_layer, {380, 280, 40, 40}, draw.Color{0, 200, 0, 255})
+		}
+
+		// Sigma label at the bottom in giant text so you can read it from across the room.
+		draw.text(
+			base_layer,
+			fmt.tprintf("sigma = %.1f", sigma),
+			{20, WINDOW_H - 40},
+			PLEX_SANS_REGULAR,
+			28, // font size: twice FONT_SIZE
+			color = draw.BLACK,
+		)
+		draw.text(
+			base_layer,
+			"UP/DOWN ±1   LEFT/RIGHT ±5   SPACE reset   T toggle test rect",
+			{20, WINDOW_H - 70},
+			PLEX_SANS_REGULAR,
+			FONT_SIZE,
+			color = draw.Color{60, 60, 60, 255},
+		)
+
+		draw.end(gpu, window, draw.Color{255, 255, 255, 255})
+	}
+}
diff --git a/draw/examples/examples.odin b/draw/examples/examples.odin
index c75d49a..c865437 100644
--- a/draw/examples/examples.odin
+++ b/draw/examples/examples.odin
@@ -5,65 +5,88 @@ import "core:log"
 import "core:mem"
 import "core:os"
 
+EX_HELLOPE_SHAPES :: "hellope-shapes" // EX_*: example names that main matches against args[1]
+EX_HELLOPE_TEXT :: "hellope-text"
+EX_HELLOPE_CLAY :: "hellope-clay"
+EX_HELLOPE_CUSTOM :: "hellope-custom"
+EX_TEXTURES :: "textures"
+EX_GAUSSIAN_BLUR :: "gaussian-blur"
+EX_GAUSSIAN_BLUR_DEBUG :: "gaussian-blur-debug"
+
+AVAILABLE_EXAMPLES_MSG :: // help line listing every EX_* name; main prints it on a missing or unknown example
+	"Available examples: " +
+	EX_HELLOPE_SHAPES +
+	", " +
+	EX_HELLOPE_TEXT +
+	", " +
+	EX_HELLOPE_CLAY +
+	", " +
+	EX_HELLOPE_CUSTOM +
+	", " +
+	EX_TEXTURES +
+	", " +
+	EX_GAUSSIAN_BLUR +
+	", " +
+	EX_GAUSSIAN_BLUR_DEBUG
+
 main :: proc() {
 	//----- General setup ----------------------------------
-	{
-		// Temp
-		track_temp: mem.Tracking_Allocator
-		mem.tracking_allocator_init(&track_temp, context.temp_allocator)
-		context.temp_allocator = mem.tracking_allocator(&track_temp)
+	// Temp
+	track_temp: mem.Tracking_Allocator
+	mem.tracking_allocator_init(&track_temp, context.temp_allocator)
+	context.temp_allocator = mem.tracking_allocator(&track_temp)
 
-		// Default
-		track: mem.Tracking_Allocator
-		mem.tracking_allocator_init(&track, context.allocator)
-		context.allocator = mem.tracking_allocator(&track)
-		// Log a warning about any memory that was not freed by the end of the program.
-		// This could be fine for some global state or it could be a memory leak.
-		defer {
-			// Temp allocator
-			if len(track_temp.bad_free_array) > 0 {
-				fmt.eprintf("=== %v incorrect frees - temp allocator: ===\n", len(track_temp.bad_free_array))
-				for entry in track_temp.bad_free_array {
-					fmt.eprintf("- %p @ %v\n", entry.memory, entry.location)
-				}
-				mem.tracking_allocator_destroy(&track_temp)
+	// Default
+	track: mem.Tracking_Allocator
+	mem.tracking_allocator_init(&track, context.allocator)
+	context.allocator = mem.tracking_allocator(&track)
+	// Log a warning about any memory that was not freed by the end of the program.
+	// This could be fine for some global state or it could be a memory leak.
+	defer {
+		// Temp allocator
+		if len(track_temp.bad_free_array) > 0 {
+			fmt.eprintf("=== %v incorrect frees - temp allocator: ===\n", len(track_temp.bad_free_array))
+			for entry in track_temp.bad_free_array {
+				fmt.eprintf("- %p @ %v\n", entry.memory, entry.location)
 			}
-			// Default allocator
-			if len(track.allocation_map) > 0 {
-				fmt.eprintf("=== %v allocations not freed - main allocator: ===\n", len(track.allocation_map))
-				for _, entry in track.allocation_map {
-					fmt.eprintf("- %v bytes @ %v\n", entry.size, entry.location)
-				}
-			}
-			if len(track.bad_free_array) > 0 {
-				fmt.eprintf("=== %v incorrect frees - main allocator: ===\n", len(track.bad_free_array))
-				for entry in track.bad_free_array {
-					fmt.eprintf("- %p @ %v\n", entry.memory, entry.location)
-				}
-			}
-			mem.tracking_allocator_destroy(&track)
 		}
+		mem.tracking_allocator_destroy(&track_temp) // always release the tracker, not only after bad frees
-		// Logger
-		context.logger = log.create_console_logger()
-		defer log.destroy_console_logger(context.logger)
+		// Default allocator
+		if len(track.allocation_map) > 0 {
+			fmt.eprintf("=== %v allocations not freed - main allocator: ===\n", len(track.allocation_map))
+			for _, entry in track.allocation_map {
+				fmt.eprintf("- %v bytes @ %v\n", entry.size, entry.location)
+			}
+		}
+		if len(track.bad_free_array) > 0 {
+			fmt.eprintf("=== %v incorrect frees - main allocator: ===\n", len(track.bad_free_array))
+			for entry in track.bad_free_array {
+				fmt.eprintf("- %p @ %v\n", entry.memory, entry.location)
+			}
+		}
+		mem.tracking_allocator_destroy(&track)
 	}
+	context.logger = log.create_console_logger()
+	defer log.destroy_console_logger(context.logger)
 
 	args := os.args
 	if len(args) < 2 {
 		fmt.eprintln("Usage: examples ")
-		fmt.eprintln("Available examples: hellope-shapes, hellope-text, hellope-clay, hellope-custom, textures")
+		fmt.eprintln(AVAILABLE_EXAMPLES_MSG)
 		os.exit(1)
 	}
 
 	switch args[1] {
-	case "hellope-clay": hellope_clay()
-	case "hellope-custom": hellope_custom()
-	case "hellope-shapes": hellope_shapes()
-	case "hellope-text": hellope_text()
-	case "textures": textures()
+	case EX_HELLOPE_CLAY: hellope_clay()
+	case EX_HELLOPE_CUSTOM: hellope_custom()
+	case EX_HELLOPE_SHAPES: hellope_shapes()
+	case EX_HELLOPE_TEXT: hellope_text()
+	case EX_TEXTURES: textures()
+	case EX_GAUSSIAN_BLUR: gaussian_blur()
+	case EX_GAUSSIAN_BLUR_DEBUG: gaussian_blur_debug()
 	case:
 		fmt.eprintf("Unknown example: %v\n", args[1])
-		fmt.eprintln("Available examples: hellope-shapes, hellope-text, hellope-clay, hellope-custom, textures")
+		fmt.eprintln(AVAILABLE_EXAMPLES_MSG)
 		os.exit(1)
 	}
 }
diff --git a/draw/examples/fonts/JetBrainsMono-Bold.ttf b/draw/examples/fonts/JetBrainsMono-Bold.ttf
deleted file mode 100644
index 8c93043..0000000
Binary files a/draw/examples/fonts/JetBrainsMono-Bold.ttf and /dev/null differ
diff --git a/draw/examples/fonts/JetBrainsMono-Regular.ttf b/draw/examples/fonts/JetBrainsMono-Regular.ttf
deleted file mode 100644
index dff66cc..0000000
Binary files a/draw/examples/fonts/JetBrainsMono-Regular.ttf and /dev/null differ
diff --git a/draw/examples/hellope.odin b/draw/examples/hellope.odin
index 9a3bafd..f497a9c 100644
--- a/draw/examples/hellope.odin
+++ b/draw/examples/hellope.odin
@@ -1,14 +1,15 @@
 package examples
 
-import "../../draw"
-import "../../draw/tess"
-import "../../vendor/clay"
 import "core:math"
 import "core:os"
 import sdl "vendor:sdl3"
 
-JETBRAINS_MONO_REGULAR_RAW :: #load("fonts/JetBrainsMono-Regular.ttf")
-JETBRAINS_MONO_REGULAR: draw.Font_Id = max(draw.Font_Id) // Max so we crash if registration is forgotten
+import "../../draw"
+import "../../draw/tess"
+import "../../vendor/clay"
+import cyber "../cybersteel"
+
+PLEX_SANS_REGULAR: draw.Font_Id = max(draw.Font_Id) // Max so we crash if registration is forgotten
 
 hellope_shapes :: proc() {
 	if !sdl.Init({.VIDEO}) do os.exit(1)
@@ -47,8 +48,7 @@ hellope_shapes :: proc() {
 		draw.rectangle(
 			base_layer,
 			{20, 160, 460, 60},
-			{255, 0, 0, 255},
-			gradient = draw.Linear_Gradient{end_color = {0, 0, 255, 255}, angle = 0},
+			draw.Linear_Gradient{start_color = {255, 0, 0, 255}, end_color = {0, 0, 255, 255}, angle = 0},
 		)
 
 		// ----- Rotation demos -----
@@ -78,18 +78,18 @@ hellope_shapes :: proc() {
 		)
 
 		// Ellipse rotating around its center (tilted ellipse)
-		draw.ellipse(base_layer, {410, 340}, 50, 30, {255, 200, 50, 255}, rotation = spin_angle)
+		draw.ellipse(base_layer, {410, 340}, 50, 30, draw.Color{255, 200, 50, 255}, rotation = spin_angle)
 
 		// Circle orbiting a point (moon orbiting planet)
 		// Convention B: center = pivot point (planet), origin = offset from moon center to pivot.
 		// Moon's visual center at rotation=0: planet_pos - origin = (100, 450) - (0, 40) = (100, 410).
 		planet_pos := draw.Vec2{100, 450}
-		draw.circle(base_layer, planet_pos, 8, {200, 200, 200, 255}) // planet (stationary)
+		draw.circle(base_layer, planet_pos, 8, draw.Color{200, 200, 200, 255}) // planet (stationary)
 		draw.circle(
 			base_layer,
 			planet_pos,
 			5,
-			{100, 150, 255, 255},
+			draw.Color{100, 150, 255, 255},
 			origin = draw.Vec2{0, 40},
 			rotation = spin_angle,
 		) // moon orbiting
@@ -100,7 +100,7 @@ hellope_shapes :: proc() {
 			draw.Vec2{250, 450},
 			0,
 			30,
-			{100, 100, 220, 255},
+			draw.Color{100, 100, 220, 255},
 			start_angle = 0,
 			end_angle = 270,
 			rotation = spin_angle,
@@ -126,7 +126,7 @@ hellope_shapes :: proc() {
 			{460, 450},
 			6,
 			30,
-			{180, 100, 220, 255},
+			draw.Color{180, 100, 220, 255},
 			outline_color = draw.WHITE,
 			outline_width = 2,
 			rotation = spin_angle,
@@ -147,7 +147,7 @@ hellope_text :: proc() {
 	gpu := sdl.CreateGPUDevice(draw.PLATFORM_SHADER_FORMAT, true, nil)
 	if !sdl.ClaimWindowForGPUDevice(gpu, window) do os.exit(1)
 	if !draw.init(gpu, window) do os.exit(1)
-	JETBRAINS_MONO_REGULAR = draw.register_font(JETBRAINS_MONO_REGULAR_RAW)
+	PLEX_SANS_REGULAR = draw.register_font(cyber.SANS_REGULAR_RAW)
 
 	FONT_SIZE :: u16(24)
 	spin_angle: f32 = 0
@@ -168,10 +168,10 @@ hellope_text :: proc() {
 			base_layer,
 			"Hellope!",
 			{300, 80},
-			JETBRAINS_MONO_REGULAR,
+			PLEX_SANS_REGULAR,
 			FONT_SIZE,
 			color = draw.WHITE,
-			origin = draw.center_of("Hellope!", JETBRAINS_MONO_REGULAR, FONT_SIZE),
+			origin = draw.center_of("Hellope!", PLEX_SANS_REGULAR, FONT_SIZE),
 			id = HELLOPE_ID,
 		)
 
@@ -180,35 +180,28 @@ hellope_text :: proc() {
 			base_layer,
 			"Hellope World!",
 			{300, 250},
-			JETBRAINS_MONO_REGULAR,
+			PLEX_SANS_REGULAR,
 			FONT_SIZE,
 			color = {255, 200, 50, 255},
-			origin = draw.center_of("Hellope World!", JETBRAINS_MONO_REGULAR, FONT_SIZE),
+			origin = draw.center_of("Hellope World!", PLEX_SANS_REGULAR, FONT_SIZE),
 			rotation = spin_angle,
 			id = ROTATING_SENTENCE_ID,
 		)
 
 		// Uncached text (no id) — created and destroyed each frame, simplest usage
-		draw.text(
-			base_layer,
-			"Top-left anchored",
-			{20, 450},
-			JETBRAINS_MONO_REGULAR,
-			FONT_SIZE,
-			color = draw.WHITE,
-		)
+		draw.text(base_layer, "Top-left anchored", {20, 450}, PLEX_SANS_REGULAR, FONT_SIZE, color = draw.WHITE)
 
 		// Measure text for manual layout
-		size := draw.measure_text("Measured!", JETBRAINS_MONO_REGULAR, FONT_SIZE)
+		size := draw.measure_text("Measured!", PLEX_SANS_REGULAR, FONT_SIZE)
 		draw.rectangle(base_layer, {300 - size.x / 2, 380, size.x, size.y}, draw.Color{60, 60, 60, 200})
 		draw.text(
 			base_layer,
 			"Measured!",
 			{300, 380},
-			JETBRAINS_MONO_REGULAR,
+			PLEX_SANS_REGULAR,
 			FONT_SIZE,
 			color = draw.WHITE,
-			origin = draw.top_of("Measured!", JETBRAINS_MONO_REGULAR, FONT_SIZE),
+			origin = draw.top_of("Measured!", PLEX_SANS_REGULAR, FONT_SIZE),
 			id = MEASURED_ID,
 		)
 
@@ -217,7 +210,7 @@ hellope_text :: proc() {
 			base_layer,
 			"Corner spin",
 			{150, 530},
-			JETBRAINS_MONO_REGULAR,
+			PLEX_SANS_REGULAR,
 			FONT_SIZE,
 			color = {100, 200, 255, 255},
 			rotation = spin_angle,
@@ -234,10 +227,10 @@ hellope_clay :: proc() {
 	gpu := sdl.CreateGPUDevice(draw.PLATFORM_SHADER_FORMAT, true, nil)
 	if !sdl.ClaimWindowForGPUDevice(gpu, window) do os.exit(1)
 	if !draw.init(gpu, window) do os.exit(1)
-	JETBRAINS_MONO_REGULAR = draw.register_font(JETBRAINS_MONO_REGULAR_RAW)
+	PLEX_SANS_REGULAR = draw.register_font(cyber.SANS_REGULAR_RAW)
 
 	text_config := clay.TextElementConfig {
-		fontId    = JETBRAINS_MONO_REGULAR,
+		fontId    = PLEX_SANS_REGULAR,
 		fontSize  = 36,
 		textColor = {255, 255, 255, 255},
 	}
@@ -278,10 +271,10 @@ hellope_custom :: proc() {
 	gpu := sdl.CreateGPUDevice(draw.PLATFORM_SHADER_FORMAT, true, nil)
 	if !sdl.ClaimWindowForGPUDevice(gpu, window) do os.exit(1)
 	if !draw.init(gpu, window) do os.exit(1)
-	JETBRAINS_MONO_REGULAR = draw.register_font(JETBRAINS_MONO_REGULAR_RAW)
+	PLEX_SANS_REGULAR = draw.register_font(cyber.SANS_REGULAR_RAW)
 
 	text_config := clay.TextElementConfig {
-		fontId    = JETBRAINS_MONO_REGULAR,
+		fontId    = PLEX_SANS_REGULAR,
 		fontSize  = 24,
 		textColor = {255, 255, 255, 255},
 	}
diff --git a/draw/examples/textures.odin b/draw/examples/textures.odin
index 258cb76..9a3c6d9 100644
--- a/draw/examples/textures.odin
+++ b/draw/examples/textures.odin
@@ -1,17 +1,19 @@
 package examples
 
-import "../../draw"
-import "../../draw/draw_qr"
 import "core:os"
 import sdl "vendor:sdl3"
 
+import "../../draw"
+import "../../draw/draw_qr"
+import cyber "../cybersteel"
+
 textures :: proc() {
 	if !sdl.Init({.VIDEO}) do os.exit(1)
-	window := sdl.CreateWindow("Textures", 800, 600, {.HIGH_PIXEL_DENSITY})
+	window := sdl.CreateWindow("Textures", 800, 750, {.HIGH_PIXEL_DENSITY})
 	gpu := sdl.CreateGPUDevice(draw.PLATFORM_SHADER_FORMAT, true, nil)
 	if !sdl.ClaimWindowForGPUDevice(gpu, window) do os.exit(1)
 	if !draw.init(gpu, window) do os.exit(1)
-	JETBRAINS_MONO_REGULAR = draw.register_font(JETBRAINS_MONO_REGULAR_RAW)
+	PLEX_SANS_REGULAR = draw.register_font(cyber.SANS_REGULAR_RAW)
 
 	FONT_SIZE :: u16(14)
 	LABEL_OFFSET :: f32(8) // gap between item and its label
@@ -86,10 +88,10 @@ textures :: proc() {
 		}
 		spin_angle += 1
 
-		base_layer := draw.begin({width = 800, height = 600})
+		base_layer := draw.begin({width = 800, height = 750})
 
 		// Background
-		draw.rectangle(base_layer, {0, 0, 800, 600}, draw.Color{30, 30, 30, 255})
+		draw.rectangle(base_layer, {0, 0, 800, 750}, draw.Color{30, 30, 30, 255})
 
 		//----- Row 1: Sampler presets (y=30) ----------------------------------
 
@@ -101,50 +103,61 @@ textures :: proc() {
 		COL4 :: f32(480)
 
 		// Nearest (sharp pixel edges)
-		draw.rectangle_texture(
+		draw.rectangle(
 			base_layer,
 			{COL1, ROW1_Y, ITEM_SIZE, ITEM_SIZE},
-			checker_texture,
-			sampler = .Nearest_Clamp,
+			draw.Texture_Fill {
+				id = checker_texture,
+				tint = draw.WHITE,
+				uv_rect = {0, 0, 1, 1},
+				sampler = .Nearest_Clamp,
+			},
 		)
 		draw.text(
 			base_layer,
 			"Nearest",
 			{COL1, ROW1_Y + ITEM_SIZE + LABEL_OFFSET},
-			JETBRAINS_MONO_REGULAR,
+			PLEX_SANS_REGULAR,
 			FONT_SIZE,
 			color = draw.WHITE,
 		)
 
 		// Linear (bilinear blur)
-		draw.rectangle_texture(
+		draw.rectangle(
 			base_layer,
 			{COL2, ROW1_Y, ITEM_SIZE, ITEM_SIZE},
-			checker_texture,
-			sampler = .Linear_Clamp,
+			draw.Texture_Fill {
+				id = checker_texture,
+				tint = draw.WHITE,
+				uv_rect = {0, 0, 1, 1},
+				sampler = .Linear_Clamp,
+			},
 		)
 		draw.text(
 			base_layer,
 			"Linear",
 			{COL2, ROW1_Y + ITEM_SIZE + LABEL_OFFSET},
-			JETBRAINS_MONO_REGULAR,
+			PLEX_SANS_REGULAR,
 			FONT_SIZE,
 			color = draw.WHITE,
 		)
 
 		// Tiled (4x repeat)
-		draw.rectangle_texture(
+		draw.rectangle(
 			base_layer,
 			{COL3, ROW1_Y, ITEM_SIZE, ITEM_SIZE},
-			checker_texture,
-			sampler = .Nearest_Repeat,
-			uv_rect = {0, 0, 4, 4},
+			draw.Texture_Fill {
+				id = checker_texture,
+				tint = draw.WHITE,
+				uv_rect = {0, 0, 4, 4},
+				sampler = .Nearest_Repeat,
+			},
 		)
 		draw.text(
 			base_layer,
 			"Tiled 4x",
 			{COL3, ROW1_Y + ITEM_SIZE + LABEL_OFFSET},
-			JETBRAINS_MONO_REGULAR,
+			PLEX_SANS_REGULAR,
 			FONT_SIZE,
 			color = draw.WHITE,
 		)
@@ -155,45 +168,52 @@ textures :: proc() {
 
 		// QR code (RGBA texture with baked colors, nearest sampling)
 		draw.rectangle(base_layer, {COL1, ROW2_Y, ITEM_SIZE, ITEM_SIZE}, draw.Color{255, 255, 255, 255}) // white bg
-		draw.rectangle_texture(
+		draw.rectangle(
 			base_layer,
 			{COL1, ROW2_Y, ITEM_SIZE, ITEM_SIZE},
-			qr_texture,
-			sampler = .Nearest_Clamp,
+			draw.Texture_Fill{id = qr_texture, tint = draw.WHITE, uv_rect = {0, 0, 1, 1}, sampler = .Nearest_Clamp},
 		)
 		draw.text(
 			base_layer,
 			"QR Code",
 			{COL1, ROW2_Y + ITEM_SIZE + LABEL_OFFSET},
-			JETBRAINS_MONO_REGULAR,
+			PLEX_SANS_REGULAR,
 			FONT_SIZE,
 			color = draw.WHITE,
 		)
 
 		// Rounded corners
-		draw.rectangle_texture(
+		draw.rectangle(
 			base_layer,
 			{COL2, ROW2_Y, ITEM_SIZE, ITEM_SIZE},
-			checker_texture,
-			sampler = .Nearest_Clamp,
+			draw.Texture_Fill {
+				id = checker_texture,
+				tint = draw.WHITE,
+				uv_rect = {0, 0, 1, 1},
+				sampler = .Nearest_Clamp,
+			},
 			radii = draw.uniform_radii({COL2, ROW2_Y, ITEM_SIZE, ITEM_SIZE}, 0.3),
 		)
 		draw.text(
 			base_layer,
 			"Rounded",
 			{COL2, ROW2_Y + ITEM_SIZE + LABEL_OFFSET},
-			JETBRAINS_MONO_REGULAR,
+			PLEX_SANS_REGULAR,
 			FONT_SIZE,
 			color = draw.WHITE,
 		)
 
 		// Rotating
 		rot_rect := draw.Rectangle{COL3, ROW2_Y, ITEM_SIZE, ITEM_SIZE}
-		draw.rectangle_texture(
+		draw.rectangle(
 			base_layer,
 			rot_rect,
-			checker_texture,
-			sampler = .Nearest_Clamp,
+			draw.Texture_Fill {
+				id = checker_texture,
+				tint = draw.WHITE,
+				uv_rect = {0, 0, 1, 1},
+				sampler = .Nearest_Clamp,
+			},
 			origin = draw.center_of(rot_rect),
 			rotation = spin_angle,
 		)
@@ -201,7 +221,7 @@ textures :: proc() {
 			base_layer,
 			"Rotating",
 			{COL3, ROW2_Y + ITEM_SIZE + LABEL_OFFSET},
-			JETBRAINS_MONO_REGULAR,
+			PLEX_SANS_REGULAR,
 			FONT_SIZE,
 			color = draw.WHITE,
 		)
@@ -214,12 +234,16 @@ textures :: proc() {
 		// Stretch
 		uv_s, sampler_s, inner_s := draw.fit_params(.Stretch, {COL1, ROW3_Y, FIT_SIZE, FIT_SIZE}, stripe_texture)
 		draw.rectangle(base_layer, {COL1, ROW3_Y, FIT_SIZE, FIT_SIZE}, draw.Color{60, 60, 60, 255}) // bg
-		draw.rectangle_texture(base_layer, inner_s, stripe_texture, uv_rect = uv_s, sampler = sampler_s)
+		draw.rectangle(
+			base_layer,
+			inner_s,
+			draw.Texture_Fill{id = stripe_texture, tint = draw.WHITE, uv_rect = uv_s, sampler = sampler_s},
+		)
 		draw.text(
 			base_layer,
 			"Stretch",
 			{COL1, ROW3_Y + FIT_SIZE + LABEL_OFFSET},
-			JETBRAINS_MONO_REGULAR,
+			PLEX_SANS_REGULAR,
 			FONT_SIZE,
 			color = draw.WHITE,
 		)
@@ -227,12 +251,16 @@ textures :: proc() {
 		// Fill (center-crop)
 		uv_f, sampler_f, inner_f := draw.fit_params(.Fill, {COL2, ROW3_Y, FIT_SIZE, FIT_SIZE}, stripe_texture)
 		draw.rectangle(base_layer, {COL2, ROW3_Y, FIT_SIZE, FIT_SIZE}, draw.Color{60, 60, 60, 255})
-		draw.rectangle_texture(base_layer, inner_f, stripe_texture, uv_rect = uv_f, sampler = sampler_f)
+		draw.rectangle(
+			base_layer,
+			inner_f,
+			draw.Texture_Fill{id = stripe_texture, tint = draw.WHITE, uv_rect = uv_f, sampler = sampler_f},
+		)
 		draw.text(
 			base_layer,
 			"Fill",
 			{COL2, ROW3_Y + FIT_SIZE + LABEL_OFFSET},
-			JETBRAINS_MONO_REGULAR,
+			PLEX_SANS_REGULAR,
 			FONT_SIZE,
 			color = draw.WHITE,
 		)
@@ -240,29 +268,139 @@ textures :: proc() {
 		// Fit (letterbox)
 		uv_ft, sampler_ft, inner_ft := draw.fit_params(.Fit, {COL3, ROW3_Y, FIT_SIZE, FIT_SIZE}, stripe_texture)
 		draw.rectangle(base_layer, {COL3, ROW3_Y, FIT_SIZE, FIT_SIZE}, draw.Color{60, 60, 60, 255}) // visible margin bg
-		draw.rectangle_texture(base_layer, inner_ft, stripe_texture, uv_rect = uv_ft, sampler = sampler_ft)
+		draw.rectangle(
+			base_layer,
+			inner_ft,
+			draw.Texture_Fill{id = stripe_texture, tint = draw.WHITE, uv_rect = uv_ft, sampler = sampler_ft},
+		)
 		draw.text(
 			base_layer,
 			"Fit",
 			{COL3, ROW3_Y + FIT_SIZE + LABEL_OFFSET},
-			JETBRAINS_MONO_REGULAR,
+			PLEX_SANS_REGULAR,
 			FONT_SIZE,
 			color = draw.WHITE,
 		)
 
 		// Per-corner radii
-		draw.rectangle_texture(
+		draw.rectangle(
 			base_layer,
 			{COL4, ROW3_Y, FIT_SIZE, FIT_SIZE},
-			checker_texture,
-			sampler = .Nearest_Clamp,
+			draw.Texture_Fill {
+				id = checker_texture,
+				tint = draw.WHITE,
+				uv_rect = {0, 0, 1, 1},
+				sampler = .Nearest_Clamp,
+			},
 			radii = {20, 0, 20, 0},
 		)
 		draw.text(
 			base_layer,
 			"Per-corner",
 			{COL4, ROW3_Y + FIT_SIZE + LABEL_OFFSET},
-			JETBRAINS_MONO_REGULAR,
+			PLEX_SANS_REGULAR,
+			FONT_SIZE,
+			color = draw.WHITE,
+		)
+
+		//----- Row 4: Textured shapes (y=520) ----------------------------------
+
+		ROW4_Y :: f32(520) // top edge of the row's cells
+		SHAPE_SIZE :: f32(80) // side length of the square cell each shape is centered in
+		SHAPE_GAP :: f32(30) // horizontal spacing between neighbouring cells
+		SHAPE_COL1 :: f32(30) // left edge of the first cell; each later column adds SHAPE_SIZE + SHAPE_GAP
+		SHAPE_COL2 :: SHAPE_COL1 + SHAPE_SIZE + SHAPE_GAP
+		SHAPE_COL3 :: SHAPE_COL2 + SHAPE_SIZE + SHAPE_GAP
+		SHAPE_COL4 :: SHAPE_COL3 + SHAPE_SIZE + SHAPE_GAP
+		SHAPE_COL5 :: SHAPE_COL4 + SHAPE_SIZE + SHAPE_GAP
+
+		checker_fill := draw.Texture_Fill { // one fill value reused by all five shapes in this row
+			id      = checker_texture,
+			tint    = draw.WHITE,
+			uv_rect = {0, 0, 1, 1},
+			sampler = .Nearest_Clamp,
+		}
+
+		// Textured circle: centered in cell 1, radius = half the cell; label sits LABEL_OFFSET below the cell
+		draw.circle(
+			base_layer,
+			{SHAPE_COL1 + SHAPE_SIZE / 2, ROW4_Y + SHAPE_SIZE / 2},
+			SHAPE_SIZE / 2,
+			checker_fill,
+		)
+		draw.text(
+			base_layer,
+			"Circle",
+			{SHAPE_COL1, ROW4_Y + SHAPE_SIZE + LABEL_OFFSET},
+			PLEX_SANS_REGULAR,
+			FONT_SIZE,
+			color = draw.WHITE,
+		)
+
+		// Textured ellipse: centered in cell 2, radii SHAPE_SIZE/2 and SHAPE_SIZE/3 (presumably x then y; confirm)
+		draw.ellipse(
+			base_layer,
+			{SHAPE_COL2 + SHAPE_SIZE / 2, ROW4_Y + SHAPE_SIZE / 2},
+			SHAPE_SIZE / 2,
+			SHAPE_SIZE / 3,
+			checker_fill,
+		)
+		draw.text(
+			base_layer,
+			"Ellipse",
+			{SHAPE_COL2, ROW4_Y + SHAPE_SIZE + LABEL_OFFSET},
+			PLEX_SANS_REGULAR,
+			FONT_SIZE,
+			color = draw.WHITE,
+		)
+
+		// Textured polygon (hexagon: 6 sides), centered in cell 3
+		draw.polygon(
+			base_layer,
+			{SHAPE_COL3 + SHAPE_SIZE / 2, ROW4_Y + SHAPE_SIZE / 2},
+			6,
+			SHAPE_SIZE / 2,
+			checker_fill,
+		)
+		draw.text(
+			base_layer,
+			"Polygon",
+			{SHAPE_COL3, ROW4_Y + SHAPE_SIZE + LABEL_OFFSET},
+			PLEX_SANS_REGULAR,
+			FONT_SIZE,
+			color = draw.WHITE,
+		)
+
+		// Textured ring, centered in cell 4 (presumably inner radius SHAPE_SIZE/4, outer SHAPE_SIZE/2; confirm arg order)
+		draw.ring(
+			base_layer,
+			{SHAPE_COL4 + SHAPE_SIZE / 2, ROW4_Y + SHAPE_SIZE / 2},
+			SHAPE_SIZE / 4,
+			SHAPE_SIZE / 2,
+			checker_fill,
+		)
+		draw.text(
+			base_layer,
+			"Ring",
+			{SHAPE_COL4, ROW4_Y + SHAPE_SIZE + LABEL_OFFSET},
+			PLEX_SANS_REGULAR,
+			FONT_SIZE,
+			color = draw.WHITE,
+		)
+
+		// Textured line (capsule): runs the full width of cell 5 at mid-height, thickness 20
+		draw.line(
+			base_layer,
+			{SHAPE_COL5, ROW4_Y + SHAPE_SIZE / 2},
+			{SHAPE_COL5 + SHAPE_SIZE, ROW4_Y + SHAPE_SIZE / 2},
+			checker_fill,
+			thickness = 20,
+		)
+		draw.text(
+			base_layer,
+			"Line",
+			{SHAPE_COL5, ROW4_Y + SHAPE_SIZE + LABEL_OFFSET},
+			PLEX_SANS_REGULAR,
 			FONT_SIZE,
 			color = draw.WHITE,
 		)
diff --git a/draw/pipeline_2d_base.odin b/draw/pipeline_2d_base.odin
deleted file mode 100644
index 2d5923b..0000000
--- a/draw/pipeline_2d_base.odin
+++ /dev/null
@@ -1,722 +0,0 @@
-package draw
-
-import "core:c"
-import "core:log"
-import "core:mem"
-import sdl "vendor:sdl3"
-
-// Vertex layout for tessellated and text geometry.
-// IMPORTANT: `color` must be premultiplied alpha (RGB channels pre-scaled by alpha).
-// The tessellated fragment shader passes vertex color through directly — it does NOT
-// premultiply. The blend state is ONE, ONE_MINUS_SRC_ALPHA (premultiplied-over).
-// Use `premultiply_color` when constructing vertices manually for `prepare_shape`.
-Vertex :: struct {
-	position: Vec2,
-	uv:       [2]f32,
-	color:    Color,
-}
-
-TextBatch :: struct {
-	atlas_texture: ^sdl.GPUTexture,
-	vertex_start:  u32,
-	vertex_count:  u32,
-	index_start:   u32,
-	index_count:   u32,
-}
-
-// ----------------------------------------------------------------------------------------------------------------
-// ----- SDF primitive types -----------
-// ----------------------------------------------------------------------------------------------------------------
-
-// The SDF path evaluates one of four signed distance functions per primitive, dispatched
-// by Shape_Kind encoded in the low byte of Primitive.flags:
-//
-//   RRect    — rounded rectangle with per-corner radii (sdRoundedBox). Also covers circles
-//              (uniform radii = half-size), capsule-style line segments (rotated, max rounding),
-//              and other RRect-reducible shapes.
-//   NGon     — regular polygon with N sides and optional rounding.
-//   Ellipse  — approximate ellipse (non-exact SDF, suitable for UI but not for shape merging).
-//   Ring_Arc — annular ring with optional angular clipping. Covers full rings, partial arcs,
-//              pie slices (inner_radius = 0), and loading spinners.
-Shape_Kind :: enum u8 {
-	Solid    = 0, // tessellated path (mode marker; not a real SDF kind)
-	RRect    = 1,
-	NGon     = 2,
-	Ellipse  = 3,
-	Ring_Arc = 4,
-}
-
-Shape_Flag :: enum u8 {
-	Textured, // bit 0: sample texture using uv.uv_rect (mutually exclusive with Gradient)
-	Gradient, // bit 1: 2-color gradient using uv.effects.gradient_color as end/outer color
-	Gradient_Radial, // bit 2: if set with Gradient, radial from center; else linear at angle
-	Outline, // bit 3: outer outline band using uv.effects.outline_color; CPU expands bounds by outline_width
-	Rotated, // bit 4: shape has non-zero rotation; rotation_sc contains packed sin/cos
-	Arc_Narrow, // bit 5: ring arc span ≤ π — intersect half-planes. Neither Arc bit = full ring.
-	Arc_Wide, // bit 6: ring arc span > π — union half-planes. Neither Arc bit = full ring.
-}
-
-Shape_Flags :: bit_set[Shape_Flag;u8]
-
-RRect_Params :: struct {
-	half_size:    [2]f32,
-	radii:        [4]f32,
-	half_feather: f32, // feather_px * 0.5; shader uses smoothstep(-h, h, d)
-	_:            f32,
-}
-
-NGon_Params :: struct {
-	radius:       f32,
-	sides:        f32,
-	half_feather: f32, // feather_px * 0.5; shader uses smoothstep(-h, h, d)
-	_:            [5]f32,
-}
-
-Ellipse_Params :: struct {
-	radii:        [2]f32,
-	half_feather: f32, // feather_px * 0.5; shader uses smoothstep(-h, h, d)
-	_:            [5]f32,
-}
-
-Ring_Arc_Params :: struct {
-	inner_radius: f32, // inner radius in physical pixels (0 for pie slice)
-	outer_radius: f32, // outer radius in physical pixels
-	normal_start: [2]f32, // pre-computed outward normal of start edge: (sin(start), -cos(start))
-	normal_end:   [2]f32, // pre-computed outward normal of end edge: (-sin(end), cos(end))
-	half_feather: f32, // feather_px * 0.5; shader uses smoothstep(-h, h, d)
-	_:            f32,
-}
-
-Shape_Params :: struct #raw_union {
-	rrect:    RRect_Params,
-	ngon:     NGon_Params,
-	ellipse:  Ellipse_Params,
-	ring_arc: Ring_Arc_Params,
-	raw:      [8]f32,
-}
-#assert(size_of(Shape_Params) == 32)
-
-// GPU-side storage for 2-color gradient parameters and/or outline parameters.
-// Packed into 16 bytes to alias with uv_rect in the Uv_Or_Effects raw union.
-// The shader reads gradient_color and outline_color via unpackUnorm4x8.
-// gradient_dir_sc stores the pre-computed gradient direction as (cos, sin) in f16 pair
-// via unpackHalf2x16. outline_packed stores outline_width as f16 via unpackHalf2x16.
-Gradient_Outline :: struct {
-	gradient_color:  Color, //  0: end (linear) or outer (radial) gradient color
-	outline_color:   Color, //  4: outline band color
-	gradient_dir_sc: u32, //  8: packed f16 pair: low = cos(angle), high = sin(angle) — pre-computed gradient direction
-	outline_packed:  u32, // 12: packed f16 pair: low = outline_width (f16, physical pixels), high = reserved
-}
-
-#assert(size_of(Gradient_Outline) == 16)
-
-// Uv_Or_Effects aliases the final 16 bytes of a Primitive. When .Textured is set,
-// uv_rect holds texture-atlas coordinates. When .Gradient or .Outline is set,
-// effects holds 2-color gradient parameters and/or outline parameters.
-// Textured and Gradient are mutually exclusive; if both are set, Gradient takes precedence.
-Uv_Or_Effects :: struct #raw_union {
-	uv_rect: [4]f32, // u_min, v_min, u_max, v_max (default {0,0,1,1})
-	effects: Gradient_Outline, // gradient + outline parameters
-}
-
-// GPU layout: 80 bytes, std430-compatible. The shader declares this as a storage buffer struct.
-// The low byte of `flags` encodes the Shape_Kind (0 = tessellated, 1-4 = SDF kinds).
-// Bits 8-15 encode Shape_Flags (Textured, Gradient, Gradient_Radial, Outline, Rotated, Arc_Narrow, Arc_Wide).
-// rotation_sc stores pre-computed sin/cos of the rotation angle as a packed f16 pair,
-// avoiding per-pixel trigonometry in the fragment shader. Only read when .Rotated is set.
-Primitive :: struct {
-	bounds:      [4]f32, //  0: min_x, min_y, max_x, max_y (world-space, pre-DPI)
-	color:       Color, // 16: u8x4, fill color / gradient start color / texture tint
-	flags:       u32, // 20: low byte = Shape_Kind, bits 8+ = Shape_Flags
-	rotation_sc: u32, // 24: packed f16 pair: low = sin(angle), high = cos(angle). Requires .Rotated flag.
-	_pad:        f32, // 28: reserved for future use
-	params:      Shape_Params, // 32: per-kind shape parameters (raw union, 32 bytes)
-	uv:          Uv_Or_Effects, // 64: texture coords or gradient/outline parameters
-}
-
-#assert(size_of(Primitive) == 80)
-
-// Pack shape kind and flags into the Primitive.flags field. The low byte encodes the Shape_Kind
-// (which also serves as the SDF mode marker — kind > 0 means SDF path). The tessellated path
-// leaves the field at 0 (Solid kind, set by vertex shader zero-initialization).
-pack_kind_flags :: #force_inline proc(kind: Shape_Kind, flags: Shape_Flags) -> u32 {
-	return u32(kind) | (u32(transmute(u8)flags) << 8)
-}
-
-// Pack two f16 values into a single u32 for GPU consumption via unpackHalf2x16.
-// Used to pack gradient_dir_sc (cos/sin) and outline_packed (width/reserved) in Gradient_Outline.
-pack_f16_pair :: #force_inline proc(low, high: f16) -> u32 {
-	return u32(transmute(u16)low) | (u32(transmute(u16)high) << 16)
-}
-
-Pipeline_2D_Base :: struct {
-	sdl_pipeline:     ^sdl.GPUGraphicsPipeline,
-	vertex_buffer:    Buffer,
-	index_buffer:     Buffer,
-	unit_quad_buffer: ^sdl.GPUBuffer,
-	primitive_buffer: Buffer,
-	white_texture:    ^sdl.GPUTexture,
-	sampler:          ^sdl.GPUSampler,
-}
-
-@(private)
-create_pipeline_2d_base :: proc(
-	device: ^sdl.GPUDevice,
-	window: ^sdl.Window,
-	sample_count: sdl.GPUSampleCount,
-) -> (
-	pipeline: Pipeline_2D_Base,
-	ok: bool,
-) {
-	// On failure, clean up any partially-created resources
-	defer if !ok {
-		if pipeline.sampler != nil do sdl.ReleaseGPUSampler(device, pipeline.sampler)
-		if pipeline.white_texture != nil do sdl.ReleaseGPUTexture(device, pipeline.white_texture)
-		if pipeline.unit_quad_buffer != nil do sdl.ReleaseGPUBuffer(device, pipeline.unit_quad_buffer)
-		if pipeline.primitive_buffer.gpu != nil do destroy_buffer(device, &pipeline.primitive_buffer)
-		if pipeline.index_buffer.gpu != nil do destroy_buffer(device, &pipeline.index_buffer)
-		if pipeline.vertex_buffer.gpu != nil do destroy_buffer(device, &pipeline.vertex_buffer)
-		if pipeline.sdl_pipeline != nil do sdl.ReleaseGPUGraphicsPipeline(device, pipeline.sdl_pipeline)
-	}
-
-	active_shader_formats := sdl.GetGPUShaderFormats(device)
-	if PLATFORM_SHADER_FORMAT_FLAG not_in active_shader_formats {
-		log.errorf(
-			"draw: no embedded shader matches active GPU formats; this build supports %v but device reports %v",
-			PLATFORM_SHADER_FORMAT,
-			active_shader_formats,
-		)
-		return pipeline, false
-	}
-
-	log.debug("Loaded", len(BASE_VERT_2D_RAW), "vert bytes")
-	log.debug("Loaded", len(BASE_FRAG_2D_RAW), "frag bytes")
-
-	vert_info := sdl.GPUShaderCreateInfo {
-		code_size           = len(BASE_VERT_2D_RAW),
-		code                = raw_data(BASE_VERT_2D_RAW),
-		entrypoint          = SHADER_ENTRY,
-		format              = {PLATFORM_SHADER_FORMAT_FLAG},
-		stage               = .VERTEX,
-		num_uniform_buffers = 1,
-		num_storage_buffers = 1,
-	}
-
-	frag_info := sdl.GPUShaderCreateInfo {
-		code_size    = len(BASE_FRAG_2D_RAW),
-		code         = raw_data(BASE_FRAG_2D_RAW),
-		entrypoint   = SHADER_ENTRY,
-		format       = {PLATFORM_SHADER_FORMAT_FLAG},
-		stage        = .FRAGMENT,
-		num_samplers = 1,
-	}
-
-	vert_shader := sdl.CreateGPUShader(device, vert_info)
-	if vert_shader == nil {
-		log.errorf("Could not create draw vertex shader: %s", sdl.GetError())
-		return pipeline, false
-	}
-
-	frag_shader := sdl.CreateGPUShader(device, frag_info)
-	if frag_shader == nil {
-		sdl.ReleaseGPUShader(device, vert_shader)
-		log.errorf("Could not create draw fragment shader: %s", sdl.GetError())
-		return pipeline, false
-	}
-
-	vertex_attributes: [3]sdl.GPUVertexAttribute = {
-		// position (GLSL location 0)
-		sdl.GPUVertexAttribute{buffer_slot = 0, location = 0, format = .FLOAT2, offset = 0},
-		// uv (GLSL location 1)
-		sdl.GPUVertexAttribute{buffer_slot = 0, location = 1, format = .FLOAT2, offset = size_of([2]f32)},
-		// color (GLSL location 2, u8x4 normalized to float by GPU)
-		sdl.GPUVertexAttribute{buffer_slot = 0, location = 2, format = .UBYTE4_NORM, offset = size_of([2]f32) * 2},
-	}
-
-	pipeline_info := sdl.GPUGraphicsPipelineCreateInfo {
-		vertex_shader = vert_shader,
-		fragment_shader = frag_shader,
-		primitive_type = .TRIANGLELIST,
-		multisample_state = sdl.GPUMultisampleState{sample_count = sample_count},
-		target_info = sdl.GPUGraphicsPipelineTargetInfo {
-			color_target_descriptions = &sdl.GPUColorTargetDescription {
-				format = sdl.GetGPUSwapchainTextureFormat(device, window),
-				// Premultiplied-alpha blending: src outputs RGB pre-multiplied by alpha,
-				// so src factor is ONE (not SRC_ALPHA). This eliminates the per-pixel
-				// divide in the outline path and is the standard blend mode used by
-				// Skia, Flutter, and GPUI.
-				blend_state = sdl.GPUColorTargetBlendState {
-					enable_blend = true,
-					enable_color_write_mask = true,
-					src_color_blendfactor = .ONE,
-					dst_color_blendfactor = .ONE_MINUS_SRC_ALPHA,
-					color_blend_op = .ADD,
-					src_alpha_blendfactor = .ONE,
-					dst_alpha_blendfactor = .ONE_MINUS_SRC_ALPHA,
-					alpha_blend_op = .ADD,
-					color_write_mask = sdl.GPUColorComponentFlags{.R, .G, .B, .A},
-				},
-			},
-			num_color_targets         = 1,
-		},
-		vertex_input_state = sdl.GPUVertexInputState {
-			vertex_buffer_descriptions = &sdl.GPUVertexBufferDescription {
-				slot = 0,
-				input_rate = .VERTEX,
-				pitch = size_of(Vertex),
-			},
-			num_vertex_buffers = 1,
-			vertex_attributes = raw_data(vertex_attributes[:]),
-			num_vertex_attributes = 3,
-		},
-	}
-
-	pipeline.sdl_pipeline = sdl.CreateGPUGraphicsPipeline(device, pipeline_info)
-	// Shaders are no longer needed regardless of pipeline creation success
-	sdl.ReleaseGPUShader(device, vert_shader)
-	sdl.ReleaseGPUShader(device, frag_shader)
-	if pipeline.sdl_pipeline == nil {
-		log.errorf("Failed to create draw graphics pipeline: %s", sdl.GetError())
-		return pipeline, false
-	}
-
-	// Create vertex buffer
-	vert_buf_ok: bool
-	pipeline.vertex_buffer, vert_buf_ok = create_buffer(
-		device,
-		size_of(Vertex) * BUFFER_INIT_SIZE,
-		sdl.GPUBufferUsageFlags{.VERTEX},
-	)
-	if !vert_buf_ok do return pipeline, false
-
-	// Create index buffer (used by text)
-	idx_buf_ok: bool
-	pipeline.index_buffer, idx_buf_ok = create_buffer(
-		device,
-		size_of(c.int) * BUFFER_INIT_SIZE,
-		sdl.GPUBufferUsageFlags{.INDEX},
-	)
-	if !idx_buf_ok do return pipeline, false
-
-	// Create primitive storage buffer (used by SDF instanced drawing)
-	prim_buf_ok: bool
-	pipeline.primitive_buffer, prim_buf_ok = create_buffer(
-		device,
-		size_of(Primitive) * BUFFER_INIT_SIZE,
-		sdl.GPUBufferUsageFlags{.GRAPHICS_STORAGE_READ},
-	)
-	if !prim_buf_ok do return pipeline, false
-
-	// Create static 6-vertex unit quad buffer (two triangles, TRIANGLELIST)
-	pipeline.unit_quad_buffer = sdl.CreateGPUBuffer(
-		device,
-		sdl.GPUBufferCreateInfo{usage = {.VERTEX}, size = 6 * size_of(Vertex)},
-	)
-	if pipeline.unit_quad_buffer == nil {
-		log.errorf("Failed to create unit quad buffer: %s", sdl.GetError())
-		return pipeline, false
-	}
-
-	// Create 1x1 white pixel texture
-	pipeline.white_texture = sdl.CreateGPUTexture(
-		device,
-		sdl.GPUTextureCreateInfo {
-			type = .D2,
-			format = .R8G8B8A8_UNORM,
-			usage = {.SAMPLER},
-			width = 1,
-			height = 1,
-			layer_count_or_depth = 1,
-			num_levels = 1,
-			sample_count = ._1,
-		},
-	)
-	if pipeline.white_texture == nil {
-		log.errorf("Failed to create white pixel texture: %s", sdl.GetError())
-		return pipeline, false
-	}
-
-	// Upload white pixel and unit quad data in a single command buffer
-	white_pixel := Color{255, 255, 255, 255}
-	white_transfer_buf := sdl.CreateGPUTransferBuffer(
-		device,
-		sdl.GPUTransferBufferCreateInfo{usage = .UPLOAD, size = size_of(white_pixel)},
-	)
-	if white_transfer_buf == nil {
-		log.errorf("Failed to create white pixel transfer buffer: %s", sdl.GetError())
-		return pipeline, false
-	}
-	defer sdl.ReleaseGPUTransferBuffer(device, white_transfer_buf)
-
-	white_ptr := sdl.MapGPUTransferBuffer(device, white_transfer_buf, false)
-	if white_ptr == nil {
-		log.errorf("Failed to map white pixel transfer buffer: %s", sdl.GetError())
-		return pipeline, false
-	}
-	mem.copy(white_ptr, &white_pixel, size_of(white_pixel))
-	sdl.UnmapGPUTransferBuffer(device, white_transfer_buf)
-
-	quad_verts := [6]Vertex {
-		{position = {0, 0}},
-		{position = {1, 0}},
-		{position = {0, 1}},
-		{position = {0, 1}},
-		{position = {1, 0}},
-		{position = {1, 1}},
-	}
-	quad_transfer_buf := sdl.CreateGPUTransferBuffer(
-		device,
-		sdl.GPUTransferBufferCreateInfo{usage = .UPLOAD, size = size_of(quad_verts)},
-	)
-	if quad_transfer_buf == nil {
-		log.errorf("Failed to create unit quad transfer buffer: %s", sdl.GetError())
-		return pipeline, false
-	}
-	defer sdl.ReleaseGPUTransferBuffer(device, quad_transfer_buf)
-
-	quad_ptr := sdl.MapGPUTransferBuffer(device, quad_transfer_buf, false)
-	if quad_ptr == nil {
-		log.errorf("Failed to map unit quad transfer buffer: %s", sdl.GetError())
-		return pipeline, false
-	}
-	mem.copy(quad_ptr, &quad_verts, size_of(quad_verts))
-	sdl.UnmapGPUTransferBuffer(device, quad_transfer_buf)
-
-	upload_cmd_buffer := sdl.AcquireGPUCommandBuffer(device)
-	if upload_cmd_buffer == nil {
-		log.errorf("Failed to acquire command buffer for init upload: %s", sdl.GetError())
-		return pipeline, false
-	}
-	upload_pass := sdl.BeginGPUCopyPass(upload_cmd_buffer)
-
-	sdl.UploadToGPUTexture(
-		upload_pass,
-		sdl.GPUTextureTransferInfo{transfer_buffer = white_transfer_buf},
-		sdl.GPUTextureRegion{texture = pipeline.white_texture, w = 1, h = 1, d = 1},
-		false,
-	)
-
-	sdl.UploadToGPUBuffer(
-		upload_pass,
-		sdl.GPUTransferBufferLocation{transfer_buffer = quad_transfer_buf},
-		sdl.GPUBufferRegion{buffer = pipeline.unit_quad_buffer, offset = 0, size = size_of(quad_verts)},
-		false,
-	)
-
-	sdl.EndGPUCopyPass(upload_pass)
-	if !sdl.SubmitGPUCommandBuffer(upload_cmd_buffer) {
-		log.errorf("Failed to submit init upload command buffer: %s", sdl.GetError())
-		return pipeline, false
-	}
-
-	log.debug("White pixel texture and unit quad buffer created and uploaded")
-
-	// Create sampler (shared by shapes and text)
-	pipeline.sampler = sdl.CreateGPUSampler(
-		device,
-		sdl.GPUSamplerCreateInfo {
-			min_filter = .LINEAR,
-			mag_filter = .LINEAR,
-			mipmap_mode = .LINEAR,
-			address_mode_u = .CLAMP_TO_EDGE,
-			address_mode_v = .CLAMP_TO_EDGE,
-			address_mode_w = .CLAMP_TO_EDGE,
-		},
-	)
-	if pipeline.sampler == nil {
-		log.errorf("Could not create GPU sampler: %s", sdl.GetError())
-		return pipeline, false
-	}
-
-	log.debug("Done creating unified draw pipeline")
-	return pipeline, true
-}
-
-@(private)
-upload :: proc(device: ^sdl.GPUDevice, pass: ^sdl.GPUCopyPass) {
-	// Upload vertices (shapes then text into one buffer)
-	shape_vert_count := u32(len(GLOB.tmp_shape_verts))
-	text_vert_count := u32(len(GLOB.tmp_text_verts))
-	total_vert_count := shape_vert_count + text_vert_count
-
-	if total_vert_count > 0 {
-		total_vert_size := total_vert_count * size_of(Vertex)
-		shape_vert_size := shape_vert_count * size_of(Vertex)
-		text_vert_size := text_vert_count * size_of(Vertex)
-
-		grow_buffer_if_needed(
-			device,
-			&GLOB.pipeline_2d_base.vertex_buffer,
-			total_vert_size,
-			sdl.GPUBufferUsageFlags{.VERTEX},
-		)
-
-		vert_array := sdl.MapGPUTransferBuffer(device, GLOB.pipeline_2d_base.vertex_buffer.transfer, false)
-		if vert_array == nil {
-			log.panicf("Failed to map vertex transfer buffer: %s", sdl.GetError())
-		}
-		if shape_vert_size > 0 {
-			mem.copy(vert_array, raw_data(GLOB.tmp_shape_verts), int(shape_vert_size))
-		}
-		if text_vert_size > 0 {
-			mem.copy(
-				rawptr(uintptr(vert_array) + uintptr(shape_vert_size)),
-				raw_data(GLOB.tmp_text_verts),
-				int(text_vert_size),
-			)
-		}
-		sdl.UnmapGPUTransferBuffer(device, GLOB.pipeline_2d_base.vertex_buffer.transfer)
-
-		sdl.UploadToGPUBuffer(
-			pass,
-			sdl.GPUTransferBufferLocation{transfer_buffer = GLOB.pipeline_2d_base.vertex_buffer.transfer},
-			sdl.GPUBufferRegion{buffer = GLOB.pipeline_2d_base.vertex_buffer.gpu, offset = 0, size = total_vert_size},
-			false,
-		)
-	}
-
-	// Upload text indices
-	index_count := u32(len(GLOB.tmp_text_indices))
-	if index_count > 0 {
-		index_size := index_count * size_of(c.int)
-
-		grow_buffer_if_needed(
-			device,
-			&GLOB.pipeline_2d_base.index_buffer,
-			index_size,
-			sdl.GPUBufferUsageFlags{.INDEX},
-		)
-
-		idx_array := sdl.MapGPUTransferBuffer(device, GLOB.pipeline_2d_base.index_buffer.transfer, false)
-		if idx_array == nil {
-			log.panicf("Failed to map index transfer buffer: %s", sdl.GetError())
-		}
-		mem.copy(idx_array, raw_data(GLOB.tmp_text_indices), int(index_size))
-		sdl.UnmapGPUTransferBuffer(device, GLOB.pipeline_2d_base.index_buffer.transfer)
-
-		sdl.UploadToGPUBuffer(
-			pass,
-			sdl.GPUTransferBufferLocation{transfer_buffer = GLOB.pipeline_2d_base.index_buffer.transfer},
-			sdl.GPUBufferRegion{buffer = GLOB.pipeline_2d_base.index_buffer.gpu, offset = 0, size = index_size},
-			false,
-		)
-	}
-
-	// Upload SDF primitives
-	prim_count := u32(len(GLOB.tmp_primitives))
-	if prim_count > 0 {
-		prim_size := prim_count * size_of(Primitive)
-
-		grow_buffer_if_needed(
-			device,
-			&GLOB.pipeline_2d_base.primitive_buffer,
-			prim_size,
-			sdl.GPUBufferUsageFlags{.GRAPHICS_STORAGE_READ},
-		)
-
-		prim_array := sdl.MapGPUTransferBuffer(device, GLOB.pipeline_2d_base.primitive_buffer.transfer, false)
-		if prim_array == nil {
-			log.panicf("Failed to map primitive transfer buffer: %s", sdl.GetError())
-		}
-		mem.copy(prim_array, raw_data(GLOB.tmp_primitives), int(prim_size))
-		sdl.UnmapGPUTransferBuffer(device, GLOB.pipeline_2d_base.primitive_buffer.transfer)
-
-		sdl.UploadToGPUBuffer(
-			pass,
-			sdl.GPUTransferBufferLocation{transfer_buffer = GLOB.pipeline_2d_base.primitive_buffer.transfer},
-			sdl.GPUBufferRegion{buffer = GLOB.pipeline_2d_base.primitive_buffer.gpu, offset = 0, size = prim_size},
-			false,
-		)
-	}
-}
-
-@(private)
-draw_layer :: proc(
-	device: ^sdl.GPUDevice,
-	window: ^sdl.Window,
-	cmd_buffer: ^sdl.GPUCommandBuffer,
-	render_texture: ^sdl.GPUTexture,
-	swapchain_width: u32,
-	swapchain_height: u32,
-	clear_color: [4]f32,
-	layer: ^Layer,
-) {
-	if layer.sub_batch_len == 0 {
-		if !GLOB.cleared {
-			pass := sdl.BeginGPURenderPass(
-				cmd_buffer,
-				&sdl.GPUColorTargetInfo {
-					texture = render_texture,
-					clear_color = sdl.FColor{clear_color[0], clear_color[1], clear_color[2], clear_color[3]},
-					load_op = .CLEAR,
-					store_op = .STORE,
-				},
-				1,
-				nil,
-			)
-			sdl.EndGPURenderPass(pass)
-			GLOB.cleared = true
-		}
-		return
-	}
-
-	render_pass := sdl.BeginGPURenderPass(
-		cmd_buffer,
-		&sdl.GPUColorTargetInfo {
-			texture = render_texture,
-			clear_color = sdl.FColor{clear_color[0], clear_color[1], clear_color[2], clear_color[3]},
-			load_op = GLOB.cleared ? .LOAD : .CLEAR,
-			store_op = .STORE,
-		},
-		1,
-		nil,
-	)
-	GLOB.cleared = true
-
-	sdl.BindGPUGraphicsPipeline(render_pass, GLOB.pipeline_2d_base.sdl_pipeline)
-
-	// Bind storage buffer (read by vertex shader in SDF mode)
-	sdl.BindGPUVertexStorageBuffers(
-		render_pass,
-		0,
-		([^]^sdl.GPUBuffer)(&GLOB.pipeline_2d_base.primitive_buffer.gpu),
-		1,
-	)
-
-	// Always bind index buffer — harmless if no indexed draws are issued
-	sdl.BindGPUIndexBuffer(
-		render_pass,
-		sdl.GPUBufferBinding{buffer = GLOB.pipeline_2d_base.index_buffer.gpu, offset = 0},
-		._32BIT,
-	)
-
-	// Shorthand aliases for frequently-used pipeline resources
-	main_vert_buf := GLOB.pipeline_2d_base.vertex_buffer.gpu
-	unit_quad := GLOB.pipeline_2d_base.unit_quad_buffer
-	white_texture := GLOB.pipeline_2d_base.white_texture
-	sampler := GLOB.pipeline_2d_base.sampler
-	width := f32(swapchain_width)
-	height := f32(swapchain_height)
-
-	// Initial GPU state: tessellated mode, main vertex buffer, no atlas bound yet
-	push_globals(cmd_buffer, width, height, .Tessellated)
-	sdl.BindGPUVertexBuffers(render_pass, 0, &sdl.GPUBufferBinding{buffer = main_vert_buf, offset = 0}, 1)
-
-	current_mode: Draw_Mode = .Tessellated
-	current_vert_buf := main_vert_buf
-	current_atlas: ^sdl.GPUTexture
-	current_sampler := sampler
-
-	// Text vertices live after shape vertices in the GPU vertex buffer
-	text_vertex_gpu_base := u32(len(GLOB.tmp_shape_verts))
-
-	for &scissor in GLOB.scissors[layer.scissor_start:][:layer.scissor_len] {
-		sdl.SetGPUScissor(render_pass, scissor.bounds)
-
-		for &batch in GLOB.tmp_sub_batches[scissor.sub_batch_start:][:scissor.sub_batch_len] {
-			switch batch.kind {
-			case .Tessellated:
-				if current_mode != .Tessellated {
-					push_globals(cmd_buffer, width, height, .Tessellated)
-					current_mode = .Tessellated
-				}
-				if current_vert_buf != main_vert_buf {
-					sdl.BindGPUVertexBuffers(render_pass, 0, &sdl.GPUBufferBinding{buffer = main_vert_buf, offset = 0}, 1)
-					current_vert_buf = main_vert_buf
-				}
-				// Determine texture and sampler for this batch
-				batch_texture: ^sdl.GPUTexture = white_texture
-				batch_sampler: ^sdl.GPUSampler = sampler
-				if batch.texture_id != INVALID_TEXTURE {
-					if bound_texture := texture_gpu_handle(batch.texture_id); bound_texture != nil {
-						batch_texture = bound_texture
-					}
-					batch_sampler = get_sampler(batch.sampler)
-				}
-				if current_atlas != batch_texture || current_sampler != batch_sampler {
-					sdl.BindGPUFragmentSamplers(
-						render_pass,
-						0,
-						&sdl.GPUTextureSamplerBinding{texture = batch_texture, sampler = batch_sampler},
-						1,
-					)
-					current_atlas = batch_texture
-					current_sampler = batch_sampler
-				}
-				sdl.DrawGPUPrimitives(render_pass, batch.count, 1, batch.offset, 0)
-
-			case .Text:
-				if current_mode != .Tessellated {
-					push_globals(cmd_buffer, width, height, .Tessellated)
-					current_mode = .Tessellated
-				}
-				if current_vert_buf != main_vert_buf {
-					sdl.BindGPUVertexBuffers(render_pass, 0, &sdl.GPUBufferBinding{buffer = main_vert_buf, offset = 0}, 1)
-					current_vert_buf = main_vert_buf
-				}
-				text_batch := &GLOB.tmp_text_batches[batch.offset]
-				if current_atlas != text_batch.atlas_texture {
-					sdl.BindGPUFragmentSamplers(
-						render_pass,
-						0,
-						&sdl.GPUTextureSamplerBinding{texture = text_batch.atlas_texture, sampler = sampler},
-						1,
-					)
-					current_atlas = text_batch.atlas_texture
-				}
-				sdl.DrawGPUIndexedPrimitives(
-					render_pass,
-					text_batch.index_count,
-					1,
-					text_batch.index_start,
-					i32(text_vertex_gpu_base + text_batch.vertex_start),
-					0,
-				)
-
-			case .SDF:
-				if current_mode != .SDF {
-					push_globals(cmd_buffer, width, height, .SDF)
-					current_mode = .SDF
-				}
-				if current_vert_buf != unit_quad {
-					sdl.BindGPUVertexBuffers(render_pass, 0, &sdl.GPUBufferBinding{buffer = unit_quad, offset = 0}, 1)
-					current_vert_buf = unit_quad
-				}
-				// Determine texture and sampler for this batch
-				batch_texture: ^sdl.GPUTexture = white_texture
-				batch_sampler: ^sdl.GPUSampler = sampler
-				if batch.texture_id != INVALID_TEXTURE {
-					if bound_texture := texture_gpu_handle(batch.texture_id); bound_texture != nil {
-						batch_texture = bound_texture
-					}
-					batch_sampler = get_sampler(batch.sampler)
-				}
-				if current_atlas != batch_texture || current_sampler != batch_sampler {
-					sdl.BindGPUFragmentSamplers(
-						render_pass,
-						0,
-						&sdl.GPUTextureSamplerBinding{texture = batch_texture, sampler = batch_sampler},
-						1,
-					)
-					current_atlas = batch_texture
-					current_sampler = batch_sampler
-				}
-				sdl.DrawGPUPrimitives(render_pass, 6, batch.count, 0, batch.offset)
-			}
-		}
-	}
-
-	sdl.EndGPURenderPass(render_pass)
-}
-
-destroy_pipeline_2d_base :: proc(device: ^sdl.GPUDevice, pipeline: ^Pipeline_2D_Base) {
-	destroy_buffer(device, &pipeline.vertex_buffer)
-	destroy_buffer(device, &pipeline.index_buffer)
-	destroy_buffer(device, &pipeline.primitive_buffer)
-	if pipeline.unit_quad_buffer != nil {
-		sdl.ReleaseGPUBuffer(device, pipeline.unit_quad_buffer)
-	}
-	sdl.ReleaseGPUTexture(device, pipeline.white_texture)
-	sdl.ReleaseGPUSampler(device, pipeline.sampler)
-	sdl.ReleaseGPUGraphicsPipeline(device, pipeline.sdl_pipeline)
-}
diff --git a/draw/shaders/generated/backdrop_blur.frag.metal b/draw/shaders/generated/backdrop_blur.frag.metal
new file mode 100644
index 0000000..dea97aa
--- /dev/null
+++ b/draw/shaders/generated/backdrop_blur.frag.metal
@@ -0,0 +1,118 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct Uniforms
+{
+    float2 inv_working_size;
+    uint pair_count;
+    uint mode;
+    float2 direction;
+    float inv_downsample_factor;
+    float _pad0;
+    float4 kernel0[32];
+};
+
+struct main0_out
+{
+    float4 out_color [[color(0)]];
+};
+
+struct main0_in
+{
+    float2 p_local [[user(locn0)]];
+    float4 f_color [[user(locn1)]];
+    float2 f_half_size [[user(locn2), flat]];
+    float4 f_radii [[user(locn3), flat]];
+    float f_half_feather [[user(locn4), flat]];
+};
+
+static inline __attribute__((always_inline))
+float3 blur_sample(thread const float2& uv, constant Uniforms& _108, texture2d<float> blur_input_tex, sampler blur_input_texSmplr)
+{
+    float3 color = blur_input_tex.sample(blur_input_texSmplr, uv).xyz * _108.kernel0[0].x;
+    float2 axis_step = _108.direction * _108.inv_working_size;
+    for (uint i = 1u; i < _108.pair_count; i++)
+    {
+        float w = _108.kernel0[i].x;
+        float off = _108.kernel0[i].y;
+        float2 step_uv = axis_step * off;
+        color += (blur_input_tex.sample(blur_input_texSmplr, (uv - step_uv)).xyz * w);
+        color += (blur_input_tex.sample(blur_input_texSmplr, (uv + step_uv)).xyz * w);
+    }
+    return color;
+}
+
+static inline __attribute__((always_inline))
+float sdRoundedBox(thread const float2& p, thread const float2& b, thread const float4& r)
+{
+    float2 _36;
+    if (p.x > 0.0)
+    {
+        _36 = r.xy;
+    }
+    else
+    {
+        _36 = r.zw;
+    }
+    float2 rxy = _36;
+    float _50;
+    if (p.y > 0.0)
+    {
+        _50 = rxy.x;
+    }
+    else
+    {
+        _50 = rxy.y;
+    }
+    float rr = _50;
+    float2 q = abs(p) - b;
+    if (rr == 0.0)
+    {
+        return fast::max(q.x, q.y);
+    }
+    q += float2(rr);
+    return (fast::min(fast::max(q.x, q.y), 0.0) + length(fast::max(q, float2(0.0)))) - rr;
+}
+
+static inline __attribute__((always_inline))
+float sdf_alpha(thread const float& d, thread const float& h)
+{
+    return 1.0 - smoothstep(-h, h, d);
+}
+
+fragment main0_out main0(main0_in in [[stage_in]], constant Uniforms& _108 [[buffer(0)]], texture2d<float> blur_input_tex [[texture(0)]], sampler blur_input_texSmplr [[sampler(0)]], float4 gl_FragCoord [[position]])
+{
+    main0_out out = {};
+    if (_108.mode == 0u)
+    {
+        float2 uv = gl_FragCoord.xy * _108.inv_working_size;
+        float2 param = uv;
+        float3 color = blur_sample(param, _108, blur_input_tex, blur_input_texSmplr);
+        out.out_color = float4(color, 1.0);
+        return out;
+    }
+    float2 param_1 = in.p_local;
+    float2 param_2 = in.f_half_size;
+    float4 param_3 = in.f_radii;
+    float d = sdRoundedBox(param_1, param_2, param_3);
+    if (d > in.f_half_feather)
+    {
+        discard_fragment();
+    }
+    float grad_magnitude = fast::max(fwidth(d), 9.9999999747524270787835121154785e-07);
+    float d_n = d / grad_magnitude;
+    float h_n = in.f_half_feather / grad_magnitude;
+    float2 uv_1 = (gl_FragCoord.xy * _108.inv_downsample_factor) * _108.inv_working_size;
+    float3 color_1 = blur_input_tex.sample(blur_input_texSmplr, uv_1).xyz;
+    float3 tinted = mix(color_1, color_1 * in.f_color.xyz, float3(in.f_color.w));
+    float param_4 = d_n;
+    float param_5 = h_n;
+    float coverage = sdf_alpha(param_4, param_5);
+    out.out_color = float4(tinted * coverage, coverage);
+    return out;
+}
+
diff --git a/draw/shaders/generated/backdrop_blur.frag.spv b/draw/shaders/generated/backdrop_blur.frag.spv
new file mode 100644
index 0000000..42e8d7c
Binary files /dev/null and b/draw/shaders/generated/backdrop_blur.frag.spv differ
diff --git a/draw/shaders/generated/backdrop_blur.vert.metal b/draw/shaders/generated/backdrop_blur.vert.metal
new file mode 100644
index 0000000..4bce4ce
--- /dev/null
+++ b/draw/shaders/generated/backdrop_blur.vert.metal
@@ -0,0 +1,123 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+    T elements[Num ? Num : 1];
+    
+    thread T& operator [] (size_t pos) thread
+    {
+        return elements[pos];
+    }
+    constexpr const thread T& operator [] (size_t pos) const thread
+    {
+        return elements[pos];
+    }
+    
+    device T& operator [] (size_t pos) device
+    {
+        return elements[pos];
+    }
+    constexpr const device T& operator [] (size_t pos) const device
+    {
+        return elements[pos];
+    }
+    
+    constexpr const constant T& operator [] (size_t pos) const constant
+    {
+        return elements[pos];
+    }
+    
+    threadgroup T& operator [] (size_t pos) threadgroup
+    {
+        return elements[pos];
+    }
+    constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+    {
+        return elements[pos];
+    }
+};
+
+struct Uniforms
+{
+    float4x4 projection;
+    float dpi_scale;
+    uint mode;
+    float2 _pad0;
+};
+
+struct Gaussian_Blur_Primitive
+{
+    float4 bounds;
+    float4 radii;
+    float2 half_size;
+    float half_feather;
+    uint color;
+};
+
+struct Gaussian_Blur_Primitive_1
+{
+    float4 bounds;
+    float4 radii;
+    float2 half_size;
+    float half_feather;
+    uint color;
+};
+
+struct Gaussian_Blur_Primitives
+{
+    Gaussian_Blur_Primitive_1 primitives[1];
+};
+
+constant spvUnsafeArray<float2, 6> _97 = spvUnsafeArray<float2, 6>({ float2(0.0), float2(1.0, 0.0), float2(0.0, 1.0), float2(0.0, 1.0), float2(1.0, 0.0), float2(1.0) });
+
+struct main0_out
+{
+    float2 p_local [[user(locn0)]];
+    float4 f_color [[user(locn1)]];
+    float2 f_half_size [[user(locn2)]];
+    float4 f_radii [[user(locn3)]];
+    float f_half_feather [[user(locn4)]];
+    float4 gl_Position [[position]];
+};
+
+vertex main0_out main0(constant Uniforms& _13 [[buffer(0)]], const device Gaussian_Blur_Primitives& _69 [[buffer(1)]], uint gl_VertexIndex [[vertex_id]], uint gl_InstanceIndex [[instance_id]])
+{
+    main0_out out = {};
+    if (_13.mode == 0u)
+    {
+        float2 ndc = float2((int(gl_VertexIndex) == 1) ? 3.0 : (-1.0), (int(gl_VertexIndex) == 2) ? 3.0 : (-1.0));
+        out.gl_Position = float4(ndc, 0.0, 1.0);
+        out.p_local = float2(0.0);
+        out.f_color = float4(0.0);
+        out.f_half_size = float2(0.0);
+        out.f_radii = float4(0.0);
+        out.f_half_feather = 0.0;
+    }
+    else
+    {
+        Gaussian_Blur_Primitive p;
+        p.bounds = _69.primitives[int(gl_InstanceIndex)].bounds;
+        p.radii = _69.primitives[int(gl_InstanceIndex)].radii;
+        p.half_size = _69.primitives[int(gl_InstanceIndex)].half_size;
+        p.half_feather = _69.primitives[int(gl_InstanceIndex)].half_feather;
+        p.color = _69.primitives[int(gl_InstanceIndex)].color;
+        float2 corner = _97[int(gl_VertexIndex)];
+        float2 world_pos = mix(p.bounds.xy, p.bounds.zw, corner);
+        float2 center = (p.bounds.xy + p.bounds.zw) * 0.5;
+        out.p_local = (world_pos - center) * _13.dpi_scale;
+        out.f_color = unpack_unorm4x8_to_float(p.color);
+        out.f_half_size = p.half_size;
+        out.f_radii = p.radii;
+        out.f_half_feather = p.half_feather;
+        out.gl_Position = _13.projection * float4(world_pos * _13.dpi_scale, 0.0, 1.0);
+    }
+    return out;
+}
+
diff --git a/draw/shaders/generated/backdrop_blur.vert.spv b/draw/shaders/generated/backdrop_blur.vert.spv
new file mode 100644
index 0000000..65522fb
Binary files /dev/null and b/draw/shaders/generated/backdrop_blur.vert.spv differ
diff --git a/draw/shaders/generated/backdrop_downsample.frag.metal b/draw/shaders/generated/backdrop_downsample.frag.metal
new file mode 100644
index 0000000..418df6a
--- /dev/null
+++ b/draw/shaders/generated/backdrop_downsample.frag.metal
@@ -0,0 +1,47 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct Uniforms
+{
+    float2 inv_source_size;
+    uint downsample_factor;
+    uint _pad0;
+};
+
+struct main0_out
+{
+    float4 out_color [[color(0)]];
+};
+
+fragment main0_out main0(constant Uniforms& _18 [[buffer(0)]], texture2d<float> source_tex [[texture(0)]], sampler source_texSmplr [[sampler(0)]], float4 gl_FragCoord [[position]])
+{
+    main0_out out = {};
+    float2 src_block_center = gl_FragCoord.xy * float(_18.downsample_factor);
+    if (_18.downsample_factor == 1u)
+    {
+        float2 uv = src_block_center * _18.inv_source_size;
+        out.out_color = source_tex.sample(source_texSmplr, uv);
+    }
+    else
+    {
+        if (_18.downsample_factor == 2u)
+        {
+            float2 uv_1 = src_block_center * _18.inv_source_size;
+            out.out_color = source_tex.sample(source_texSmplr, uv_1);
+        }
+        else
+        {
+            float off = float(_18.downsample_factor) * 0.25;
+            float2 uv_tl = (src_block_center + float2(-off, -off)) * _18.inv_source_size;
+            float2 uv_tr = (src_block_center + float2(off, -off)) * _18.inv_source_size;
+            float2 uv_bl = (src_block_center + float2(-off, off)) * _18.inv_source_size;
+            float2 uv_br = (src_block_center + float2(off)) * _18.inv_source_size;
+            float4 c = ((source_tex.sample(source_texSmplr, uv_tl) + source_tex.sample(source_texSmplr, uv_tr)) + source_tex.sample(source_texSmplr, uv_bl)) + source_tex.sample(source_texSmplr, uv_br);
+            out.out_color = c * 0.25;
+        }
+    }
+    return out;
+}
+
diff --git a/draw/shaders/generated/backdrop_downsample.frag.spv b/draw/shaders/generated/backdrop_downsample.frag.spv
new file mode 100644
index 0000000..6ab8504
Binary files /dev/null and b/draw/shaders/generated/backdrop_downsample.frag.spv differ
diff --git a/draw/shaders/generated/backdrop_fullscreen.vert.metal b/draw/shaders/generated/backdrop_fullscreen.vert.metal
new file mode 100644
index 0000000..6ecbfd5
--- /dev/null
+++ b/draw/shaders/generated/backdrop_fullscreen.vert.metal
@@ -0,0 +1,18 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct main0_out
+{
+    float4 gl_Position [[position]];
+};
+
+vertex main0_out main0(uint gl_VertexIndex [[vertex_id]])
+{
+    main0_out out = {};
+    float2 ndc = float2((int(gl_VertexIndex) == 1) ? 3.0 : (-1.0), (int(gl_VertexIndex) == 2) ? 3.0 : (-1.0));
+    out.gl_Position = float4(ndc, 0.0, 1.0);
+    return out;
+}
+
diff --git a/draw/shaders/generated/backdrop_fullscreen.vert.spv b/draw/shaders/generated/backdrop_fullscreen.vert.spv
new file mode 100644
index 0000000..8a127a0
Binary files /dev/null and b/draw/shaders/generated/backdrop_fullscreen.vert.spv differ
diff --git a/draw/shaders/generated/base_2d.frag.metal b/draw/shaders/generated/base_2d.frag.metal
index e3e07d6..c2052dd 100644
--- a/draw/shaders/generated/base_2d.frag.metal
+++ b/draw/shaders/generated/base_2d.frag.metal
@@ -24,8 +24,8 @@ struct main0_in
     float4 f_params [[user(locn2)]];
     float4 f_params2 [[user(locn3)]];
     uint f_flags [[user(locn4)]];
-    uint f_rotation_sc [[user(locn5)]];
-    uint4 f_uv_or_effects [[user(locn6)]];
+    float4 f_uv_rect [[user(locn6), flat]];
+    uint4 f_effects [[user(locn7)]];
 };
 
 static inline __attribute__((always_inline))
@@ -109,11 +109,6 @@ fragment main0_out main0(main0_in in [[stage_in]], texture2d tex [[textur
     float h = 0.5;
     float2 half_size = in.f_params.xy;
     float2 p_local = in.f_local_or_uv;
-    if ((flags & 16u) != 0u)
-    {
-        float2 sc = float2(as_type<half2>(in.f_rotation_sc));
-        p_local = float2((sc.y * p_local.x) + (sc.x * p_local.y), ((-sc.x) * p_local.x) + (sc.y * p_local.y));
-    }
     if (kind == 1u)
     {
         float4 corner_radii = float4(in.f_params.zw, in.f_params2.xy);
@@ -163,16 +158,16 @@ fragment main0_out main0(main0_in in [[stage_in]], texture2d tex [[textur
                     {
                         float d_start = dot(p_local, n_start);
                         float d_end = dot(p_local, n_end);
-                        float _372;
+                        float _338;
                         if (arc_bits == 1u)
                         {
-                            _372 = fast::max(d_start, d_end);
+                            _338 = fast::max(d_start, d_end);
                         }
                         else
                         {
-                            _372 = fast::min(d_start, d_end);
+                            _338 = fast::min(d_start, d_end);
                         }
-                        float d_wedge = _372;
+                        float d_wedge = _338;
                         d = fast::max(d, d_wedge);
                     }
                     half_size = float2(outer);
@@ -187,7 +182,7 @@ fragment main0_out main0(main0_in in [[stage_in]], texture2d tex [[textur
     if ((flags & 2u) != 0u)
     {
         float4 gradient_start = in.f_color;
-        float4 gradient_end = unpack_unorm4x8_to_float(in.f_uv_or_effects.x);
+        float4 gradient_end = unpack_unorm4x8_to_float(in.f_effects.x);
         if ((flags & 4u) != 0u)
         {
             float t_1 = length(p_local / half_size);
@@ -198,7 +193,7 @@ fragment main0_out main0(main0_in in [[stage_in]], texture2d tex [[textur
         }
         else
         {
-            float2 direction = float2(as_type<half2>(in.f_uv_or_effects.z));
+            float2 direction = float2(as_type<half2>(in.f_effects.z));
             float t_2 = (dot(p_local / half_size, direction) * 0.5) + 0.5;
             float4 param_11 = gradient_start;
             float4 param_12 = gradient_end;
@@ -210,7 +205,7 @@ fragment main0_out main0(main0_in in [[stage_in]], texture2d tex [[textur
     {
         if ((flags & 1u) != 0u)
         {
-            float4 uv_rect = as_type<float4>(in.f_uv_or_effects);
+            float4 uv_rect = in.f_uv_rect;
             float2 local_uv = ((p_local / half_size) * 0.5) + float2(0.5);
             float2 uv = mix(uv_rect.xy, uv_rect.zw, local_uv);
             shape_color = in.f_color * tex.sample(texSmplr, uv);
@@ -222,8 +217,8 @@ fragment main0_out main0(main0_in in [[stage_in]], texture2d tex [[textur
     }
     if ((flags & 8u) != 0u)
     {
-        float4 ol_color = unpack_unorm4x8_to_float(in.f_uv_or_effects.y);
-        float ol_width = float2(as_type<half2>(in.f_uv_or_effects.w)).x / grad_magnitude;
+        float4 ol_color = unpack_unorm4x8_to_float(in.f_effects.y);
+        float ol_width = float2(as_type<half2>(in.f_effects.w)).x / grad_magnitude;
         float param_14 = d;
         float param_15 = h;
         float fill_cov = sdf_alpha(param_14, param_15);
diff --git a/draw/shaders/generated/base_2d.frag.spv b/draw/shaders/generated/base_2d.frag.spv
index aee3760..8e85e79 100644
Binary files a/draw/shaders/generated/base_2d.frag.spv and b/draw/shaders/generated/base_2d.frag.spv differ
diff --git a/draw/shaders/generated/base_2d.vert.metal b/draw/shaders/generated/base_2d.vert.metal
index d7d7234..0f7c83b 100644
--- a/draw/shaders/generated/base_2d.vert.metal
+++ b/draw/shaders/generated/base_2d.vert.metal
@@ -10,7 +10,7 @@ struct Uniforms
     uint mode;
 };
 
-struct Primitive
+struct Core_2D_Primitive
 {
     float4 bounds;
     uint color;
@@ -19,10 +19,11 @@ struct Primitive
     float _pad;
     float4 params;
     float4 params2;
-    uint4 uv_or_effects;
+    float4 uv_rect;
+    uint4 effects;
 };
 
-struct Primitive_1
+struct Core_2D_Primitive_1
 {
     float4 bounds;
     uint color;
@@ -31,12 +32,13 @@ struct Primitive_1
     float _pad;
     float4 params;
     float4 params2;
-    uint4 uv_or_effects;
+    float4 uv_rect;
+    uint4 effects;
 };
 
-struct Primitives
+struct Core_2D_Primitives
 {
-    Primitive_1 primitives[1];
+    Core_2D_Primitive_1 primitives[1];
 };
 
 struct main0_out
@@ -46,8 +48,8 @@ struct main0_out
     float4 f_params [[user(locn2)]];
     float4 f_params2 [[user(locn3)]];
     uint f_flags [[user(locn4)]];
-    uint f_rotation_sc [[user(locn5)]];
-    uint4 f_uv_or_effects [[user(locn6)]];
+    float4 f_uv_rect [[user(locn6)]];
+    uint4 f_effects [[user(locn7)]];
     float4 gl_Position [[position]];
 };
 
@@ -58,7 +60,7 @@ struct main0_in
     float4 v_color [[attribute(2)]];
 };
 
-vertex main0_out main0(main0_in in [[stage_in]], constant Uniforms& _12 [[buffer(0)]], const device Primitives& _75 [[buffer(1)]], uint gl_InstanceIndex [[instance_id]])
+vertex main0_out main0(main0_in in [[stage_in]], constant Uniforms& _12 [[buffer(0)]], const device Core_2D_Primitives& _75 [[buffer(1)]], uint gl_InstanceIndex [[instance_id]])
 {
     main0_out out = {};
     if (_12.mode == 0u)
@@ -68,13 +70,13 @@ vertex main0_out main0(main0_in in [[stage_in]], constant Uniforms& _12 [[buffer
         out.f_params = float4(0.0);
         out.f_params2 = float4(0.0);
         out.f_flags = 0u;
-        out.f_rotation_sc = 0u;
-        out.f_uv_or_effects = uint4(0u);
+        out.f_uv_rect = float4(0.0);
+        out.f_effects = uint4(0u);
         out.gl_Position = _12.projection * float4(in.v_position * _12.dpi_scale, 0.0, 1.0);
     }
     else
     {
-        Primitive p;
+        Core_2D_Primitive p;
         p.bounds = _75.primitives[int(gl_InstanceIndex)].bounds;
         p.color = _75.primitives[int(gl_InstanceIndex)].color;
         p.flags = _75.primitives[int(gl_InstanceIndex)].flags;
@@ -82,17 +84,25 @@ vertex main0_out main0(main0_in in [[stage_in]], constant Uniforms& _12 [[buffer
         p._pad = _75.primitives[int(gl_InstanceIndex)]._pad;
         p.params = _75.primitives[int(gl_InstanceIndex)].params;
         p.params2 = _75.primitives[int(gl_InstanceIndex)].params2;
-        p.uv_or_effects = _75.primitives[int(gl_InstanceIndex)].uv_or_effects;
+        p.uv_rect = _75.primitives[int(gl_InstanceIndex)].uv_rect;
+        p.effects = _75.primitives[int(gl_InstanceIndex)].effects;
         float2 corner = in.v_position;
         float2 world_pos = mix(p.bounds.xy, p.bounds.zw, corner);
         float2 center = (p.bounds.xy + p.bounds.zw) * 0.5;
+        float2 local = (world_pos - center) * _12.dpi_scale;
+        uint flags = (p.flags >> 8u) & 255u;
+        if ((flags & 16u) != 0u)
+        {
+            float2 sc = float2(as_type<half2>(p.rotation_sc));
+            local = float2((sc.y * local.x) + (sc.x * local.y), ((-sc.x) * local.x) + (sc.y * local.y));
+        }
         out.f_color = unpack_unorm4x8_to_float(p.color);
-        out.f_local_or_uv = (world_pos - center) * _12.dpi_scale;
+        out.f_local_or_uv = local;
         out.f_params = p.params;
         out.f_params2 = p.params2;
         out.f_flags = p.flags;
-        out.f_rotation_sc = p.rotation_sc;
-        out.f_uv_or_effects = p.uv_or_effects;
+        out.f_uv_rect = p.uv_rect;
+        out.f_effects = p.effects;
         out.gl_Position = _12.projection * float4(world_pos * _12.dpi_scale, 0.0, 1.0);
     }
     return out;
diff --git a/draw/shaders/generated/base_2d.vert.spv b/draw/shaders/generated/base_2d.vert.spv
index 5cc30c5..2d18546 100644
Binary files a/draw/shaders/generated/base_2d.vert.spv and b/draw/shaders/generated/base_2d.vert.spv differ
diff --git a/draw/shaders/source/backdrop_blur.frag b/draw/shaders/source/backdrop_blur.frag
new file mode 100644
index 0000000..7193a24
--- /dev/null
+++ b/draw/shaders/source/backdrop_blur.frag
@@ -0,0 +1,155 @@
+#version 450 core
+
+// Unified backdrop blur fragment shader.
+// Handles both the 1D separable blur passes (mode 0, used for BOTH the H-pass and V-pass;
+// `direction` picks the axis) and the composite pass (mode 1, reads the fully-blurred
+// working texture, masks via RRect SDF, applies tint, and writes to source_texture with
+// premultiplied-over blending). Working textures are sized at the full swapchain resolution;
+// downsampled content occupies only a sub-rect at downsample factor > 1 (set via viewport).
+//
+// The composite blends with source_texture via the standard premultiplied-over blend state
+// (ONE, ONE_MINUS_SRC_ALPHA).
+//
+// Backdrop primitives are tint-only — there is no outline. A specialized edge effect
+// (e.g. liquid-glass-style refraction outlines) would be implemented as a dedicated
+// primitive type with its own pipeline.
+//
+// Two modes, structurally distinct:
+//
+//   Mode 0: 1D separable blur. Used for BOTH the H-pass and V-pass; `direction` (set in the
+//           per-pass uniforms) picks (1,0) for H or (0,1) for V. Reads the previous working-
+//           res texture and writes the next working-res texture. Fullscreen-triangle vertex
+//           output; gl_FragCoord.xy is in working-res target pixel space; UV =
+//           gl_FragCoord.xy * inv_working_size.
+//
+//   Mode 1: composite. Reads the fully-blurred working-res texture, applies the SDF mask and
+//           tint, writes to source_texture. Instanced unit-quad vertex output covering the
+//           per-primitive bounds; gl_FragCoord.xy is in the full-resolution render target;
+//           UV into the blurred working texture =
+//           (gl_FragCoord.xy * inv_downsample_factor) * inv_working_size.
+//           No kernel is applied here — the blur is already complete.
+//
+// V-blur is run as its own working→working pass rather than folded into the composite. The
+// folded variant produced a horizontal-vs-vertical asymmetry artifact: when V-blur sampled
+// the H-blur output through the bilinear-upsample/SDF-mask/tint pipeline in one shader
+// invocation, horizontal source features ended up looking sharper than vertical ones.
+// Matching V's structure exactly to H's restores symmetry.
+
+const uint MAX_KERNEL_PAIRS = 32;
+
+// --- Inputs from vertex shader ---
+layout(location = 0) in vec2 p_local;
+layout(location = 1) in mediump vec4 f_color;
+layout(location = 2) flat in vec2 f_half_size;
+layout(location = 3) flat in vec4 f_radii;
+layout(location = 4) flat in float f_half_feather;
+
+// --- Output ---
+layout(location = 0) out vec4 out_color;
+
+// --- Sampler ---
+// Mode 0: the previous working-res texture in the chain. Mode 1: the fully-blurred one.
+layout(set = 2, binding = 0) uniform sampler2D blur_input_tex;
+
+// --- Uniforms (set 3) ---
+// Per-bracket-substage. `mode` matches the vertex shader's mode (0 = blur, 1 = composite).
+// `direction` selects the kernel axis for blur offsets.
+// `kernel` holds the per-sigma weight/offset pairs computed CPU-side using the
+// linear-sampling pair adjustment (RAD/Rákos).
+layout(set = 3, binding = 0) uniform Uniforms {
+    vec2 inv_working_size; // 1.0 / working-resolution texture dimensions
+    uint pair_count; // number of (weight, offset) pairs; pair[0] is the center
+    uint mode; // 0 = 1D separable blur (H-pass or V-pass), 1 = composite
+    vec2 direction; // (1,0) for H, (0,1) for V — multiplied into the kernel offset
+    float inv_downsample_factor; // 1.0 / downsample_factor (mode 1 only; mode 0 ignores)
+    float _pad0;
+    vec4 kernel[MAX_KERNEL_PAIRS]; // .x = weight (paired-sum for idx>0), .y = offset (texels)
+};
+
+// ---------------------------------------------------------------------------------------------------------------------
+// ----- SDF helper --------------------
+// ---------------------------------------------------------------------------------------------------------------------
+
+float sdRoundedBox(vec2 p, vec2 b, vec4 r) { // signed distance: p = point, b = half extents
+    vec2 rxy = (p.x > 0.0) ? r.xy : r.zw; // r.xy serve the +x half, r.zw the -x half
+    float rr = (p.y > 0.0) ? rxy.x : rxy.y; // within the pair: .x for +y, .y for -y
+    vec2 q = abs(p) - b;
+    if (rr == 0.0) {
+        return max(q.x, q.y); // sharp corner: max-norm distance, no rounding term
+    }
+    q += rr;
+    return min(max(q.x, q.y), 0.0) + length(max(q, vec2(0.0))) - rr;
+}
+
+float sdf_alpha(float d, float h) { // coverage in [0,1]: 1 for d <= -h, 0 for d >= h
+    return 1.0 - smoothstep(-h, h, d); // NOTE(review): smoothstep is undefined for h <= 0
+}
+
+// ---------------------------------------------------------------------------------------------------------------------
+// ----- Blur sample loop --------------
+// ---------------------------------------------------------------------------------------------------------------------
+
+vec3 blur_sample(vec2 uv) { // weighted RGB sum of the centre tap plus mirrored tap pairs
+    vec3 color = kernel[0].x * texture(blur_input_tex, uv).rgb;
+
+    // Per-pair offset in texel space, projected onto the active axis.
+    vec2 axis_step = direction * inv_working_size;
+
+    for (uint i = 1u; i < pair_count; i += 1u) { // assumes pair_count <= MAX_KERNEL_PAIRS — TODO confirm
+        float w = kernel[i].x;
+        float off = kernel[i].y;
+        vec2 step_uv = off * axis_step;
+        color += w * texture(blur_input_tex, uv - step_uv).rgb;
+        color += w * texture(blur_input_tex, uv + step_uv).rgb;
+    }
+
+    return color;
+}
+
+// ---------------------------------------------------------------------------------------------------------------------
+// ----- Main --------------------------
+// ---------------------------------------------------------------------------------------------------------------------
+
+void main() {
+    if (mode == 0u) {
+        // ---- Mode 0: 1D separable blur (used for both H-pass and V-pass).
+        // gl_FragCoord is in working-res target pixel space; sample the previous working-res
+        // texture along `direction` with the kernel.
+        vec2 uv = gl_FragCoord.xy * inv_working_size;
+        vec3 color = blur_sample(uv);
+        out_color = vec4(color, 1.0);
+        return;
+    }
+
+    // ---- Mode 1: composite per-primitive.
+    // RRect SDF. fwidth() is taken BEFORE the discard: GLSL leaves derivatives undefined after
+    // a non-uniform discard, which would poison the AA normalization on edge quads.
+    float d = sdRoundedBox(p_local, f_half_size, f_radii);
+    float grad_magnitude = max(fwidth(d), 1e-6);
+    if (d > f_half_feather) {
+        discard; // early out for fragments well outside the masked region
+    }
+
+    float d_n = d / grad_magnitude; // fwidth-normalized for AA (matches main pipeline approach)
+    float h_n = f_half_feather / grad_magnitude;
+
+    // Sample the fully-blurred working-res texture. gl_FragCoord is full-res; convert to
+    // working-res UV via inv_downsample_factor. No kernel is applied — the H+V blur passes
+    // already produced the final blurred image; this is just an upsample + tint.
+    vec2 uv = (gl_FragCoord.xy * inv_downsample_factor) * inv_working_size;
+    vec3 color = texture(blur_input_tex, uv).rgb;
+
+    // Tint composition: inside the masked region the panel is fully opaque — it completely
+    // hides the original framebuffer content, just like real frosted glass and like iOS
+    // UIBlurEffect / CSS backdrop-filter. f_color.rgb specifies the tint color; f_color.a
+    // specifies the tint *mix strength* (NOT panel opacity). At alpha=0 we see the pure
+    // blur; at alpha=255 we see the blur fully multiplied by the tint color.
+    //
+    // Output is premultiplied to match the ONE, ONE_MINUS_SRC_ALPHA blend state. Coverage
+    // (the SDF mask's edge AA) modulates only the alpha channel, never the panel-vs-source
+    // blend; that way edge pixels still feather correctly while mid-panel pixels stay fully
+    // opaque.
+    mediump vec3 tinted = mix(color, color * f_color.rgb, f_color.a);
+    mediump float coverage = sdf_alpha(d_n, h_n);
+    out_color = vec4(tinted * coverage, coverage);
+}
diff --git a/draw/shaders/source/backdrop_blur.vert b/draw/shaders/source/backdrop_blur.vert
new file mode 100644
index 0000000..01d3c65
--- /dev/null
+++ b/draw/shaders/source/backdrop_blur.vert
@@ -0,0 +1,110 @@
+#version 450 core
+
+// Unified backdrop blur vertex shader.
+// Handles both the 1D separable blur passes (fullscreen triangle, mode 0; used for
+// BOTH the H-pass and V-pass) and the composite pass (instanced unit-quad over
+// Gaussian_Blur_Primitive storage buffer, mode 1) for the second PSO of the backdrop bracket.
+// The first PSO (downsample) uses backdrop_fullscreen.vert.
+//
+// No vertex buffer for either mode. Mode 0 uses gl_VertexIndex 0..2 for a single
+// fullscreen triangle; mode 1 uses gl_VertexIndex 0..5 for a unit-quad (two
+// triangles, TRIANGLELIST topology) and gl_InstanceIndex to select the primitive.
+//
+// Mode 0 viewport+scissor are CPU-set per sigma group to the work region (union AABB
+// of that group's backdrop primitives + halo, clamped to swapchain bounds). Mode 1
+// renders into source_texture with the screen-space orthographic projection; the
+// per-primitive bounds drive the quad in screen space.
+//
+// Backdrop primitives have NO rotation — backdrop sampling is in screen space, so
+// a rotated mask over a stationary blur sample would look wrong.
+
+// --- Outputs to fragment shader ---
+// p_local: shape-local position in physical pixels (origin at shape center).
+//          Only meaningful in mode 1 (composite). Zero-init for mode 0.
+layout(location = 0) out vec2 p_local;
+// f_color: tint, unpacked from primitive.color. Only meaningful in mode 1.
+layout(location = 1) out mediump vec4 f_color;
+// f_half_size: RRect half extents in physical pixels (mode 1 only).
+layout(location = 2) flat out vec2 f_half_size;
+// f_radii: per-corner radii in physical pixels (mode 1 only).
+layout(location = 3) flat out vec4 f_radii;
+// f_half_feather: SDF anti-aliasing feather (mode 1 only).
+layout(location = 4) flat out float f_half_feather;
+
+// --- Uniforms (set 1) ---
+// Backdrop pipeline's own uniform block — distinct from the main pipeline's
+// Vertex_Uniforms_2D. `mode` selects between 1D blur (0) and composite (1).
+layout(set = 1, binding = 0) uniform Uniforms {
+    mat4 projection;
+    float dpi_scale;
+    uint mode; // 0 = 1D separable blur (H-pass or V-pass), 1 = composite
+    vec2 _pad0;
+};
+
+// --- Gaussian blur primitive storage buffer (set 0) ---
+// 48 bytes, std430-natural layout (no implicit padding). vec4 members are
+// front-loaded so their 16-byte alignment is satisfied without holes; the
+// vec2 and scalar tail packs tight to land the struct at a clean 48-byte
+// stride (a multiple of 16, so the array stride needs no rounding either).
+// Field semantics match the CPU-side Gaussian_Blur_Primitive declared in
+// levlib/draw/backdrop.odin; keep both in sync.
+//
+// Gaussian blur primitives are tint-only: outline is intentionally absent. Specialized
+// edge effects (e.g. liquid-glass-style refraction outlines) would be a dedicated
+// primitive type with its own pipeline rather than a flag bit here.
+struct Gaussian_Blur_Primitive {
+    vec4 bounds; //  0-15: min_xy, max_xy (world-space)
+    vec4 radii; // 16-31: per-corner radii (physical px)
+    vec2 half_size; // 32-39: RRect half extents (physical px)
+    float half_feather; // 40-43: SDF anti-aliasing feather (physical px)
+    uint color; // 44-47: tint, packed RGBA u8x4
+};
+
+layout(std430, set = 0, binding = 0) readonly buffer Gaussian_Blur_Primitives {
+    Gaussian_Blur_Primitive primitives[];
+};
+
+void main() {
+    if (mode == 0u) {
+        // ---- Mode 0: 1D blur (H-pass or V-pass) fullscreen triangle ----
+        // gl_VertexIndex 0 -> ( -1, -1)
+        // gl_VertexIndex 1 -> (  3, -1)
+        // gl_VertexIndex 2 -> ( -1,  3)
+        vec2 ndc = vec2(
+                (gl_VertexIndex == 1) ? 3.0 : -1.0,
+                (gl_VertexIndex == 2) ? 3.0 : -1.0);
+        gl_Position = vec4(ndc, 0.0, 1.0);
+
+        // Mode 0 doesn't read the per-primitive varyings; zero-init for safety.
+        p_local = vec2(0.0);
+        f_color = vec4(0.0);
+        f_half_size = vec2(0.0);
+        f_radii = vec4(0.0);
+        f_half_feather = 0.0;
+    } else {
+        // ---- Mode 1: composite, instanced unit-quad over Gaussian_Blur_Primitive ----
+        Gaussian_Blur_Primitive p = primitives[gl_InstanceIndex];
+
+        // Unit-quad corners for TRIANGLELIST (2 triangles, 6 vertices):
+        //   index 0 -> (0,0)   index 3 -> (0,1)
+        //   index 1 -> (1,0)   index 4 -> (1,0)
+        //   index 2 -> (0,1)   index 5 -> (1,1)
+        vec2 quad_corners[6] = vec2[6](
+                vec2(0.0, 0.0), vec2(1.0, 0.0), vec2(0.0, 1.0),
+                vec2(0.0, 1.0), vec2(1.0, 0.0), vec2(1.0, 1.0));
+        vec2 corner = quad_corners[gl_VertexIndex];
+
+        vec2 world_pos = mix(p.bounds.xy, p.bounds.zw, corner);
+        vec2 center = 0.5 * (p.bounds.xy + p.bounds.zw);
+
+        // Shape-local position in physical pixels (no rotation for backdrops).
+        p_local = (world_pos - center) * dpi_scale;
+
+        f_color = unpackUnorm4x8(p.color);
+        f_half_size = p.half_size;
+        f_radii = p.radii;
+        f_half_feather = p.half_feather;
+
+        gl_Position = projection * vec4(world_pos * dpi_scale, 0.0, 1.0);
+    }
+}
diff --git a/draw/shaders/source/backdrop_downsample.frag b/draw/shaders/source/backdrop_downsample.frag
new file mode 100644
index 0000000..f9b9b65
--- /dev/null
+++ b/draw/shaders/source/backdrop_downsample.frag
@@ -0,0 +1,67 @@
+#version 450 core
+
+// Backdrop downsample fragment shader.
+// Reads source_texture (full-resolution snapshot of pre-bracket framebuffer contents) and
+// writes a downsampled copy at factor 1, 2, or 4. The output is the working texture (sized
+// at full swapchain resolution); larger factors only fill a sub-rect of it via the CPU-set
+// viewport. See backdrop.odin for the factor selection table (Flutter-style).
+//
+// Shader paths by factor:
+//
+//   factor=1: identity copy. One bilinear tap aligned to the source pixel center. Useful
+//             when sigma is small enough that any downsample round-trip would visibly soften
+//             the output (Flutter does this for sigma_phys ≤ 4).
+//
+//   factor=2: each output covers a 2×2 source block. Single bilinear tap at the shared
+//             corner reads all 4 source pixels with 0.25 weight.
+//
+//   factor=4: each output covers a 4×4 source block. We use 4 bilinear taps, each at the
+//             shared corner of a 2×2 sub-block. Each tap reads 4 source pixels uniformly;
+//             combined, the 4 taps sample 16 source pixels arranged uniformly across the
+//             block (full coverage at factor=4). The factor>=4 path is structured so the
+//             same shader code would extend to factor=8 (16 pixels of 64) or factor=16 (16
+//             of 256) if the CPU-side cap is ever raised, though the current cap is 4.
+//
+// The viewport+scissor are set by the CPU to limit output to the layer's work region in
+// working-texture coords (work_region_phys / factor), clamped to the texture bounds.
+
+layout(set = 3, binding = 0) uniform Uniforms {
+    vec2 inv_source_size; // 1.0 / source_texture pixel dimensions
+    uint downsample_factor; // 1, 2, or 4 today; the multi-tap path also accepts 8 or 16
+    uint _pad0;
+};
+
+layout(set = 2, binding = 0) uniform sampler2D source_tex;
+
+layout(location = 0) out vec4 out_color;
+
+void main() {
+    // Output pixel index (i): gl_FragCoord.xy - 0.5. Source-pixel block top-left for this
+    // output: i * factor. Center of the block: i*factor + factor/2 = gl_FragCoord.xy * factor.
+    vec2 src_block_center = gl_FragCoord.xy * float(downsample_factor);
+
+    if (downsample_factor == 1u) {
+        // Identity copy. UV at src_block_center hits the source pixel center directly.
+        vec2 uv = src_block_center * inv_source_size;
+        out_color = texture(source_tex, uv);
+    } else if (downsample_factor == 2u) { // same statements as factor 1; only the tap position differs
+        // Single tap at the shared corner of the 2×2 source block; one bilinear sample reads
+        // all 4 source pixels with equal 0.25 weights — uniform 2×2 box filter for free.
+        vec2 uv = src_block_center * inv_source_size;
+        out_color = texture(source_tex, uv);
+    } else { // expected factor >= 4; any value other than 1 or 2 also takes this path
+        // Four taps at offsets ±(factor/4) from the block center. Each tap lands on a corner
+        // shared by 4 source pixels of a (factor/2)×(factor/2) sub-block (equivalent at the
+        // bilinear level), giving a 4-tap = 16-source-pixel uniform sample of the block.
+        float off = float(downsample_factor) * 0.25;
+        vec2 uv_tl = (src_block_center + vec2(-off, -off)) * inv_source_size;
+        vec2 uv_tr = (src_block_center + vec2(off, -off)) * inv_source_size;
+        vec2 uv_bl = (src_block_center + vec2(-off, off)) * inv_source_size;
+        vec2 uv_br = (src_block_center + vec2(off, off)) * inv_source_size;
+        vec4 c = texture(source_tex, uv_tl)
+                + texture(source_tex, uv_tr)
+                + texture(source_tex, uv_bl)
+                + texture(source_tex, uv_br);
+        out_color = c * 0.25;
+    }
+}
diff --git a/draw/shaders/source/backdrop_fullscreen.vert b/draw/shaders/source/backdrop_fullscreen.vert
new file mode 100644
index 0000000..d3800ea
--- /dev/null
+++ b/draw/shaders/source/backdrop_fullscreen.vert
@@ -0,0 +1,21 @@
+#version 450 core
+
+// Fullscreen-triangle vertex shader for the backdrop downsample sub-pass (blur passes use backdrop_blur.vert).
+// Emits a single triangle covering NDC [-1,1]^2; the rasterizer clips edges outside.
+// No vertex buffer; uses gl_VertexIndex to pick corners.
+//
+// The CPU sets the viewport (and matching scissor) per layer-bracket to limit work to
+// the union AABB of the layer's backdrop primitives, expanded by 3*max_sigma and
+// clamped to swapchain bounds. The fragment shader uses gl_FragCoord (absolute pixel
+// space in the bound target) plus an inv-size uniform to compute its own UVs — see
+// each fragment shader for the per-pass sampling math.
+
+void main() {
+    // gl_VertexIndex 0 -> ( -1, -1)
+    // gl_VertexIndex 1 -> (  3, -1)
+    // gl_VertexIndex 2 -> ( -1,  3)
+    vec2 ndc = vec2(
+            (gl_VertexIndex == 1) ? 3.0 : -1.0,
+            (gl_VertexIndex == 2) ? 3.0 : -1.0);
+    gl_Position = vec4(ndc, 0.0, 1.0);
+}
diff --git a/draw/shaders/source/base_2d.frag b/draw/shaders/source/base_2d.frag
index 0ebb043..7f0ed6e 100644
--- a/draw/shaders/source/base_2d.frag
+++ b/draw/shaders/source/base_2d.frag
@@ -6,8 +6,8 @@ layout(location = 1) in vec2 f_local_or_uv;
 layout(location = 2) in vec4 f_params;
 layout(location = 3) in vec4 f_params2;
 layout(location = 4) flat in uint f_flags;
-layout(location = 5) flat in uint f_rotation_sc;
-layout(location = 6) flat in uvec4 f_uv_or_effects;
+layout(location = 6) flat in vec4 f_uv_rect;
+layout(location = 7) flat in uvec4 f_effects;
 
 // --- Output ---
 layout(location = 0) out vec4 out_color;
@@ -83,16 +83,7 @@ void main() {
     float h = 0.5; // half-feather width; overwritten per shape kind
     vec2 half_size = f_params.xy; // used by RRect and as reference size for gradients
 
-    vec2 p_local = f_local_or_uv;
-
-    // Apply inverse rotation using pre-computed sin/cos (no per-pixel trig).
-    // .Rotated flag = bit 4 = 16u
-    if ((flags & 16u) != 0u) {
-        vec2 sc = unpackHalf2x16(f_rotation_sc); // .x = sin(angle), .y = cos(angle)
-        // Inverse rotation matrix R(-angle) = [[cos, sin], [-sin, cos]]
-        p_local = vec2(sc.y * p_local.x + sc.x * p_local.y,
-                -sc.x * p_local.x + sc.y * p_local.y);
-    }
+    vec2 p_local = f_local_or_uv; // arrives rotated; vertex shader handled .Rotated
 
     if (kind == 1u) {
         // RRect — half_feather in params2.z
@@ -151,7 +142,7 @@ void main() {
     if ((flags & 2u) != 0u) {
         // Gradient active (bit 1)
         mediump vec4 gradient_start = f_color;
-        mediump vec4 gradient_end = unpackUnorm4x8(f_uv_or_effects.x);
+        mediump vec4 gradient_end = unpackUnorm4x8(f_effects.x);
 
         if ((flags & 4u) != 0u) {
             // Radial gradient (bit 2): t from distance to center
@@ -159,13 +150,13 @@ void main() {
             shape_color = gradient_2color(gradient_start, gradient_end, t);
         } else {
             // Linear gradient: direction pre-computed on CPU as (cos, sin) f16 pair
-            vec2 direction = unpackHalf2x16(f_uv_or_effects.z);
+            vec2 direction = unpackHalf2x16(f_effects.z);
             mediump float t = dot(p_local / half_size, direction) * 0.5 + 0.5;
             shape_color = gradient_2color(gradient_start, gradient_end, t);
         }
     } else if ((flags & 1u) != 0u) {
-        // Textured (bit 0) — RRect only in practice
-        vec4 uv_rect = uintBitsToFloat(f_uv_or_effects);
+        // Textured (bit 0)
+        vec4 uv_rect = f_uv_rect;
         vec2 local_uv = p_local / half_size * 0.5 + 0.5;
         vec2 uv = mix(uv_rect.xy, uv_rect.zw, local_uv);
         shape_color = f_color * texture(tex, uv);
@@ -180,9 +171,9 @@ void main() {
     // AA at d=ol_width. The outline band's coverage is total_cov - fill_cov.
     // Output is premultiplied: blend state is ONE, ONE_MINUS_SRC_ALPHA.
     if ((flags & 8u) != 0u) {
-        mediump vec4 ol_color = unpackUnorm4x8(f_uv_or_effects.y);
-        // Outline width in f_uv_or_effects.w (low f16 half)
-        float ol_width = unpackHalf2x16(f_uv_or_effects.w).x / grad_magnitude;
+        mediump vec4 ol_color = unpackUnorm4x8(f_effects.y);
+        // Outline width in f_effects.w (low f16 half)
+        float ol_width = unpackHalf2x16(f_effects.w).x / grad_magnitude;
 
         float fill_cov = sdf_alpha(d, h);
         float total_cov = sdf_alpha(d - ol_width, h);
diff --git a/draw/shaders/source/base_2d.vert b/draw/shaders/source/base_2d.vert
index 0f74941..e259374 100644
--- a/draw/shaders/source/base_2d.vert
+++ b/draw/shaders/source/base_2d.vert
@@ -11,8 +11,9 @@ layout(location = 1) out vec2 f_local_or_uv;
 layout(location = 2) out vec4 f_params;
 layout(location = 3) out vec4 f_params2;
 layout(location = 4) flat out uint f_flags;
-layout(location = 5) flat out uint f_rotation_sc;
-layout(location = 6) flat out uvec4 f_uv_or_effects;
+
+layout(location = 6) flat out vec4 f_uv_rect;
+layout(location = 7) flat out uvec4 f_effects;
 
 // ---------- Uniforms (single block — avoids spirv-cross reordering on Metal) ----------
 layout(set = 1, binding = 0) uniform Uniforms {
@@ -22,7 +23,10 @@ layout(set = 1, binding = 0) uniform Uniforms {
 };
 
 // ---------- SDF primitive storage buffer ----------
-struct Primitive {
+// Mirrors the CPU-side Core_2D_Primitive in core_2d.odin. Named with the
+// subsystem prefix so a project-wide grep on the type name matches both the GLSL
+// declaration and the Odin declaration.
+struct Core_2D_Primitive {
     vec4 bounds; // 0-15
     uint color; // 16-19
     uint flags; // 20-23
@@ -30,41 +34,56 @@ struct Primitive {
     float _pad; // 28-31
     vec4 params; // 32-47
     vec4 params2; // 48-63
-    uvec4 uv_or_effects; // 64-79
+    vec4 uv_rect; // 64-79: texture UV coordinates (read when .Textured)
+    uvec4 effects; // 80-95: gradient/outline parameters (read when .Gradient/.Outline)
 };
 
-layout(std430, set = 0, binding = 0) readonly buffer Primitives {
-    Primitive primitives[];
+layout(std430, set = 0, binding = 0) readonly buffer Core_2D_Primitives {
+    Core_2D_Primitive primitives[];
 };
 
 // ---------- Entry point ----------
 void main() {
     if (mode == 0u) {
-        // ---- Mode 0: Tessellated (legacy) ----
+        // ---- Mode 0: Tessellated (used for text and arbitrary user geometry) ----
         f_color = v_color;
         f_local_or_uv = v_uv;
         f_params = vec4(0.0);
         f_params2 = vec4(0.0);
         f_flags = 0u;
-        f_rotation_sc = 0u;
-        f_uv_or_effects = uvec4(0);
+        f_uv_rect = vec4(0.0);
+        f_effects = uvec4(0);
 
         gl_Position = projection * vec4(v_position * dpi_scale, 0.0, 1.0);
     } else {
         // ---- Mode 1: SDF instanced quads ----
-        Primitive p = primitives[gl_InstanceIndex];
+        Core_2D_Primitive p = primitives[gl_InstanceIndex];
 
         vec2 corner = v_position; // unit quad corners: (0,0)-(1,1)
         vec2 world_pos = mix(p.bounds.xy, p.bounds.zw, corner);
         vec2 center = 0.5 * (p.bounds.xy + p.bounds.zw);
 
+        // Compute shape-local position. Apply inverse rotation here in the vertex
+        // shader; the rasterizer interpolates the rotated values across the quad,
+        // which is mathematically equivalent to per-fragment rotation under 2D ortho
+        // projection. Frees one fragment-shader varying and per-pixel rotation math.
+        vec2 local = (world_pos - center) * dpi_scale;
+        uint flags = (p.flags >> 8u) & 0xFFu;
+        if ((flags & 16u) != 0u) {
+            // Rotated flag (bit 4); rotation_sc holds packed f16 (sin, cos).
+            // Inverse rotation matrix R(-angle) = [[cos, sin], [-sin, cos]].
+            vec2 sc = unpackHalf2x16(p.rotation_sc);
+            local = vec2(sc.y * local.x + sc.x * local.y,
+                    -sc.x * local.x + sc.y * local.y);
+        }
+
         f_color = unpackUnorm4x8(p.color);
-        f_local_or_uv = (world_pos - center) * dpi_scale; // shape-centered physical pixels
+        f_local_or_uv = local; // shape-local physical pixels (rotated if .Rotated set)
         f_params = p.params;
         f_params2 = p.params2;
         f_flags = p.flags;
-        f_rotation_sc = p.rotation_sc;
-        f_uv_or_effects = p.uv_or_effects;
+        f_uv_rect = p.uv_rect;
+        f_effects = p.effects;
 
         gl_Position = projection * vec4(world_pos * dpi_scale, 0.0, 1.0);
     }
diff --git a/draw/shapes.odin b/draw/shapes.odin
deleted file mode 100644
index 75c3852..0000000
--- a/draw/shapes.odin
+++ /dev/null
@@ -1,776 +0,0 @@
-package draw
-
-import "core:math"
-
-// ----- Internal helpers ----
-
-// Internal
-extrude_line :: proc(
-	start, end_pos: Vec2,
-	thickness: f32,
-	color: Color,
-	vertices: []Vertex,
-	offset: int,
-) -> int {
-	direction := end_pos - start
-	delta_x := direction[0]
-	delta_y := direction[1]
-	length := math.sqrt(delta_x * delta_x + delta_y * delta_y)
-	if length < 0.0001 do return 0
-
-	scale := thickness / (2 * length)
-	perpendicular := Vec2{-delta_y * scale, delta_x * scale}
-
-	p0 := start + perpendicular
-	p1 := start - perpendicular
-	p2 := end_pos - perpendicular
-	p3 := end_pos + perpendicular
-
-	vertices[offset + 0] = solid_vertex(p0, color)
-	vertices[offset + 1] = solid_vertex(p1, color)
-	vertices[offset + 2] = solid_vertex(p2, color)
-	vertices[offset + 3] = solid_vertex(p0, color)
-	vertices[offset + 4] = solid_vertex(p2, color)
-	vertices[offset + 5] = solid_vertex(p3, color)
-
-	return 6
-}
-
-// Create a vertex for solid-color shape drawing (no texture, UV defaults to zero).
-// Color is premultiplied: the tessellated fragment shader passes it through directly
-// and the blend state is ONE, ONE_MINUS_SRC_ALPHA.
-solid_vertex :: proc(position: Vec2, color: Color) -> Vertex {
-	return Vertex{position = position, color = premultiply_color(color)}
-}
-
-emit_rectangle :: proc(x, y, width, height: f32, color: Color, vertices: []Vertex, offset: int) {
-	vertices[offset + 0] = solid_vertex({x, y}, color)
-	vertices[offset + 1] = solid_vertex({x + width, y}, color)
-	vertices[offset + 2] = solid_vertex({x + width, y + height}, color)
-	vertices[offset + 3] = solid_vertex({x, y}, color)
-	vertices[offset + 4] = solid_vertex({x + width, y + height}, color)
-	vertices[offset + 5] = solid_vertex({x, y + height}, color)
-}
-
-// Internal
-prepare_sdf_primitive_textured :: proc(
-	layer: ^Layer,
-	prim: Primitive,
-	texture_id: Texture_Id,
-	sampler: Sampler_Preset,
-) {
-	offset := u32(len(GLOB.tmp_primitives))
-	append(&GLOB.tmp_primitives, prim)
-	scissor := &GLOB.scissors[layer.scissor_start + layer.scissor_len - 1]
-	append_or_extend_sub_batch(scissor, layer, .SDF, offset, 1, texture_id, sampler)
-}
-
-//Internal
-//
-// Compute the visual center of a center-parametrized shape after applying
-// Convention B origin semantics: `center` is where the origin-point lands in
-// world space; the visual center is offset by -origin and then rotated around
-// the landing point.
-//   visual_center = center + R(θ) · (-origin)
-// When θ=0: visual_center = center - origin (pure positioning shift).
-// When origin={0,0}: visual_center = center (no change).
-compute_pivot_center :: proc(center: Vec2, origin: Vec2, sin_angle, cos_angle: f32) -> Vec2 {
-	if origin == {0, 0} do return center
-	return(
-		center +
-		{cos_angle * (-origin.x) - sin_angle * (-origin.y), sin_angle * (-origin.x) + cos_angle * (-origin.y)} \
-	)
-}
-
-// Compute the AABB half-extents of a rectangle with half-size (half_width, half_height) rotated by the given cos/sin.
-rotated_aabb_half_extents :: proc(half_width, half_height, cos_angle, sin_angle: f32) -> [2]f32 {
-	cos_abs := abs(cos_angle)
-	sin_abs := abs(sin_angle)
-	return {half_width * cos_abs + half_height * sin_abs, half_width * sin_abs + half_height * cos_abs}
-}
-
-// Pack sin/cos into the Primitive.rotation_sc field as two f16 values.
-pack_rotation_sc :: #force_inline proc(sin_angle, cos_angle: f32) -> u32 {
-	return pack_f16_pair(f16(sin_angle), f16(cos_angle))
-}
-
-
-// Internal
-//
-// Build an RRect Primitive with bounds, params, and rotation computed from rectangle geometry.
-// The caller sets color, flags, and uv fields on the returned primitive before submitting.
-build_rrect_primitive :: proc(
-	rect: Rectangle,
-	radii: Rectangle_Radii,
-	origin: Vec2,
-	rotation: f32,
-	feather_px: f32,
-) -> Primitive {
-	max_radius := min(rect.width, rect.height) * 0.5
-	clamped_top_left := clamp(radii.top_left, 0, max_radius)
-	clamped_top_right := clamp(radii.top_right, 0, max_radius)
-	clamped_bottom_right := clamp(radii.bottom_right, 0, max_radius)
-	clamped_bottom_left := clamp(radii.bottom_left, 0, max_radius)
-
-	half_feather := feather_px * 0.5
-	padding := half_feather / GLOB.dpi_scaling
-	dpi_scale := GLOB.dpi_scaling
-
-	half_width := rect.width * 0.5
-	half_height := rect.height * 0.5
-	center_x := rect.x + half_width - origin.x
-	center_y := rect.y + half_height - origin.y
-	sin_angle: f32 = 0
-	cos_angle: f32 = 1
-	has_rotation := false
-
-	if needs_transform(origin, rotation) {
-		rotation_radians := math.to_radians(rotation)
-		sin_angle, cos_angle = math.sincos(rotation_radians)
-		has_rotation = rotation != 0
-		transform := build_pivot_rotation_sc({rect.x + origin.x, rect.y + origin.y}, origin, cos_angle, sin_angle)
-		new_center := apply_transform(transform, {half_width, half_height})
-		center_x = new_center.x
-		center_y = new_center.y
-	}
-
-	bounds_half_width, bounds_half_height := half_width, half_height
-	if has_rotation {
-		expanded := rotated_aabb_half_extents(half_width, half_height, cos_angle, sin_angle)
-		bounds_half_width = expanded.x
-		bounds_half_height = expanded.y
-	}
-
-	prim := Primitive {
-		bounds      = {
-			center_x - bounds_half_width - padding,
-			center_y - bounds_half_height - padding,
-			center_x + bounds_half_width + padding,
-			center_y + bounds_half_height + padding,
-		},
-		rotation_sc = has_rotation ? pack_rotation_sc(sin_angle, cos_angle) : 0,
-	}
-	prim.params.rrect = RRect_Params {
-		half_size    = {half_width * dpi_scale, half_height * dpi_scale},
-		radii        = {
-			clamped_bottom_right * dpi_scale,
-			clamped_top_right * dpi_scale,
-			clamped_bottom_left * dpi_scale,
-			clamped_top_left * dpi_scale,
-		},
-		half_feather = half_feather,
-	}
-	return prim
-}
-
-// Internal
-//
-// Build an RRect Primitive for a circle (fully-rounded square RRect).
-// The caller sets color, flags, and uv fields on the returned primitive before submitting.
-build_circle_primitive :: proc(
-	center: Vec2,
-	radius: f32,
-	origin: Vec2,
-	rotation: f32,
-	feather_px: f32,
-) -> Primitive {
-	half_feather := feather_px * 0.5
-	padding := half_feather / GLOB.dpi_scaling
-	dpi_scale := GLOB.dpi_scaling
-
-	actual_center := center
-	if origin != {0, 0} {
-		sin_a, cos_a := math.sincos(math.to_radians(rotation))
-		actual_center = compute_pivot_center(center, origin, sin_a, cos_a)
-	}
-
-	prim := Primitive {
-		bounds = {
-			actual_center.x - radius - padding,
-			actual_center.y - radius - padding,
-			actual_center.x + radius + padding,
-			actual_center.y + radius + padding,
-		},
-	}
-	scaled_radius := radius * dpi_scale
-	prim.params.rrect = RRect_Params {
-		half_size    = {scaled_radius, scaled_radius},
-		radii        = {scaled_radius, scaled_radius, scaled_radius, scaled_radius},
-		half_feather = half_feather,
-	}
-	return prim
-}
-
-// Internal
-//
-// Build an Ellipse Primitive with bounds, params, and rotation computed from ellipse geometry.
-// The caller sets color, flags, and uv fields on the returned primitive before submitting.
-build_ellipse_primitive :: proc(
-	center: Vec2,
-	radius_horizontal, radius_vertical: f32,
-	origin: Vec2,
-	rotation: f32,
-	feather_px: f32,
-) -> Primitive {
-	half_feather := feather_px * 0.5
-	padding := half_feather / GLOB.dpi_scaling
-	dpi_scale := GLOB.dpi_scaling
-
-	actual_center := center
-	sin_angle: f32 = 0
-	cos_angle: f32 = 1
-	has_rotation := false
-
-	if needs_transform(origin, rotation) {
-		rotation_radians := math.to_radians(rotation)
-		sin_angle, cos_angle = math.sincos(rotation_radians)
-		actual_center = compute_pivot_center(center, origin, sin_angle, cos_angle)
-		has_rotation = rotation != 0
-	}
-
-	bound_horizontal, bound_vertical := radius_horizontal, radius_vertical
-	if has_rotation {
-		expanded := rotated_aabb_half_extents(radius_horizontal, radius_vertical, cos_angle, sin_angle)
-		bound_horizontal = expanded.x
-		bound_vertical = expanded.y
-	}
-
-	prim := Primitive {
-		bounds      = {
-			actual_center.x - bound_horizontal - padding,
-			actual_center.y - bound_vertical - padding,
-			actual_center.x + bound_horizontal + padding,
-			actual_center.y + bound_vertical + padding,
-		},
-		rotation_sc = has_rotation ? pack_rotation_sc(sin_angle, cos_angle) : 0,
-	}
-	prim.params.ellipse = Ellipse_Params {
-		radii        = {radius_horizontal * dpi_scale, radius_vertical * dpi_scale},
-		half_feather = half_feather,
-	}
-	return prim
-}
-
-// Internal
-//
-// Build an NGon Primitive with bounds, params, and rotation computed from polygon geometry.
-// The caller sets color, flags, and uv fields on the returned primitive before submitting.
-build_polygon_primitive :: proc(
-	center: Vec2,
-	sides: int,
-	radius: f32,
-	origin: Vec2,
-	rotation: f32,
-	feather_px: f32,
-) -> Primitive {
-	half_feather := feather_px * 0.5
-	padding := half_feather / GLOB.dpi_scaling
-	dpi_scale := GLOB.dpi_scaling
-
-	actual_center := center
-	if origin != {0, 0} && rotation != 0 {
-		sin_a, cos_a := math.sincos(math.to_radians(rotation))
-		actual_center = compute_pivot_center(center, origin, sin_a, cos_a)
-	}
-
-	rotation_radians := math.to_radians(rotation)
-	sin_rot, cos_rot := math.sincos(rotation_radians)
-
-	prim := Primitive {
-		bounds      = {
-			actual_center.x - radius - padding,
-			actual_center.y - radius - padding,
-			actual_center.x + radius + padding,
-			actual_center.y + radius + padding,
-		},
-		rotation_sc = rotation != 0 ? pack_rotation_sc(sin_rot, cos_rot) : 0,
-	}
-	prim.params.ngon = NGon_Params {
-		radius       = radius * math.cos(math.PI / f32(sides)) * dpi_scale,
-		sides        = f32(sides),
-		half_feather = half_feather,
-	}
-	return prim
-}
-
-// Internal
-//
-// Build a Ring_Arc Primitive with bounds and params computed from ring/arc geometry.
-// Pre-computes the angular boundary normals on the CPU so the fragment shader needs
-// no per-pixel sin/cos. The radial SDF uses max(inner-r, r-outer) which correctly
-// handles pie slices (inner_radius = 0) and full rings.
-// The caller sets color, flags, and uv fields on the returned primitive before submitting.
-build_ring_arc_primitive :: proc(
-	center: Vec2,
-	inner_radius, outer_radius: f32,
-	start_angle: f32,
-	end_angle: f32,
-	origin: Vec2,
-	rotation: f32,
-	feather_px: f32,
-) -> (
-	Primitive,
-	Shape_Flags,
-) {
-	half_feather := feather_px * 0.5
-	padding := half_feather / GLOB.dpi_scaling
-	dpi_scale := GLOB.dpi_scaling
-
-	actual_center := center
-	rotation_offset: f32 = 0
-	if needs_transform(origin, rotation) {
-		sin_a, cos_a := math.sincos(math.to_radians(rotation))
-		actual_center = compute_pivot_center(center, origin, sin_a, cos_a)
-		rotation_offset = math.to_radians(rotation)
-	}
-
-	start_rad := math.to_radians(start_angle) + rotation_offset
-	end_rad := math.to_radians(end_angle) + rotation_offset
-
-	// Normalize arc span to [0, 2π]
-	arc_span := end_rad - start_rad
-	if arc_span < 0 {
-		arc_span += 2 * math.PI
-	}
-
-	// Pre-compute edge normals and arc flags on CPU — no per-pixel trig needed.
-	// arc_flags: {} = full ring, {.Arc_Narrow} = span ≤ π (intersect), {.Arc_Wide} = span > π (union)
-	arc_flags: Shape_Flags = {}
-	normal_start: [2]f32 = {}
-	normal_end: [2]f32 = {}
-
-	if arc_span < 2 * math.PI - 0.001 {
-		sin_start, cos_start := math.sincos(start_rad)
-		sin_end, cos_end := math.sincos(end_rad)
-		normal_start = {sin_start, -cos_start}
-		normal_end = {-sin_end, cos_end}
-		arc_flags = arc_span <= math.PI ? {.Arc_Narrow} : {.Arc_Wide}
-	}
-
-	prim := Primitive {
-		bounds = {
-			actual_center.x - outer_radius - padding,
-			actual_center.y - outer_radius - padding,
-			actual_center.x + outer_radius + padding,
-			actual_center.y + outer_radius + padding,
-		},
-	}
-	prim.params.ring_arc = Ring_Arc_Params {
-		inner_radius = inner_radius * dpi_scale,
-		outer_radius = outer_radius * dpi_scale,
-		normal_start = normal_start,
-		normal_end   = normal_end,
-		half_feather = half_feather,
-	}
-	return prim, arc_flags
-}
-
-// Apply gradient and outline effects to a primitive. Sets flags, uv.effects, and expands bounds.
-// All parameters (outline_width) are in logical pixels, matching the rest of the public API.
-// The helper converts to physical pixels for GPU packing internally.
-@(private)
-apply_shape_effects :: proc(
-	prim: ^Primitive,
-	kind: Shape_Kind,
-	gradient: Gradient,
-	outline_color: Color,
-	outline_width: f32,
-	extra_flags: Shape_Flags = {},
-) {
-	flags: Shape_Flags = extra_flags
-	gradient_dir_sc: u32 = 0
-
-	switch g in gradient {
-	case Linear_Gradient:
-		flags += {.Gradient}
-		prim.uv.effects.gradient_color = g.end_color
-		rad := math.to_radians(g.angle)
-		sin_a, cos_a := math.sincos(rad)
-		gradient_dir_sc = pack_f16_pair(f16(cos_a), f16(sin_a))
-	case Radial_Gradient:
-		flags += {.Gradient, .Gradient_Radial}
-		prim.uv.effects.gradient_color = g.outer_color
-	case:
-	}
-
-	outline_packed: u32 = 0
-	if outline_width > 0 {
-		flags += {.Outline}
-		prim.uv.effects.outline_color = outline_color
-		outline_packed = pack_f16_pair(f16(outline_width * GLOB.dpi_scaling), 0)
-		// Expand bounds to contain the outline (bounds are in logical pixels)
-		prim.bounds[0] -= outline_width
-		prim.bounds[1] -= outline_width
-		prim.bounds[2] += outline_width
-		prim.bounds[3] += outline_width
-	}
-
-	// Set .Rotated flag if rotation_sc was populated by the build proc
-	if prim.rotation_sc != 0 {
-		flags += {.Rotated}
-	}
-
-	prim.uv.effects.gradient_dir_sc = gradient_dir_sc
-	prim.uv.effects.outline_packed = outline_packed
-	prim.flags = pack_kind_flags(kind, flags)
-}
-
-// ---------------------------------------------------------------------------------------------------------------------
-// ----- SDF Rectangle procs -----------
-// ---------------------------------------------------------------------------------------------------------------------
-
-// Draw a filled rectangle via SDF with optional per-corner rounding radii.
-// Use `uniform_radii(rect, roundness)` to compute uniform radii from a 0–1 fraction.
-//
-// Origin semantics:
-//   `origin` is a local offset from the rect's top-left corner that selects both the positioning
-//   anchor and the rotation pivot. `rect.x, rect.y` specifies where that anchor point lands in
-//   world space. When `origin = {0, 0}` (default), `rect.x, rect.y` is the top-left corner.
-//   Rotation always occurs around the anchor point.
-rectangle :: proc(
-	layer: ^Layer,
-	rect: Rectangle,
-	color: Color,
-	gradient: Gradient = nil,
-	outline_color: Color = {},
-	outline_width: f32 = 0,
-	radii: Rectangle_Radii = {},
-	origin: Vec2 = {},
-	rotation: f32 = 0,
-	feather_px: f32 = DFT_FEATHER_PX,
-) {
-	prim := build_rrect_primitive(rect, radii, origin, rotation, feather_px)
-	prim.color = color
-	apply_shape_effects(&prim, .RRect, gradient, outline_color, outline_width)
-	prepare_sdf_primitive(layer, prim)
-}
-
-// Draw a rectangle with a texture fill via SDF with optional per-corner rounding radii.
-// Texture and gradient/outline are mutually exclusive (they share the same storage in the
-// primitive). To outline a textured rect, draw the texture first, then a stroke-only rect on top.
-// Origin semantics: see `rectangle`.
-rectangle_texture :: proc(
-	layer: ^Layer,
-	rect: Rectangle,
-	id: Texture_Id,
-	tint: Color = DFT_TINT,
-	uv_rect: Rectangle = DFT_UV_RECT,
-	sampler: Sampler_Preset = DFT_SAMPLER,
-	radii: Rectangle_Radii = {},
-	origin: Vec2 = {},
-	rotation: f32 = 0,
-	feather_px: f32 = DFT_FEATHER_PX,
-) {
-	prim := build_rrect_primitive(rect, radii, origin, rotation, feather_px)
-	prim.color = tint
-	tex_flags: Shape_Flags = {.Textured}
-	if prim.rotation_sc != 0 {
-		tex_flags += {.Rotated}
-	}
-	prim.flags = pack_kind_flags(.RRect, tex_flags)
-	prim.uv.uv_rect = {uv_rect.x, uv_rect.y, uv_rect.width, uv_rect.height}
-	prepare_sdf_primitive_textured(layer, prim, id, sampler)
-}
-
-// ---------------------------------------------------------------------------------------------------------------------
-// ----- SDF Circle procs (emit RRect primitives) ------
-// ---------------------------------------------------------------------------------------------------------------------
-
-// Draw a filled circle via SDF (emitted as a fully-rounded RRect).
-//
-// Origin semantics (Convention B):
-//   `origin` is a local offset from the shape's center that selects both the positioning anchor
-//   and the rotation pivot. The `center` parameter specifies where that anchor point lands in
-//   world space. When `origin = {0, 0}` (default), `center` is the visual center.
-//   When `origin = {r, 0}`, the point `r` pixels to the right of the shape center lands at
-//   `center`, shifting the shape left by `r`.
-circle :: proc(
-	layer: ^Layer,
-	center: Vec2,
-	radius: f32,
-	color: Color,
-	gradient: Gradient = nil,
-	outline_color: Color = {},
-	outline_width: f32 = 0,
-	origin: Vec2 = {},
-	rotation: f32 = 0,
-	feather_px: f32 = DFT_FEATHER_PX,
-) {
-	prim := build_circle_primitive(center, radius, origin, rotation, feather_px)
-	prim.color = color
-	apply_shape_effects(&prim, .RRect, gradient, outline_color, outline_width)
-	prepare_sdf_primitive(layer, prim)
-}
-
-// ---------------------------------------------------------------------------------------------------------------------
-// ----- SDF Ellipse procs (emit Ellipse primitives) ---
-// ---------------------------------------------------------------------------------------------------------------------
-
-// Draw a filled ellipse via SDF.
-// Origin semantics: see `circle`.
-ellipse :: proc(
-	layer: ^Layer,
-	center: Vec2,
-	radius_horizontal, radius_vertical: f32,
-	color: Color,
-	gradient: Gradient = nil,
-	outline_color: Color = {},
-	outline_width: f32 = 0,
-	origin: Vec2 = {},
-	rotation: f32 = 0,
-	feather_px: f32 = DFT_FEATHER_PX,
-) {
-	prim := build_ellipse_primitive(center, radius_horizontal, radius_vertical, origin, rotation, feather_px)
-	prim.color = color
-	apply_shape_effects(&prim, .Ellipse, gradient, outline_color, outline_width)
-	prepare_sdf_primitive(layer, prim)
-}
-
-// ---------------------------------------------------------------------------------------------------------------------
-// ----- SDF Polygon procs (emit NGon primitives) ------
-// ---------------------------------------------------------------------------------------------------------------------
-
-// Draw a filled regular polygon via SDF.
-// `sides` must be >= 3. The polygon is inscribed in a circle of the given `radius`.
-// Origin semantics: see `circle`.
-polygon :: proc(
-	layer: ^Layer,
-	center: Vec2,
-	sides: int,
-	radius: f32,
-	color: Color,
-	gradient: Gradient = nil,
-	outline_color: Color = {},
-	outline_width: f32 = 0,
-	origin: Vec2 = {},
-	rotation: f32 = 0,
-	feather_px: f32 = DFT_FEATHER_PX,
-) {
-	if sides < 3 do return
-
-	prim := build_polygon_primitive(center, sides, radius, origin, rotation, feather_px)
-	prim.color = color
-	apply_shape_effects(&prim, .NGon, gradient, outline_color, outline_width)
-	prepare_sdf_primitive(layer, prim)
-}
-
-// ---------------------------------------------------------------------------------------------------------------------
-// ----- SDF Ring / Arc procs (emit Ring_Arc primitives) ----
-// ---------------------------------------------------------------------------------------------------------------------
-
-// Draw a ring, arc, or pie slice via SDF.
-// Full ring by default. Pass start_angle/end_angle (degrees) for partial arcs.
-// Use inner_radius = 0 for pie slices (sectors).
-// Origin semantics: see `circle`.
-ring :: proc(
-	layer: ^Layer,
-	center: Vec2,
-	inner_radius, outer_radius: f32,
-	color: Color,
-	gradient: Gradient = nil,
-	outline_color: Color = {},
-	outline_width: f32 = 0,
-	start_angle: f32 = 0,
-	end_angle: f32 = DFT_CIRC_END_ANGLE,
-	origin: Vec2 = {},
-	rotation: f32 = 0,
-	feather_px: f32 = DFT_FEATHER_PX,
-) {
-	prim, arc_flags := build_ring_arc_primitive(
-		center,
-		inner_radius,
-		outer_radius,
-		start_angle,
-		end_angle,
-		origin,
-		rotation,
-		feather_px,
-	)
-	prim.color = color
-	apply_shape_effects(&prim, .Ring_Arc, gradient, outline_color, outline_width, arc_flags)
-	prepare_sdf_primitive(layer, prim)
-}
-
-// ---------------------------------------------------------------------------------------------------------------------
-// ----- SDF Line procs (emit rotated RRect primitives) ----
-// ---------------------------------------------------------------------------------------------------------------------
-
-// Draw a line segment via SDF (emitted as a rotated capsule-shaped RRect).
-// Round caps are produced by setting corner radii equal to half the thickness.
-line :: proc(
-	layer: ^Layer,
-	start_position, end_position: Vec2,
-	color: Color,
-	thickness: f32 = DFT_STROKE_THICKNESS,
-	outline_color: Color = {},
-	outline_width: f32 = 0,
-	feather_px: f32 = DFT_FEATHER_PX,
-) {
-	delta_x := end_position.x - start_position.x
-	delta_y := end_position.y - start_position.y
-	seg_length := math.sqrt(delta_x * delta_x + delta_y * delta_y)
-	if seg_length < 0.0001 do return
-	rotation_radians := math.atan2(delta_y, delta_x)
-	sin_angle, cos_angle := math.sincos(rotation_radians)
-
-	center_x := (start_position.x + end_position.x) * 0.5
-	center_y := (start_position.y + end_position.y) * 0.5
-
-	half_length := seg_length * 0.5
-	half_thickness := thickness * 0.5
-	cap_radius := half_thickness
-
-	half_feather := feather_px * 0.5
-	padding := half_feather / GLOB.dpi_scaling
-	dpi_scale := GLOB.dpi_scaling
-
-	// Expand bounds for rotation
-	bounds_half := rotated_aabb_half_extents(half_length + cap_radius, half_thickness, cos_angle, sin_angle)
-
-	prim := Primitive {
-		bounds      = {
-			center_x - bounds_half.x - padding,
-			center_y - bounds_half.y - padding,
-			center_x + bounds_half.x + padding,
-			center_y + bounds_half.y + padding,
-		},
-		color       = color,
-		rotation_sc = pack_rotation_sc(sin_angle, cos_angle),
-	}
-	prim.params.rrect = RRect_Params {
-		half_size    = {(half_length + cap_radius) * dpi_scale, half_thickness * dpi_scale},
-		radii        = {
-			cap_radius * dpi_scale,
-			cap_radius * dpi_scale,
-			cap_radius * dpi_scale,
-			cap_radius * dpi_scale,
-		},
-		half_feather = half_feather,
-	}
-	apply_shape_effects(&prim, .RRect, nil, outline_color, outline_width)
-	prepare_sdf_primitive(layer, prim)
-}
-
-// Draw a line strip via decomposed SDF line segments.
-line_strip :: proc(
-	layer: ^Layer,
-	points: []Vec2,
-	color: Color,
-	thickness: f32 = DFT_STROKE_THICKNESS,
-	outline_color: Color = {},
-	outline_width: f32 = 0,
-	feather_px: f32 = DFT_FEATHER_PX,
-) {
-	if len(points) < 2 do return
-	for i in 0 ..< len(points) - 1 {
-		line(layer, points[i], points[i + 1], color, thickness, outline_color, outline_width, feather_px)
-	}
-}
-
-
-// ---------------------------------------------------------------------------------------------------------------------
-// ----- Helpers ----------------
-// ---------------------------------------------------------------------------------------------------------------------
-
-// Returns uniform radii (all corners the same) as a fraction of the shorter side.
-// `roundness` is clamped to [0, 1]; 0 = sharp corners, 1 = fully rounded (stadium or circle).
-uniform_radii :: #force_inline proc(rect: Rectangle, roundness: f32) -> Rectangle_Radii {
-	cr := min(rect.width, rect.height) * clamp(roundness, 0, 1) * 0.5
-	return {cr, cr, cr, cr}
-}
-
-// Return Vec2 pixel offsets for use as the `origin` parameter of draw calls.
-// Composable with normal vector +/- arithmetic.
-//
-// Text anchor helpers are in text.odin (they depend on measure_text / SDL_ttf).
-
-// ----- Rectangle anchors (origin measured from rectangle's top-left) ---------------------------------------------
-
-center_of_rectangle :: #force_inline proc(rectangle: Rectangle) -> Vec2 {
-	return {rectangle.width * 0.5, rectangle.height * 0.5}
-}
-
-top_left_of_rectangle :: #force_inline proc(rectangle: Rectangle) -> Vec2 {
-	return {0, 0}
-}
-
-top_of_rectangle :: #force_inline proc(rectangle: Rectangle) -> Vec2 {
-	return {rectangle.width * 0.5, 0}
-}
-
-top_right_of_rectangle :: #force_inline proc(rectangle: Rectangle) -> Vec2 {
-	return {rectangle.width, 0}
-}
-
-left_of_rectangle :: #force_inline proc(rectangle: Rectangle) -> Vec2 {
-	return {0, rectangle.height * 0.5}
-}
-
-right_of_rectangle :: #force_inline proc(rectangle: Rectangle) -> Vec2 {
-	return {rectangle.width, rectangle.height * 0.5}
-}
-
-bottom_left_of_rectangle :: #force_inline proc(rectangle: Rectangle) -> Vec2 {
-	return {0, rectangle.height}
-}
-
-bottom_of_rectangle :: #force_inline proc(rectangle: Rectangle) -> Vec2 {
-	return {rectangle.width * 0.5, rectangle.height}
-}
-
-bottom_right_of_rectangle :: #force_inline proc(rectangle: Rectangle) -> Vec2 {
-	return {rectangle.width, rectangle.height}
-}
-
-// ----- Triangle anchors (origin measured from AABB top-left) -----------------------------------------------------
-
-center_of_triangle :: #force_inline proc(v1, v2, v3: Vec2) -> Vec2 {
-	bounds_min := Vec2{min(v1.x, v2.x, v3.x), min(v1.y, v2.y, v3.y)}
-	return (v1 + v2 + v3) / 3 - bounds_min
-}
-
-top_left_of_triangle :: #force_inline proc(v1, v2, v3: Vec2) -> Vec2 {
-	return {0, 0}
-}
-
-top_of_triangle :: #force_inline proc(v1, v2, v3: Vec2) -> Vec2 {
-	min_x := min(v1.x, v2.x, v3.x)
-	max_x := max(v1.x, v2.x, v3.x)
-	return {(max_x - min_x) * 0.5, 0}
-}
-
-top_right_of_triangle :: #force_inline proc(v1, v2, v3: Vec2) -> Vec2 {
-	min_x := min(v1.x, v2.x, v3.x)
-	max_x := max(v1.x, v2.x, v3.x)
-	return {max_x - min_x, 0}
-}
-
-left_of_triangle :: #force_inline proc(v1, v2, v3: Vec2) -> Vec2 {
-	min_y := min(v1.y, v2.y, v3.y)
-	max_y := max(v1.y, v2.y, v3.y)
-	return {0, (max_y - min_y) * 0.5}
-}
-
-right_of_triangle :: #force_inline proc(v1, v2, v3: Vec2) -> Vec2 {
-	bounds_min := Vec2{min(v1.x, v2.x, v3.x), min(v1.y, v2.y, v3.y)}
-	bounds_max := Vec2{max(v1.x, v2.x, v3.x), max(v1.y, v2.y, v3.y)}
-	return {bounds_max.x - bounds_min.x, (bounds_max.y - bounds_min.y) * 0.5}
-}
-
-bottom_left_of_triangle :: #force_inline proc(v1, v2, v3: Vec2) -> Vec2 {
-	min_y := min(v1.y, v2.y, v3.y)
-	max_y := max(v1.y, v2.y, v3.y)
-	return {0, max_y - min_y}
-}
-
-bottom_of_triangle :: #force_inline proc(v1, v2, v3: Vec2) -> Vec2 {
-	bounds_min := Vec2{min(v1.x, v2.x, v3.x), min(v1.y, v2.y, v3.y)}
-	bounds_max := Vec2{max(v1.x, v2.x, v3.x), max(v1.y, v2.y, v3.y)}
-	return {(bounds_max.x - bounds_min.x) * 0.5, bounds_max.y - bounds_min.y}
-}
-
-bottom_right_of_triangle :: #force_inline proc(v1, v2, v3: Vec2) -> Vec2 {
-	bounds_min := Vec2{min(v1.x, v2.x, v3.x), min(v1.y, v2.y, v3.y)}
-	bounds_max := Vec2{max(v1.x, v2.x, v3.x), max(v1.y, v2.y, v3.y)}
-	return bounds_max - bounds_min
-}
diff --git a/draw/tess/tess.odin b/draw/tess/tess.odin
index 55caad6..10ec7f5 100644
--- a/draw/tess/tess.odin
+++ b/draw/tess/tess.odin
@@ -4,6 +4,7 @@ import "core:math"
 
 import draw ".."
 
+//INTERNAL
 SMOOTH_CIRCLE_ERROR_RATE :: 0.1
 
 auto_segments :: proc(radius: f32, arc_degrees: f32) -> int {
@@ -22,11 +23,18 @@ auto_segments :: proc(radius: f32, arc_degrees: f32) -> int {
 
 // Color is premultiplied: the tessellated fragment shader passes it through directly
 // and the blend state is ONE, ONE_MINUS_SRC_ALPHA.
-solid_vertex :: proc(position: draw.Vec2, color: draw.Color) -> draw.Vertex {
-	return draw.Vertex{position = position, color = draw.premultiply_color(color)}
+//INTERNAL
+solid_vertex :: proc(position: draw.Vec2, color: draw.Color) -> draw.Vertex_2D {
+	return draw.Vertex_2D{position = position, color = draw.premultiply_color(color)}
 }
 
-emit_rectangle :: proc(x, y, width, height: f32, color: draw.Color, vertices: []draw.Vertex, offset: int) {
+//INTERNAL
+emit_rectangle :: proc(
+	x, y, width, height: f32,
+	color: draw.Color,
+	vertices: []draw.Vertex_2D,
+	offset: int,
+) {
 	vertices[offset + 0] = solid_vertex({x, y}, color)
 	vertices[offset + 1] = solid_vertex({x + width, y}, color)
 	vertices[offset + 2] = solid_vertex({x + width, y + height}, color)
@@ -35,11 +43,12 @@ emit_rectangle :: proc(x, y, width, height: f32, color: draw.Color, vertices: []
 	vertices[offset + 5] = solid_vertex({x, y + height}, color)
 }
 
+//INTERNAL
 extrude_line :: proc(
 	start, end_pos: draw.Vec2,
 	thickness: f32,
 	color: draw.Color,
-	vertices: []draw.Vertex,
+	vertices: []draw.Vertex_2D,
 	offset: int,
 ) -> int {
 	direction := end_pos - start
@@ -69,7 +78,7 @@ extrude_line :: proc(
 // ----- Public draw -----
 
 pixel :: proc(layer: ^draw.Layer, pos: draw.Vec2, color: draw.Color) {
-	vertices: [6]draw.Vertex
+	vertices: [6]draw.Vertex_2D
 	emit_rectangle(pos[0], pos[1], 1, 1, color, vertices[:], 0)
 	draw.prepare_shape(layer, vertices[:])
 }
@@ -82,7 +91,7 @@ triangle :: proc(
 	rotation: f32 = 0,
 ) {
 	if !draw.needs_transform(origin, rotation) {
-		vertices := [3]draw.Vertex{solid_vertex(v1, color), solid_vertex(v2, color), solid_vertex(v3, color)}
+		vertices := [3]draw.Vertex_2D{solid_vertex(v1, color), solid_vertex(v2, color), solid_vertex(v3, color)}
 		draw.prepare_shape(layer, vertices[:])
 		return
 	}
@@ -91,7 +100,7 @@ triangle :: proc(
 	local_v1 := v1 - bounds_min
 	local_v2 := v2 - bounds_min
 	local_v3 := v3 - bounds_min
-	vertices := [3]draw.Vertex {
+	vertices := [3]draw.Vertex_2D {
 		solid_vertex(draw.apply_transform(transform, local_v1), color),
 		solid_vertex(draw.apply_transform(transform, local_v2), color),
 		solid_vertex(draw.apply_transform(transform, local_v3), color),
@@ -170,7 +179,7 @@ triangle_aa :: proc(
 	transparent := draw.BLANK
 
 	// 3 interior + 6 × 3 edge-quad = 21 vertices
-	vertices: [21]draw.Vertex
+	vertices: [21]draw.Vertex_2D
 
 	// Interior triangle
 	vertices[0] = solid_vertex(p0, color)
@@ -213,7 +222,7 @@ triangle_lines :: proc(
 	rotation: f32 = 0,
 	temp_allocator := context.temp_allocator,
 ) {
-	vertices := make([]draw.Vertex, 18, temp_allocator)
+	vertices := make([]draw.Vertex_2D, 18, temp_allocator)
 	defer delete(vertices, temp_allocator)
 	write_offset := 0
 
@@ -249,7 +258,7 @@ triangle_fan :: proc(
 
 	triangle_count := len(points) - 2
 	vertex_count := triangle_count * 3
-	vertices := make([]draw.Vertex, vertex_count, temp_allocator)
+	vertices := make([]draw.Vertex_2D, vertex_count, temp_allocator)
 	defer delete(vertices, temp_allocator)
 
 	if !draw.needs_transform(origin, rotation) {
@@ -289,7 +298,7 @@ triangle_strip :: proc(
 
 	triangle_count := len(points) - 2
 	vertex_count := triangle_count * 3
-	vertices := make([]draw.Vertex, vertex_count, temp_allocator)
+	vertices := make([]draw.Vertex_2D, vertex_count, temp_allocator)
 	defer delete(vertices, temp_allocator)
 
 	if !draw.needs_transform(origin, rotation) {
diff --git a/draw/text.odin b/draw/text.odin
index 4ebdd1f..5849f71 100644
--- a/draw/text.odin
+++ b/draw/text.odin
@@ -8,21 +8,25 @@ import sdl_ttf "vendor:sdl3/ttf"
 
 Font_Id :: u16
 
+//INTERNAL
 Font_Key :: struct {
 	id:   Font_Id,
 	size: u16,
 }
 
+//INTERNAL
 Cache_Source :: enum u8 {
 	Custom,
 	Clay,
 }
 
+//INTERNAL
 Cache_Key :: struct {
 	id:     u32,
 	source: Cache_Source,
 }
 
+//INTERNAL
 Text_Cache :: struct {
 	engine:     ^sdl_ttf.TextEngine,
 	font_bytes: [dynamic][]u8,
@@ -30,7 +34,8 @@ Text_Cache :: struct {
 	cache:      map[Cache_Key]^sdl_ttf.Text,
 }
 
-// Internal for fetching SDL TTF font pointer for rendering
+// Fetch SDL TTF font pointer for rendering.
+//INTERNAL
 get_font :: proc(id: Font_Id, size: u16) -> ^sdl_ttf.Font {
 	assert(int(id) < len(GLOB.text_cache.font_bytes), "Invalid font ID.")
 	key := Font_Key{id, size}
@@ -77,6 +82,7 @@ register_font :: proc(bytes: []u8) -> (id: Font_Id, ok: bool) #optional_ok {
 	return Font_Id(len(GLOB.text_cache.font_bytes) - 1), true
 }
 
+//INTERNAL
 Text :: struct {
 	sdl_text: ^sdl_ttf.Text,
 	position: Vec2,
@@ -89,7 +95,7 @@ Text :: struct {
 
 // Shared cache lookup/create/update logic used by both the `text` proc and the Clay render path.
 // Returns the cached (or newly created) TTF_Text pointer.
-@(private)
+//INTERNAL
 cache_get_or_update :: proc(key: Cache_Key, c_str: cstring, font: ^sdl_ttf.Font) -> ^sdl_ttf.Text {
 	existing, found := GLOB.text_cache.cache[key]
 	if !found {
@@ -268,7 +274,8 @@ clear_text_cache_entry :: proc(id: u32) {
 // ----- Internal cache lifecycle ------
 // ---------------------------------------------------------------------------------------------------------------------
 
-@(private, require_results)
+//INTERNAL
+@(require_results)
 init_text_cache :: proc(
 	device: ^sdl.GPUDevice,
 	allocator := context.allocator,
@@ -299,6 +306,7 @@ init_text_cache :: proc(
 	return text_cache, true
 }
 
+//INTERNAL
 destroy_text_cache :: proc() {
 	for _, font in GLOB.text_cache.sdl_fonts {
 		sdl_ttf.CloseFont(font)
diff --git a/draw/textures.odin b/draw/textures.odin
index b9e5b31..a48ab57 100644
--- a/draw/textures.odin
+++ b/draw/textures.odin
@@ -14,8 +14,8 @@ Texture_Kind :: enum u8 {
 }
 
 Sampler_Preset :: enum u8 {
-	Nearest_Clamp,
 	Linear_Clamp,
+	Nearest_Clamp,
 	Nearest_Repeat,
 	Linear_Repeat,
 }
@@ -41,8 +41,7 @@ Texture_Desc :: struct {
 	kind:            Texture_Kind,
 }
 
-// Internal slot — not exported.
-@(private)
+//INTERNAL
 Texture_Slot :: struct {
 	gpu_texture: ^sdl.GPUTexture,
 	desc:        Texture_Desc,
@@ -319,8 +318,8 @@ texture_kind :: proc(id: Texture_Id) -> Texture_Kind {
 	return GLOB.texture_slots[u32(id)].desc.kind
 }
 
-// Internal: get the raw GPU texture pointer for binding during draw.
-@(private)
+// Get the raw GPU texture pointer for binding during draw.
+//INTERNAL
 texture_gpu_handle :: proc(id: Texture_Id) -> ^sdl.GPUTexture {
 	if id == INVALID_TEXTURE do return nil
 	idx := u32(id)
@@ -328,8 +327,8 @@ texture_gpu_handle :: proc(id: Texture_Id) -> ^sdl.GPUTexture {
 	return GLOB.texture_slots[idx].gpu_texture
 }
 
-// Deferred release (called from draw.end / clear_global)
-@(private)
+// Deferred release (called from end / clear_global).
+//INTERNAL
 process_pending_texture_releases :: proc() {
 	device := GLOB.device
 	for id in GLOB.pending_texture_releases {
@@ -346,7 +345,7 @@ process_pending_texture_releases :: proc() {
 	clear(&GLOB.pending_texture_releases)
 }
 
-@(private)
+//INTERNAL
 get_sampler :: proc(preset: Sampler_Preset) -> ^sdl.GPUSampler {
 	idx := int(preset)
 	if GLOB.samplers[idx] != nil do return GLOB.samplers[idx]
@@ -379,15 +378,15 @@ get_sampler :: proc(preset: Sampler_Preset) -> ^sdl.GPUSampler {
 	)
 	if sampler == nil {
 		log.errorf("Failed to create sampler preset %v: %s", preset, sdl.GetError())
-		return GLOB.pipeline_2d_base.sampler // fallback to existing default sampler
+		return GLOB.core_2d.sampler // fallback to existing default sampler
 	}
 
 	GLOB.samplers[idx] = sampler
 	return sampler
 }
 
-// Internal: destroy all sampler pool entries. Called from draw.destroy().
-@(private)
+// Destroy all sampler pool entries. Called from destroy().
+//INTERNAL
 destroy_sampler_pool :: proc() {
 	device := GLOB.device
 	for &s in GLOB.samplers {
@@ -398,8 +397,8 @@ destroy_sampler_pool :: proc() {
 	}
 }
 
-// Internal: destroy all registered textures. Called from draw.destroy().
-@(private)
+// Destroy all registered textures. Called from destroy().
+//INTERNAL
 destroy_all_textures :: proc() {
 	device := GLOB.device
 	for &slot in GLOB.texture_slots {
diff --git a/qrcode/examples/examples.odin b/qrcode/examples/examples.odin
index a3d4be1..d03d820 100644
--- a/qrcode/examples/examples.odin
+++ b/qrcode/examples/examples.odin
@@ -9,46 +9,45 @@ import qr ".."
 
 main :: proc() {
 	//----- General setup ----------------------------------
-	{
-		// Temp
-		track_temp: mem.Tracking_Allocator
-		mem.tracking_allocator_init(&track_temp, context.temp_allocator)
-		context.temp_allocator = mem.tracking_allocator(&track_temp)
+	// Temp
+	track_temp: mem.Tracking_Allocator
+	mem.tracking_allocator_init(&track_temp, context.temp_allocator)
+	context.temp_allocator = mem.tracking_allocator(&track_temp)
 
-		// Default
-		track: mem.Tracking_Allocator
-		mem.tracking_allocator_init(&track, context.allocator)
-		context.allocator = mem.tracking_allocator(&track)
-		// Log a warning about any memory that was not freed by the end of the program.
-		// This could be fine for some global state or it could be a memory leak.
-		defer {
-			// Temp allocator
-			if len(track_temp.bad_free_array) > 0 {
-				fmt.eprintf("=== %v incorrect frees - temp allocator: ===\n", len(track_temp.bad_free_array))
-				for entry in track_temp.bad_free_array {
-					fmt.eprintf("- %p @ %v\n", entry.memory, entry.location)
-				}
-				mem.tracking_allocator_destroy(&track_temp)
+	// Default
+	track: mem.Tracking_Allocator
+	mem.tracking_allocator_init(&track, context.allocator)
+	context.allocator = mem.tracking_allocator(&track)
+	// Log a warning about any memory that was not freed by the end of the program.
+	// This could be fine for some global state or it could be a memory leak.
+	defer {
+		// Temp allocator
+		if len(track_temp.bad_free_array) > 0 {
+			fmt.eprintf("=== %v incorrect frees - temp allocator: ===\n", len(track_temp.bad_free_array))
+			for entry in track_temp.bad_free_array {
+				fmt.eprintf("- %p @ %v\n", entry.memory, entry.location)
 			}
-			// Default allocator
-			if len(track.allocation_map) > 0 {
-				fmt.eprintf("=== %v allocations not freed - main allocator: ===\n", len(track.allocation_map))
-				for _, entry in track.allocation_map {
-					fmt.eprintf("- %v bytes @ %v\n", entry.size, entry.location)
-				}
-			}
-			if len(track.bad_free_array) > 0 {
-				fmt.eprintf("=== %v incorrect frees - main allocator: ===\n", len(track.bad_free_array))
-				for entry in track.bad_free_array {
-					fmt.eprintf("- %p @ %v\n", entry.memory, entry.location)
-				}
-			}
-			mem.tracking_allocator_destroy(&track)
+			mem.tracking_allocator_destroy(&track_temp)
 		}
-		// Logger
-		context.logger = log.create_console_logger()
-		defer log.destroy_console_logger(context.logger)
+		// Default allocator
+		if len(track.allocation_map) > 0 {
+			fmt.eprintf("=== %v allocations not freed - main allocator: ===\n", len(track.allocation_map))
+			for _, entry in track.allocation_map {
+				fmt.eprintf("- %v bytes @ %v\n", entry.size, entry.location)
+			}
+		}
+		if len(track.bad_free_array) > 0 {
+			fmt.eprintf("=== %v incorrect frees - main allocator: ===\n", len(track.bad_free_array))
+			for entry in track.bad_free_array {
+				fmt.eprintf("- %p @ %v\n", entry.memory, entry.location)
+			}
+		}
+		mem.tracking_allocator_destroy(&track)
 	}
+	// Logger
+	context.logger = log.create_console_logger()
+	defer log.destroy_console_logger(context.logger)
+
 
 	args := os.args
 	if len(args) < 2 {
diff --git a/vendor/lmdb/examples/examples.odin b/vendor/lmdb/examples/examples.odin
index 4a2a805..4cc2d6b 100644
--- a/vendor/lmdb/examples/examples.odin
+++ b/vendor/lmdb/examples/examples.odin
@@ -14,46 +14,45 @@ DB_PATH :: "out/debug/lmdb_example_db"
 
 main :: proc() {
 	//----- General setup ----------------------------------
-	{
-		// Temp
-		track_temp: mem.Tracking_Allocator
-		mem.tracking_allocator_init(&track_temp, context.temp_allocator)
-		context.temp_allocator = mem.tracking_allocator(&track_temp)
+	// Temp
+	track_temp: mem.Tracking_Allocator
+	mem.tracking_allocator_init(&track_temp, context.temp_allocator)
+	context.temp_allocator = mem.tracking_allocator(&track_temp)
 
-		// Default
-		track: mem.Tracking_Allocator
-		mem.tracking_allocator_init(&track, context.allocator)
-		context.allocator = mem.tracking_allocator(&track)
-		// Log a warning about any memory that was not freed by the end of the program.
-		// This could be fine for some global state or it could be a memory leak.
-		defer {
-			// Temp allocator
-			if len(track_temp.bad_free_array) > 0 {
-				fmt.eprintf("=== %v incorrect frees - temp allocator: ===\n", len(track_temp.bad_free_array))
-				for entry in track_temp.bad_free_array {
-					fmt.eprintf("- %p @ %v\n", entry.memory, entry.location)
-				}
-				mem.tracking_allocator_destroy(&track_temp)
+	// Default
+	track: mem.Tracking_Allocator
+	mem.tracking_allocator_init(&track, context.allocator)
+	context.allocator = mem.tracking_allocator(&track)
+	// Log a warning about any memory that was not freed by the end of the program.
+	// This could be fine for some global state or it could be a memory leak.
+	defer {
+		// Temp allocator
+		if len(track_temp.bad_free_array) > 0 {
+			fmt.eprintf("=== %v incorrect frees - temp allocator: ===\n", len(track_temp.bad_free_array))
+			for entry in track_temp.bad_free_array {
+				fmt.eprintf("- %p @ %v\n", entry.memory, entry.location)
 			}
-			// Default allocator
-			if len(track.allocation_map) > 0 {
-				fmt.eprintf("=== %v allocations not freed - main allocator: ===\n", len(track.allocation_map))
-				for _, entry in track.allocation_map {
-					fmt.eprintf("- %v bytes @ %v\n", entry.size, entry.location)
-				}
-			}
-			if len(track.bad_free_array) > 0 {
-				fmt.eprintf("=== %v incorrect frees - main allocator: ===\n", len(track.bad_free_array))
-				for entry in track.bad_free_array {
-					fmt.eprintf("- %p @ %v\n", entry.memory, entry.location)
-				}
-			}
-			mem.tracking_allocator_destroy(&track)
+			mem.tracking_allocator_destroy(&track_temp)
 		}
-		// Logger
-		context.logger = log.create_console_logger()
-		defer log.destroy_console_logger(context.logger)
+		// Default allocator
+		if len(track.allocation_map) > 0 {
+			fmt.eprintf("=== %v allocations not freed - main allocator: ===\n", len(track.allocation_map))
+			for _, entry in track.allocation_map {
+				fmt.eprintf("- %v bytes @ %v\n", entry.size, entry.location)
+			}
+		}
+		if len(track.bad_free_array) > 0 {
+			fmt.eprintf("=== %v incorrect frees - main allocator: ===\n", len(track.bad_free_array))
+			for entry in track.bad_free_array {
+				fmt.eprintf("- %p @ %v\n", entry.memory, entry.location)
+			}
+		}
+		mem.tracking_allocator_destroy(&track)
 	}
+	// Logger
+	context.logger = log.create_console_logger()
+	defer log.destroy_console_logger(context.logger)
+
 
 	environment: ^mdb.Env