Clay custom dispatch improvements & DPI scaling fixes (#26)

Co-authored-by: Zachary Levy <zachary@sunforge.is>
Reviewed-on: #26
This commit was merged in pull request #26.
This commit is contained in:
2026-05-06 04:17:24 +00:00
parent e8ffa28de3
commit 43f08ed30c
19 changed files with 627 additions and 407 deletions
+216 -131
View File
@@ -1,3 +1,66 @@
// Rendering library built on SDL3 GPU.
//
// ----- Coordinate system -----
// Origin is the top-left corner of the window/layer. X increases rightward, Y increases
// downward. This matches SDL, HTML Canvas, and most 2D UI coordinate conventions. All
// public position parameters (`center`, `origin`, `start_position`, `end_position`, every
// `Vec2`-typed field, every `Rectangle.x/y`, etc.) live in this coordinate system.
//
// ----- Unit-suffix convention -----
// Public CPU-side dimensions are in *logical* pixels by default (CSS-style: a value of 200
// looks the same physical size on a 1× monitor and a 2× Retina display). Suffix rules:
//
// no suffix — logical pixels. Default for layout values (positions, sizes, radii,
// outline widths, line thicknesses, gradient endpoints, etc.).
// `_lpx` — logical pixels, *explicit*. Optional. Use when an identifier would
// otherwise be ambiguous about which kind of pixel it carries —
// typically standalone constants like `SCANLINE_STRIPE_LPX` where the
// context doesn't make the unit obvious from the surrounding code.
// Procedure parameters and struct fields named after a layout property
// (`width`, `radius`, ...) don't need this suffix.
// `_ppx` — physical (device) pixels. Required whenever a value is in physical
// pixels, regardless of context. Reserved for quantities whose
// right-feeling magnitude is a property of the device pixel grid rather
// than of the layout: anti-aliasing band widths, sub-pixel snap targets,
// MSDF screen-pixel-range parameters.
//
// Examples:
//
// width, height, radius, outline_width, thickness — logical px (no suffix)
// SCANLINE_STRIPE_LPX, SCANLINE_GAP_LPX — logical px (explicit `_lpx`)
// feather_ppx, aa_ppx — physical px (`_ppx`)
//
// Layout values scale with DPI; rasterization-grid values do not. The shader handles the
// logical-to-physical conversion at the rasterization boundary; CPU-side `_ppx` inputs that
// need to interact with logical-space data convert via `/ dpi_scaling` at the use site.
//
// ----- Anti-aliasing -----
// MSAA is intentionally NOT supported. SDF text and shapes compute fragment coverage
// analytically via `smoothstep`, so they don't benefit from multisampling. Tessellated
// user geometry submitted via `prepare_shape` is rendered without anti-aliasing — if AA is
// required for tessellated content, the caller must either render it to their own offscreen
// target and submit the result as a texture, or use the AA helpers in the `tess` subpackage
// (e.g. `tess.triangle_aa` extrudes 1-physical-pixel alpha-falloff edge bands). This
// decision aligns with the SBC target (Mali Valhall, where MSAA's per-tile bandwidth
// multiplier is expensive) and matches RAD Debugger's architecture.
//
// ----- Color and blending -----
// `Color` is RGBA8 in memory order (R, G, B, A at indices 0..3). The shader unpacks via
// `unpackUnorm4x8`, which reads bytes in that exact order. Alpha 255 = fully opaque, 0 =
// fully transparent.
//
// All rendering uses *premultiplied-over* blending (blend state ONE, ONE_MINUS_SRC_ALPHA —
// the standard mode used by Skia, Flutter, and GPUI). Three implications:
//
// - Public shape procs (`rectangle`, `circle`, `line`, etc.) accept straight-alpha
// `Color` values and the SDF fragment shaders premultiply internally; users of these
// procs don't need to think about premultiplication.
// - Vertex colors written to the shared vertex stream (the tessellated path — text and
// anything submitted via `prepare_shape`, including `tess.*` helpers) MUST be
// premultiplied at the CPU. The tessellated fragment shader passes vertex color through
// directly without further modification. The `premultiply_color` helper handles this.
// - The clear color passed to `end()` is also premultiplied internally before being
// handed to the GPU; callers pass straight-alpha `Color` here too.
package draw
import "base:runtime"
@@ -51,7 +114,7 @@ INITIAL_SCISSOR_SIZE :: 10
// ----- Default parameter values -----
// Named constants for non-zero default procedure parameters. Centralizes magic numbers
// so they can be tuned in one place and referenced by name in proc signatures.
DFT_FEATHER_PX :: 1 // Total AA feather width in physical pixels (half on each side of boundary).
DFT_FEATHER_PPX :: 1 // Total AA feather width in physical pixels (half on each side of boundary).
DFT_STROKE_THICKNESS :: 1 // Default line/stroke thickness in logical pixels.
DFT_FONT_SIZE :: 44 // Default font size in points for text rendering.
DFT_CIRC_END_ANGLE :: 360 // Full-circle end angle in degrees (ring/arc).
@@ -132,27 +195,13 @@ Global :: struct {
// ---------------------------------------------------------------------------------------------------------------------
// A 2D position in world space. Non-distinct alias for [2]f32 — bare literals like {100, 200}
// work at non-ambiguous call sites.
//
// Coordinate system: origin is the top-left corner of the window/layer. X increases rightward,
// Y increases downward. This matches SDL, HTML Canvas, and most 2D UI coordinate conventions.
// All position parameters in the draw API (center, origin, start_position, end_position, etc.)
// use this coordinate system.
//
// Units are logical pixels (pre-DPI-scaling). The renderer multiplies by dpi_scaling internally
// before uploading to the GPU. A Vec2{100, 50} refers to the same visual location regardless of
// display DPI.
// work at non-ambiguous call sites. See the package doc for coordinate-system and unit
// conventions.
Vec2 :: [2]f32
// An RGBA color with 8 bits per channel. Distinct type over [4]u8 so that proc-group
// overloads can disambiguate Color from other 4-byte structs.
//
// Channel order: R, G, B, A (indices 0, 1, 2, 3). Alpha 255 is fully opaque, 0 is fully
// transparent. This matches the GPU-side layout: the shader unpacks via unpackUnorm4x8 which
// reads the bytes in memory order as R, G, B, A and normalizes each to [0, 1].
//
// When used in the Core_2D_Primitive or Gaussian_Blur_Primitive structs (e.g. .color), the 4 bytes
// are stored as a u32 in native byte order and unpacked by the shader.
// overloads can disambiguate Color from other 4-byte structs. See the package doc for the
// memory layout and the premultiplied-over blending contract.
Color :: [4]u8
BLACK :: Color{0, 0, 0, 255}
@@ -228,10 +277,9 @@ color_to_f32 :: proc(color: Color) -> [4]f32 {
return {f32(color[0]) * INV, f32(color[1]) * INV, f32(color[2]) * INV, f32(color[3]) * INV}
}
// Pre-multiply RGB channels by alpha. The tessellated vertex path and text path require
// premultiplied colors because the blend state is ONE, ONE_MINUS_SRC_ALPHA and the
// tessellated fragment shader passes vertex color through without further modification.
// Users who construct Vertex_2D structs manually for prepare_shape must premultiply their colors.
// Pre-multiply RGB channels by alpha. Required for any vertex written to the tessellated
// vertex stream (text path or `prepare_shape`-style submissions); see the package doc's
// "Color and blending" section for the full contract.
premultiply_color :: #force_inline proc(color: Color) -> Color {
a := u32(color[3])
return Color {
@@ -249,7 +297,7 @@ premultiply_color :: #force_inline proc(color: Color) -> Color {
//INTERNAL
Sub_Batch_Kind :: enum u8 {
Tessellated, // non-indexed, white texture or user texture, Core_2D_Mode.Tessellated
Text, // indexed, atlas texture, Core_2D_Mode.Tessellated
Text, // indexed, atlas texture, Core_2D_Mode.Text (vertices already in physical-pixel space)
SDF, // instanced unit quad, Core_2D_Mode.SDF
// instanced unit quad, backdrop subsystem V-composite (indexes Gaussian_Blur_Primitive).
// Bracket-scheduled per layer; see README.md § "Backdrop pipeline" for ordering semantics.
@@ -289,12 +337,6 @@ Scissor :: struct {
// ---------------------------------------------------------------------------------------------------------------------
// Initialize the renderer. Returns false if GPU pipeline or text engine creation fails.
//
// MSAA is intentionally NOT supported. SDF text and shapes compute coverage analytically via
// `smoothstep`, so they don't benefit from multisampling. Tessellated user geometry submitted
// via `prepare_shape` is not anti-aliased — if you need AA on tessellated content, render it
// to your own offscreen target and submit it as a texture. RAD Debugger and the SBC target
// (Mali Valhall, where MSAA's per-tile bandwidth multiplier is expensive) drove this decision.
@(require_results)
init :: proc(
device: ^sdl.GPUDevice,
@@ -446,30 +488,6 @@ clear_global :: proc() {
// ----- Frame ------------
// ---------------------------------------------------------------------------------------------------------------------
// Starts a frame: resets renderer state via `clear_global`, then installs the root scissor and
// the root layer covering `bounds`. Returns a pointer to that root layer, which is where the
// caller begins submitting primitives.
//
// `bounds` is given in logical pixels; the scissor rectangle is stored DPI-scaled, with each
// component truncated to `i32`.
begin :: proc(bounds: Rectangle) -> ^Layer {
	// Cleanup from the previous frame must happen before anything is appended below.
	clear_global()

	// Root scissor: logical bounds multiplied by the current DPI scale.
	dpi := GLOB.dpi_scaling
	root_clip := sdl.Rect {
		x = i32(bounds.x * dpi),
		y = i32(bounds.y * dpi),
		w = i32(bounds.width * dpi),
		h = i32(bounds.height * dpi),
	}
	append(&GLOB.scissors, Scissor{bounds = root_clip})

	// Root layer keeps the logical bounds and owns exactly the one scissor pushed above.
	append(&GLOB.layers, Layer{bounds = bounds, scissor_len = 1})
	return &GLOB.layers[GLOB.curr_layer_index]
}
// Creates a new layer
new_layer :: proc(prev_layer: ^Layer, bounds: Rectangle) -> ^Layer {
if GLOB.open_backdrop_layer != nil {
@@ -499,44 +517,28 @@ new_layer :: proc(prev_layer: ^Layer, bounds: Rectangle) -> ^Layer {
return &GLOB.layers[GLOB.curr_layer_index]
}
// Open a backdrop scope on `layer`. All subsequent draws on `layer` until the matching
// `end_backdrop` must be backdrop primitives (currently only `backdrop_blur`). Non-backdrop
// draws inside a scope, or backdrop draws outside one, panic.
//
// Bracket scheduling: each scope produces one bracket at render time. Within the scope,
// per-sigma sub-batch coalescing still applies (two contiguous backdrop_blur calls with
// the same sigma share an instanced composite draw and a single H+V blur pass pair).
//
// Multiple begin/end pairs per layer are allowed: each pair is its own bracket, and
// non-backdrop draws between pairs render in their submission position relative to the
// brackets. Use this for layered frost effects.
//
// Panics if a scope is already open, or if `layer` is nil. The nil check exists because
// `GLOB.open_backdrop_layer == nil` is the "no scope open" sentinel: storing a nil layer
// would look like a successful open while leaving the renderer in the closed state.
begin_backdrop :: proc(layer: ^Layer) {
	if GLOB.open_backdrop_layer != nil {
		log.panicf("begin_backdrop called while a scope is already open on layer %p", GLOB.open_backdrop_layer)
	}
	if layer == nil {
		log.panicf("begin_backdrop called with a nil layer")
	}
	GLOB.open_backdrop_layer = layer
}
// Sets up renderer to begin upload to the GPU. Returns starting `Layer` to begin processing primitives for.
begin :: proc(bounds: Rectangle) -> ^Layer {
// Cleanup
clear_global()
// Close the backdrop scope opened by `begin_backdrop`. Must be called on the same layer that
// the scope was opened on; the layer pointer mismatch is a hard error rather than a silent
// recovery to surface integration bugs early.
end_backdrop :: proc(layer: ^Layer) {
if GLOB.open_backdrop_layer != layer {
log.panicf("end_backdrop on wrong layer (open=%p, ended=%p)", GLOB.open_backdrop_layer, layer)
// Begin new layer + start a new scissor
scissor := Scissor {
bounds = sdl.Rect {
x = i32(bounds.x * GLOB.dpi_scaling),
y = i32(bounds.y * GLOB.dpi_scaling),
w = i32(bounds.width * GLOB.dpi_scaling),
h = i32(bounds.height * GLOB.dpi_scaling),
},
}
GLOB.open_backdrop_layer = nil
}
append(&GLOB.scissors, scissor)
// Convenience wrapper for the common case of a backdrop scope tied to a block. Use with
// defer-style block scoping:
//
// {
// draw.backdrop_scope(layer)
// draw.backdrop_blur(layer, ...)
// } // end_backdrop fires automatically
@(deferred_in = end_backdrop)
backdrop_scope :: #force_inline proc(layer: ^Layer) {
begin_backdrop(layer)
layer := Layer {
bounds = bounds,
scissor_len = 1,
}
append(&GLOB.layers, layer)
return &GLOB.layers[GLOB.curr_layer_index]
}
// Render primitives. clear_color is the background fill before any layers are drawn.
@@ -625,6 +627,46 @@ end :: proc(device: ^sdl.GPUDevice, window: ^sdl.Window, clear_color: Color = DF
}
}
// Open a backdrop scope on `layer`. All subsequent draws on `layer` until the matching
// `end_backdrop` must be backdrop primitives (currently only `backdrop_blur`). Non-backdrop
// draws inside a scope, or backdrop draws outside one, panic.
//
// Bracket scheduling: each scope produces one bracket at render time. Within the scope,
// per-sigma sub-batch coalescing still applies (two contiguous backdrop_blur calls with
// the same sigma share an instanced composite draw and a single H+V blur pass pair).
//
// Multiple begin/end pairs per layer are allowed: each pair is its own bracket, and
// non-backdrop draws between pairs render in their submission position relative to the
// brackets. Use this for layered frost effects.
//
// Panics if a scope is already open, or if `layer` is nil. The nil check exists because
// `GLOB.open_backdrop_layer == nil` is the "no scope open" sentinel: storing a nil layer
// would look like a successful open while leaving the renderer in the closed state.
begin_backdrop :: proc(layer: ^Layer) {
	if GLOB.open_backdrop_layer != nil {
		log.panicf("begin_backdrop called while a scope is already open on layer %p", GLOB.open_backdrop_layer)
	}
	if layer == nil {
		log.panicf("begin_backdrop called with a nil layer")
	}
	GLOB.open_backdrop_layer = layer
}
// Close the backdrop scope opened by `begin_backdrop`. Must be called on the same layer the
// scope was opened on. Both failure modes are hard errors rather than silent recoveries, so
// that integration bugs surface early:
//
//   - no scope is open at all (an unmatched `end_backdrop`, including `end_backdrop(nil)`,
//     which would otherwise compare equal to the nil "closed" sentinel and pass);
//   - a scope is open, but on a different layer than the one passed here.
end_backdrop :: proc(layer: ^Layer) {
	if GLOB.open_backdrop_layer == nil {
		log.panicf("end_backdrop called with no backdrop scope open (ended=%p)", layer)
	}
	if GLOB.open_backdrop_layer != layer {
		log.panicf("end_backdrop on wrong layer (open=%p, ended=%p)", GLOB.open_backdrop_layer, layer)
	}
	GLOB.open_backdrop_layer = nil
}
// Convenience wrapper for the common case of a backdrop scope tied to a lexical block.
// Calling it opens the scope via `begin_backdrop(layer)`; the `deferred_in` attribute makes
// the compiler call `end_backdrop` with the same `layer` argument when the *calling* scope
// exits, so the begin/end pair cannot be left unbalanced by an early exit from the block.
//
// Usage:
//
//     {
//         draw.backdrop_scope(layer)
//         draw.backdrop_blur(layer, ...)
//     } // end_backdrop(layer) fires automatically here
@(deferred_in = end_backdrop)
backdrop_scope :: #force_inline proc(layer: ^Layer) {
begin_backdrop(layer)
}
// ---------------------------------------------------------------------------------------------------------------------
// ----- Sub-batch dispatch ------------
// ---------------------------------------------------------------------------------------------------------------------
@@ -712,14 +754,18 @@ measure_text_clay :: proc "c" (
}
// Called for each Clay `RenderCommandType.Custom` render command that
// `prepare_clay_batch` encounters.
// `prepare_clay_batch` encounters and which is NOT a levlib-managed variant
// (e.g. `Backdrop_Marker`).
//
// - `layer` is the layer the command belongs to (post-z-index promotion).
// - `bounds` is already translated into the active layer's coordinate system
// and pre-DPI, matching what the built-in shape procs expect.
// - `render_data` is Clay's `CustomRenderData` for the element, exposing
// `backgroundColor`, `cornerRadius`, and the `customData` pointer the caller
// attached to `clay.CustomElementConfig.customData`.
// `backgroundColor` and `cornerRadius`. Its `customData` field has been
// unwrapped from the `Clay_Custom` envelope: it points at the user's own
// data (the value the user wrote into the `rawptr` variant), not at the
// `Clay_Custom` itself. If the union was zero-init (no variant set) or
// `customData` was originally nil, the callback receives nil.
//
// The callback must not call `new_layer` or `prepare_clay_batch`.
Custom_Draw :: #type proc(layer: ^Layer, bounds: Rectangle, render_data: clay.CustomRenderData)
@@ -729,33 +775,51 @@ ClayBatch :: struct {
cmds: clay.ClayArray(clay.RenderCommand),
}
// Magic-number-tagged struct that user app data points at via Clay's customData field.
// `prepare_clay_batch` recognizes these and routes them through a backdrop scope automatically.
// The user populates a `Backdrop_Marker`, points `clay.CustomElementConfig.customData` at it,
// and the integration walks the command stream, opening/closing scopes around contiguous
// backdrop runs. Magic-number sentinel chosen over a separate userData flag so the marker
// type stays self-describing in core dumps and in any non-Odin debugger view of the heap.
Backdrop_Marker :: struct {
magic: u32,
sigma: f32,
tint: Color,
radii: Rectangle_Radii,
feather_px: f32,
// Discriminated sum of everything `clay.CustomElementConfig.customData` is allowed to point
// at. levlib-defined variants (currently just `Backdrop_Marker`) are recognized by
// `prepare_clay_batch` and routed to the appropriate internal path; the `rawptr` variant is
// the escape hatch for user-defined custom drawing — `prepare_clay_batch` unwraps it before
// invoking `custom_draw` so the callback sees the user's pointer in `render_data.customData`
// exactly as if no wrapper were involved.
//
// Contract: `customData`, when non-nil, MUST point at storage holding a `Clay_Custom`
// value. The user owns that storage; its lifetime must span the Clay layout call and the
// matching `prepare_clay_batch` call. Pointing `customData` at a bare user struct violates
// the contract — the dispatcher will read its first bytes as a union tag and either route
// the draw incorrectly or panic on type assertion. There is no recovery path; this is a
// strict-discipline API by design.
//
// Construction notes (Odin implicit-conversion rules):
// - Backdrop variant: `bd: Clay_Custom = Backdrop_Marker{...}` works directly.
// Variant-to-union conversion is implicit.
// - User pointer: `up: Clay_Custom = rawptr(&my_struct)` — the explicit `rawptr(...)` is
// required because Odin does not chain `^T -> rawptr -> Clay_Custom` implicitly. A bare
// `up: Clay_Custom = &my_struct` is a compile error.
Clay_Custom :: union {
Backdrop_Marker,
rawptr,
}
// 'BDPT' in big-endian ASCII. Picked for greppability and to be obviously non-zero in
// uninitialized memory; user code that forgets to set the magic field gets routed through
// the regular custom_draw path and surfaces as "my custom draw never fired," not as a
// silent backdrop schedule.
BACKDROP_MARKER_MAGIC :: u32(0x42445054)
// Per-primitive parameters for a backdrop blur dispatched through the Clay integration.
// Embedded as a `Clay_Custom` variant; `prepare_clay_batch` walks the command stream,
// opens/closes a backdrop scope around contiguous backdrop runs, and feeds these to
// `backdrop_blur` via `dispatch_clay_backdrop`. The discriminant is the union tag — no
// in-band magic field needed (compiler-enforced).
Backdrop_Marker :: struct {
sigma: f32,
tint: Color,
radii: Rectangle_Radii,
feather_ppx: f32,
}
// Returns true if this Clay render command represents a backdrop primitive.
// Identified by a magic-number sentinel in the first 4 bytes of customData.
// Returns true if this Clay render command represents a backdrop primitive — i.e. its
// `customData` points at a `Clay_Custom` whose active variant is `Backdrop_Marker`.
is_clay_backdrop :: proc(cmd: ^clay.RenderCommand) -> bool {
if cmd.commandType != .Custom do return false
p := cmd.renderData.custom.customData
if p == nil do return false
return (^Backdrop_Marker)(p).magic == BACKDROP_MARKER_MAGIC
_, ok := (^Clay_Custom)(p).(Backdrop_Marker)
return ok
}
// Dispatch a single non-backdrop Clay render command to the appropriate `draw` primitive.
@@ -876,28 +940,46 @@ dispatch_clay_command :: proc(
}
rectangle(layer, bounds, BLANK, outline_color = color, outline_width = thickness, radii = radii)
case clay.RenderCommandType.Custom: if is_clay_backdrop(render_command) {
// The walker pre-filters backdrops into `dispatch_clay_backdrop` and never feeds
// them here; reaching this branch means either the walker logic is broken or the
// `customData` pointee mutated between the walker's `is_clay_backdrop` check and
// this re-check (heap corruption / lifetime bug in user-managed customData
// memory). Both are renderer-level bugs that warrant a hard failure rather than a
// silently-dropped panel.
log.panicf(
"backdrop marker reached dispatch_clay_command; either the prepare_clay_batch walker is misrouting commands or the customData pointee at %p was mutated mid-frame",
render_command.renderData.custom.customData,
)
} else if custom_draw != nil {
custom_draw(layer, bounds, render_command.renderData.custom)
} else {
log.panicf("Received clay render command of type custom but no custom_draw proc provided.")
case clay.RenderCommandType.Custom:
// Copy the CustomRenderData by value so we can patch its `customData` field for the
// user callback without mutating Clay-owned memory. After unwrapping, the callback
// sees its own pointer in `render_data.customData`, identical to what it would see
// if `Clay_Custom` did not exist as an intermediary.
patched := render_command.renderData.custom
// Default to nil so a zero-init `Clay_Custom` (no variant set) and an originally-nil
// `customData` both surface to the callback as `customData = nil`.
patched.customData = nil
if custom_data_pointer := render_command.renderData.custom.customData; custom_data_pointer != nil {
switch custom_value in (^Clay_Custom)(custom_data_pointer)^ {
case Backdrop_Marker: // The walker pre-filters backdrops into `dispatch_clay_backdrop` and never feeds
// them here; reaching this branch means either the walker logic is broken or the
// `Clay_Custom` variant tag mutated between the walker's `is_clay_backdrop` check
// and this re-check (heap corruption / lifetime bug in user-managed customData
// memory). Both are renderer-level bugs that warrant a hard failure rather than a
// silently-dropped panel.
log.panicf(
"backdrop marker reached dispatch_clay_command; either the prepare_clay_batch walker is misrouting commands or the customData pointee at %p was mutated mid-frame",
render_command.renderData.custom.customData,
)
case rawptr: patched.customData = custom_value
}
}
if custom_draw != nil {
custom_draw(layer, bounds, patched)
} else if patched.customData != nil {
log.panicf(
"Received clay render command of type custom with non-nil user data but no custom_draw proc provided.",
)
}
}
}
// Dispatch a single backdrop Clay render command to `backdrop_blur` on the active layer.
// Caller guarantees a backdrop scope is open on `layer` so the underlying
// `append_or_extend_sub_batch` contract assertion is satisfied.
// Caller guarantees:
// - a backdrop scope is open on `layer` so the underlying `append_or_extend_sub_batch`
// contract assertion is satisfied;
// - the command's `customData` points at a `Clay_Custom` whose active variant is
// `Backdrop_Marker` (the walker has already verified this via `is_clay_backdrop`).
//INTERNAL
dispatch_clay_backdrop :: proc(layer: ^Layer, cmd: ^clay.RenderCommand) {
bounds := Rectangle {
@@ -906,14 +988,17 @@ dispatch_clay_backdrop :: proc(layer: ^Layer, cmd: ^clay.RenderCommand) {
width = cmd.boundingBox.width,
height = cmd.boundingBox.height,
}
marker := (^Backdrop_Marker)(cmd.renderData.custom.customData)
// Type-asserting form (no `, ok`): panics loudly if the variant tag changed since
// `is_clay_backdrop`, which is the desired tripwire for a heap-corruption bug in
// user-managed customData.
marker := (^Clay_Custom)(cmd.renderData.custom.customData).(Backdrop_Marker)
backdrop_blur(
layer,
bounds,
gaussian_sigma = marker.sigma,
tint = marker.tint,
radii = marker.radii,
feather_px = marker.feather_px,
feather_ppx = marker.feather_ppx,
)
}