Files
levlib/draw/backdrop.odin
T
zack e8ffa28de3 Backdrop scope implementation (#25)
Co-authored-by: Zachary Levy <zachary@sunforge.is>
Reviewed-on: #25
2026-05-02 01:31:58 +00:00

1171 lines
48 KiB
Odin
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package draw
import "core:log"
import "core:math"
import "core:mem"
import sdl "vendor:sdl3"
// This file hosts the backdrop subsystem: any visual effect that samples the current
// framebuffer as input. Today the only implemented effect is Gaussian blur (frosted glass);
// future effects (refraction, mirror, etc.) will live here too.
//
// The file is split into two top-level sections:
//
// 1. Shared backdrop infrastructure — bracket coordination, source_texture lifecycle,
// sub-batch scanners. These are general to any backdrop effect: every backdrop effect
// needs a snapshot of the framebuffer (source_texture) and needs to participate in the
// bracket render-pass-boundary scheduling. When a second effect is added, its
// per-effect resources go in their own section like the Gaussian blur one below; this
// shared section stays.
//
// 2. Gaussian blur — the only effect implemented today. Owns its own PSOs, working
// textures (downsample / h_blur), per-primitive storage layout, kernel math, and
// bracket-runner inner loop. None of this is shared with future backdrop effects: a
// refraction shader would have its own PSO, its own primitive struct, and likely
// wouldn't need the downsample/h_blur intermediates at all.
//
// The `Backdrop` struct currently holds resources from both categories; field-group
// comments inside it mark which are which. When a second effect lands the struct will be
// split, but doing that pre-emptively means inventing a per-effect dispatch protocol on
// speculation. Better to keep the conflation visible (and labeled) until concrete needs
// shape the design.
// ---------------------------------------------------------------------------------------------------------------------
// ----- Shared backdrop infrastructure ------------
// ---------------------------------------------------------------------------------------------------------------------
//INTERNAL
Backdrop :: struct {
// Holds every GPU resource owned by the backdrop subsystem. The pipelines, primitive buffer
// and sampler are created by `create_backdrop`; the three textures are left nil there and
// are allocated (and reallocated on resize) by `ensure_backdrop_textures`. Everything is
// released by `destroy_backdrop`.
//
// -- Shared across all backdrop effects --
// When any backdrop draw exists this frame, the entire frame renders into source_texture
// instead of the swapchain. Acts as the bracket's snapshot input by virtue of already
// containing the pre-bracket frame. Copied to the swapchain at frame end.
source_texture: ^sdl.GPUTexture,
// Cached pixel dimensions for resize-detection in `ensure_backdrop_textures`. Written only
// after all three textures have been (re)created successfully.
cached_width: u32,
cached_height: u32,
// Linear-clamp sampler used for sampling source_texture (and Gaussian blur's working
// textures). Linear filtering is required by the Gaussian linear-sampling pair trick;
// any future backdrop effect that samples source_texture with bilinear interpolation
// can reuse this sampler. Clamp avoids edge-bleed at work-region boundaries.
sampler: ^sdl.GPUSampler,
// -- Gaussian blur effect --
// Two graphics pipelines. The downsample PSO is a single-bilinear-sample fullscreen pass;
// the blur PSO is mode-branched (H-blur fullscreen + V-composite instanced) and shares
// one shader program for both modes via a uniform `mode` selector.
downsample_pipeline: ^sdl.GPUGraphicsPipeline,
blur_pipeline: ^sdl.GPUGraphicsPipeline,
// Per-instance Gaussian_Blur_Primitive storage buffer. Grows on demand via grow_buffer_if_needed.
// All backdrop primitives across all layers in a frame share this single buffer; sub-batches
// reference into it by offset. Filled once per frame by `upload_backdrop_primitives`.
primitive_buffer: Buffer,
// Working textures, allocated once at swapchain resolution and recreated only on resize.
// Both are sized at full swapchain resolution and single-sample. Larger downsample
// factors fill only a sub-rect via viewport-limited rendering (see file-header comment
// on adaptive downsampling in the Gaussian blur section below).
// downsample_texture — written by the downsample PSO. Read by the blur PSO in mode 0.
// h_blur_texture — written by the blur PSO in mode 0. Read by the blur PSO in mode 1.
// NOTE(review): the bracket description above `run_backdrop_bracket` lists a separate
// V-blur pass (mode 0) that writes back into downsample_texture, and a composite (mode 1)
// that reads downsample_texture. The two role lines above may predate that change —
// confirm against the bracket runner and the shaders.
downsample_texture: ^sdl.GPUTexture,
h_blur_texture: ^sdl.GPUTexture,
}
//INTERNAL
create_backdrop :: proc(device: ^sdl.GPUDevice, window: ^sdl.Window) -> (pipeline: Backdrop, ok: bool) {
// Builds the device-lifetime resources of the backdrop subsystem, in this order: four shader
// modules (released again before this proc returns), the downsample PSO, the blur PSO, the
// primitive storage buffer, and the linear-clamp sampler. The three working textures are not
// created here; `ensure_backdrop_textures` allocates them later.
//
// Returns ok = false on the first failure. SDL failures are logged here with sdl.GetError();
// a `create_buffer` failure propagates through `or_return` without an extra log line here.
//
// On failure, clean up any partially-created resources.
// This defer is registered first, so it runs last at scope exit — after the shader-release
// defers registered further down.
// NOTE(review): the handles released here are not reset to nil, so the `pipeline` value
// returned alongside ok = false still carries them; callers must not use it.
defer if !ok {
if pipeline.sampler != nil do sdl.ReleaseGPUSampler(device, pipeline.sampler)
if pipeline.primitive_buffer.gpu != nil do destroy_buffer(device, &pipeline.primitive_buffer)
if pipeline.blur_pipeline != nil do sdl.ReleaseGPUGraphicsPipeline(device, pipeline.blur_pipeline)
if pipeline.downsample_pipeline != nil do sdl.ReleaseGPUGraphicsPipeline(device, pipeline.downsample_pipeline)
}
// Bail out early when the shader binaries embedded in this build are in a format the
// device does not accept.
active_shader_formats := sdl.GetGPUShaderFormats(device)
if PLATFORM_SHADER_FORMAT_FLAG not_in active_shader_formats {
log.errorf(
"backdrop: no embedded shader matches active GPU formats; build supports %v but device reports %v",
PLATFORM_SHADER_FORMAT,
active_shader_formats,
)
return pipeline, false
}
// Both PSOs target the swapchain format; `ensure_backdrop_textures` receives its texture
// format from its caller (presumably the same swapchain format — verify at the call site).
swapchain_format := sdl.GetGPUSwapchainTextureFormat(device, window)
//----- Shader modules ----------------------------------
// Each module is released by a defer as soon as it has been created, so all four are
// released at scope exit on the success path as well as on every failure path.
// The num_samplers / num_uniform_buffers / num_storage_buffers counts presumably mirror
// the resource declarations in the corresponding shader sources — TODO confirm.
//
// Fullscreen-triangle vertex shader, paired with the downsample fragment shader below.
fullscreen_vert := sdl.CreateGPUShader(
device,
sdl.GPUShaderCreateInfo {
code_size = len(BACKDROP_FULLSCREEN_VERT_RAW),
code = raw_data(BACKDROP_FULLSCREEN_VERT_RAW),
entrypoint = SHADER_ENTRY,
format = {PLATFORM_SHADER_FORMAT_FLAG},
stage = .VERTEX,
},
)
if fullscreen_vert == nil {
log.errorf("Could not create backdrop fullscreen vertex shader: %s", sdl.GetError())
return pipeline, false
}
defer sdl.ReleaseGPUShader(device, fullscreen_vert)
// Downsample fragment shader: one sampler and one uniform block
// (Gaussian_Blur_Downsample_Frag_Uniforms, pushed by push_backdrop_downsample_frag_globals).
downsample_frag := sdl.CreateGPUShader(
device,
sdl.GPUShaderCreateInfo {
code_size = len(BACKDROP_DOWNSAMPLE_FRAG_RAW),
code = raw_data(BACKDROP_DOWNSAMPLE_FRAG_RAW),
entrypoint = SHADER_ENTRY,
format = {PLATFORM_SHADER_FORMAT_FLAG},
stage = .FRAGMENT,
num_samplers = 1,
num_uniform_buffers = 1,
},
)
if downsample_frag == nil {
log.errorf("Could not create backdrop downsample fragment shader: %s", sdl.GetError())
return pipeline, false
}
defer sdl.ReleaseGPUShader(device, downsample_frag)
// Blur vertex shader: one uniform block (Gaussian_Blur_Vert_Uniforms) plus one storage
// buffer (the Gaussian_Blur_Primitive array in `primitive_buffer`).
blur_vert := sdl.CreateGPUShader(
device,
sdl.GPUShaderCreateInfo {
code_size = len(BACKDROP_BLUR_VERT_RAW),
code = raw_data(BACKDROP_BLUR_VERT_RAW),
entrypoint = SHADER_ENTRY,
format = {PLATFORM_SHADER_FORMAT_FLAG},
stage = .VERTEX,
num_uniform_buffers = 1,
num_storage_buffers = 1,
},
)
if blur_vert == nil {
log.errorf("Could not create backdrop blur vertex shader: %s", sdl.GetError())
return pipeline, false
}
defer sdl.ReleaseGPUShader(device, blur_vert)
// Blur fragment shader: one sampler and one uniform block (Gaussian_Blur_Frag_Uniforms,
// pushed by push_backdrop_blur_frag_globals).
blur_frag := sdl.CreateGPUShader(
device,
sdl.GPUShaderCreateInfo {
code_size = len(BACKDROP_BLUR_FRAG_RAW),
code = raw_data(BACKDROP_BLUR_FRAG_RAW),
entrypoint = SHADER_ENTRY,
format = {PLATFORM_SHADER_FORMAT_FLAG},
stage = .FRAGMENT,
num_samplers = 1,
num_uniform_buffers = 1,
},
)
if blur_frag == nil {
log.errorf("Could not create backdrop blur fragment shader: %s", sdl.GetError())
return pipeline, false
}
defer sdl.ReleaseGPUShader(device, blur_frag)
//----- Downsample PSO ----------------------------------
// Single bilinear sample, blend disabled. No vertex buffer (gl_VertexIndex 0..2 emits the
// fullscreen triangle). Single-sample target (working textures are never MSAA).
downsample_target := sdl.GPUColorTargetDescription {
format = swapchain_format,
blend_state = sdl.GPUColorTargetBlendState{enable_blend = false},
}
pipeline.downsample_pipeline = sdl.CreateGPUGraphicsPipeline(
device,
sdl.GPUGraphicsPipelineCreateInfo {
vertex_shader = fullscreen_vert,
fragment_shader = downsample_frag,
primitive_type = .TRIANGLELIST,
multisample_state = sdl.GPUMultisampleState{sample_count = ._1},
target_info = sdl.GPUGraphicsPipelineTargetInfo {
color_target_descriptions = &downsample_target,
num_color_targets = 1,
},
},
)
if pipeline.downsample_pipeline == nil {
log.errorf("Failed to create backdrop downsample graphics pipeline: %s", sdl.GetError())
return pipeline, false
}
//----- Blur PSO (H-blur + V-composite, mode-branched) --------------
// Premultiplied-over blend matching the main pipeline. No vertex buffer (mode 0 uses
// gl_VertexIndex 0..2 fullscreen tri; mode 1 uses gl_VertexIndex 0..5 unit-quad +
// gl_InstanceIndex into the storage buffer).
//
// Single-sample throughout: levlib does not support MSAA (see init's doc comment in
// draw.odin). The whole frame renders to single-sample targets, so sample_count = ._1
// matches both mode 0 (writes h_blur_texture) and mode 1 (writes source_texture).
//
// Blend is enabled for the whole PSO, so it also applies to the mode-0 working-texture
// passes, not only to the mode-1 composite.
blur_target := sdl.GPUColorTargetDescription {
format = swapchain_format,
blend_state = sdl.GPUColorTargetBlendState {
enable_blend = true,
enable_color_write_mask = true,
src_color_blendfactor = .ONE,
dst_color_blendfactor = .ONE_MINUS_SRC_ALPHA,
color_blend_op = .ADD,
src_alpha_blendfactor = .ONE,
dst_alpha_blendfactor = .ONE_MINUS_SRC_ALPHA,
alpha_blend_op = .ADD,
color_write_mask = sdl.GPUColorComponentFlags{.R, .G, .B, .A},
},
}
pipeline.blur_pipeline = sdl.CreateGPUGraphicsPipeline(
device,
sdl.GPUGraphicsPipelineCreateInfo {
vertex_shader = blur_vert,
fragment_shader = blur_frag,
primitive_type = .TRIANGLELIST,
multisample_state = sdl.GPUMultisampleState{sample_count = ._1},
target_info = sdl.GPUGraphicsPipelineTargetInfo {
color_target_descriptions = &blur_target,
num_color_targets = 1,
},
},
)
if pipeline.blur_pipeline == nil {
log.errorf("Failed to create backdrop blur graphics pipeline: %s", sdl.GetError())
return pipeline, false
}
//----- Storage buffer for Gaussian_Blur_Primitive instances -------------
// Initial capacity is BUFFER_INIT_SIZE primitives; `upload_backdrop_primitives` grows it
// on demand. `or_return` leaves this proc with ok = false when create_buffer fails, which
// triggers the cleanup defer at the top.
pipeline.primitive_buffer = create_buffer(
device,
size_of(Gaussian_Blur_Primitive) * BUFFER_INIT_SIZE,
sdl.GPUBufferUsageFlags{.GRAPHICS_STORAGE_READ},
) or_return
//----- Sampler ----------------------------------
// Linear min/mag filtering with clamp-to-edge on all three axes. The working textures are
// created with num_levels = 1 in `ensure_backdrop_textures`, so mipmap_mode is not expected
// to affect sampling of them.
pipeline.sampler = sdl.CreateGPUSampler(
device,
sdl.GPUSamplerCreateInfo {
min_filter = .LINEAR,
mag_filter = .LINEAR,
mipmap_mode = .LINEAR,
address_mode_u = .CLAMP_TO_EDGE,
address_mode_v = .CLAMP_TO_EDGE,
address_mode_w = .CLAMP_TO_EDGE,
},
)
if pipeline.sampler == nil {
log.errorf("Could not create backdrop GPU sampler: %s", sdl.GetError())
return pipeline, false
}
log.debug("Done creating backdrop subsystem")
return pipeline, true
}
//INTERNAL
destroy_backdrop :: proc(device: ^sdl.GPUDevice, pipeline: ^Backdrop) {
    // Releases every GPU resource held by `pipeline`. Handles that were never created
    // (still nil) are skipped. The fields themselves are left untouched afterwards.

    // Working textures first (allocated lazily by ensure_backdrop_textures).
    if pipeline.h_blur_texture != nil {
        sdl.ReleaseGPUTexture(device, pipeline.h_blur_texture)
    }
    if pipeline.downsample_texture != nil {
        sdl.ReleaseGPUTexture(device, pipeline.downsample_texture)
    }
    if pipeline.source_texture != nil {
        sdl.ReleaseGPUTexture(device, pipeline.source_texture)
    }

    // Then the resources built by create_backdrop, in reverse creation order.
    if pipeline.sampler != nil {
        sdl.ReleaseGPUSampler(device, pipeline.sampler)
    }
    destroy_buffer(device, &pipeline.primitive_buffer)
    if pipeline.blur_pipeline != nil {
        sdl.ReleaseGPUGraphicsPipeline(device, pipeline.blur_pipeline)
    }
    if pipeline.downsample_pipeline != nil {
        sdl.ReleaseGPUGraphicsPipeline(device, pipeline.downsample_pipeline)
    }
}
//----- Working texture management ----------------------------------
// Allocate (or reallocate, on resize) the three working textures that the backdrop bracket
// uses. All three are sized at full swapchain resolution, single-sample, share the swapchain
// format, and need {.COLOR_TARGET, .SAMPLER} usage so they can be written by render passes
// and read by subsequent passes.
//
// `source_texture` is shared infrastructure (used by every backdrop effect).
// `downsample_texture` and `h_blur_texture` are Gaussian-blur-specific intermediates; a
// future backdrop effect with no downsample/blur prep would skip them.
//
// Recreates on dimension change only — same-size frames hit the early-out and skip GPU
// resource churn.
//INTERNAL
ensure_backdrop_textures :: proc(device: ^sdl.GPUDevice, format: sdl.GPUTextureFormat, width, height: u32) {
    backdrop := &GLOB.backdrop

    // Fast path: textures already exist at the requested size.
    same_size := backdrop.cached_width == width && backdrop.cached_height == height
    if backdrop.source_texture != nil && same_size do return

    // Drop whatever was allocated before (nothing on the very first call). Release order:
    // h_blur, downsample, source.
    stale := [3]^^sdl.GPUTexture {
        &backdrop.h_blur_texture,
        &backdrop.downsample_texture,
        &backdrop.source_texture,
    }
    for slot in stale {
        if slot^ == nil do continue
        sdl.ReleaseGPUTexture(device, slot^)
        slot^ = nil
    }

    // All three textures share one description: full swapchain resolution (so factor=1 works
    // without a lossy round-trip; larger factors render into a sub-rect), single-sample, one
    // mip level, usable both as a color target and as a sampled texture.
    description := sdl.GPUTextureCreateInfo {
        type                 = .D2,
        format               = format,
        usage                = {.COLOR_TARGET, .SAMPLER},
        width                = width,
        height               = height,
        layer_count_or_depth = 1,
        num_levels           = 1,
        sample_count         = ._1,
    }

    backdrop.source_texture = sdl.CreateGPUTexture(device, description)
    if backdrop.source_texture == nil {
        log.panicf("Failed to create backdrop source texture (%dx%d): %s", width, height, sdl.GetError())
    }

    backdrop.downsample_texture = sdl.CreateGPUTexture(device, description)
    if backdrop.downsample_texture == nil {
        log.panicf("Failed to create backdrop downsample texture (%dx%d): %s", width, height, sdl.GetError())
    }

    backdrop.h_blur_texture = sdl.CreateGPUTexture(device, description)
    if backdrop.h_blur_texture == nil {
        log.panicf("Failed to create backdrop h_blur texture (%dx%d): %s", width, height, sdl.GetError())
    }

    // Remember the size only once everything exists, so the fast path above stays truthful.
    backdrop.cached_width = width
    backdrop.cached_height = height
}
//----- Frame / layer scanners ----------------------------------
// Returns true if any sub-batch in any layer this frame is .Backdrop kind. Called once at the
// top of `end()` to decide whether to route the whole frame to source_texture.
// O(total sub-batches) but with an early-exit on the first hit, so typical cost is tiny.
//INTERNAL
frame_has_backdrop :: proc() -> bool {
    // Linear scan over this frame's sub-batches; stops at the first .Backdrop entry.
    found := false
    for batch in GLOB.tmp_sub_batches {
        if batch.kind == .Backdrop {
            found = true
            break
        }
    }
    return found
}
// Find the scissor that owns a given sub-batch index by linear scan over GLOB.scissors.
// Used by `run_backdrop_bracket`'s composite pass when the bracket loses its layer-pointer
// context: per-sub-batch scissor lookup is required to honor scissors set up upstream by
// `prepare_clay_batch`'s ScissorStart handling. O(scissors) per sub-batch is acceptable
// because scissor counts are small (single digits in typical UI frames).
//
// Panics if no scissor owns the index. The renderer's invariant is that the scissor list
// forms a contiguous, disjoint cover over `[0, len(tmp_sub_batches))` because every
// sub-batch is created via `append_or_extend_sub_batch` (which increments the active
// scissor's `sub_batch_len` in lockstep with the global array's growth) and scissors are
// only created at the current end-of-array. A miss here means that invariant is broken —
// either by a future code change that bypasses `append_or_extend_sub_batch`, by a scissor
// constructed with the wrong `sub_batch_start`, or by external corruption — and silent
// degradation would mask the bug. The panic message includes the offending index and the
// scissor list shape so the failure is locatable.
//INTERNAL
find_scissor_for_sub_batch :: proc(sub_batch_index: u32) -> sdl.Rect {
    // Each scissor owns the half-open index range [sub_batch_start, sub_batch_start + sub_batch_len).
    // Return the bounds of the first scissor whose range contains the requested index.
    for scissor in GLOB.scissors {
        first := scissor.sub_batch_start
        if sub_batch_index < first do continue
        past_last := first + scissor.sub_batch_len
        if sub_batch_index >= past_last do continue
        return scissor.bounds
    }
    // No owner found: the cover invariant is broken, so fail loudly instead of guessing.
    log.panicf(
        "find_scissor_for_sub_batch: no scissor owns sub-batch index %d (scissor count=%d, total sub-batches=%d); " +
        "the scissor list must form a contiguous cover over all sub-batches",
        sub_batch_index,
        len(GLOB.scissors),
        len(GLOB.tmp_sub_batches),
    )
}
// ---------------------------------------------------------------------------------------------------------------------
// ----- Gaussian blur ------------
// ---------------------------------------------------------------------------------------------------------------------
// Adaptive downsample design (Flutter-style).
//
// The bracket picks a downsample factor per-sigma-group, not as a global constant. The choice
// is driven by Flutter's `CalculateScale` formula in
// impeller/entity/contents/filters/gaussian_blur_filter_contents.cc (originally from Skia's
// GrBlurUtils): downsample so that the sigma in working-resolution pixels stays in the
// 2..4 range. This keeps the kernel reach wide enough to hide high-frequency artifacts from
// the bilinear upsample at the composite, while keeping the kernel's discrete tap count
// small (≤3σ reach → ≈12 paired taps).
//
// The full table, in physical pixels (sigma_logical * dpi_scaling):
//
// sigma_phys ≤ 4 → factor = 1 (no downsample; source is sampled directly)
// sigma_phys ≤ 8 → factor = 2
// sigma_phys > 8 → factor = 4 (capped)
//
// Capped at factor=4 to favor visual quality over bandwidth at the high end. Larger factors
// (8 and 16) would lose more high-frequency detail than the kernel can mask even with the
// H+V split, and the bandwidth saving is small (the work region also shrinks quadratically,
// so most of the savings are already captured at factor=4).
//
// Working textures are sized at full swapchain resolution to support factor=1. Larger factors
// just write to a smaller sub-rect via viewport-limited rendering. Memory cost: the two
// full-res working textures (RGBA8) together take roughly 16 MB at 1080p and 64 MB at 4K.
// On modern GPUs this is well within budget; on Mali Valhall SBCs it's negligible against
// unified-memory headroom.
//
// The shaders read the factor as a uniform. The downsample shader has three paths (factor=1
// identity, factor=2 single bilinear tap, factor>=4 four bilinear taps with offsets scaling
// by factor/4). The V-composite mode of backdrop_blur.frag uses inv_downsample_factor to
// scale full-res frag coords down to working-res UV.
//----- GPU types ----------------------------------
// Maximum number of (weight, offset) pairs in a single blur kernel. Each pair represents
// the linear-sampling pair adjustment (one bilinear fetch covering two adjacent texels);
// pair[0] is the center weight with offset 0. With 32 pairs the kernel holds up to 63
// discrete taps per side, center included (1 center + 31 pairs × 2 texels each). Because
// every pair is sampled at both +offset and -offset, the full symmetric footprint is
// 1 + 31 × 4 = 125 input texels. `compute_blur_kernel` derives the per-side tap count as
// ceil(3σ) + 1, so the budget covers working-resolution sigmas up to about 20 texels before
// the kernel is truncated — enough for the 4..24 typical UI range once the adaptive
// downsample factor is applied. Must match MAX_KERNEL_PAIRS in shaders/source/backdrop_blur.frag.
//INTERNAL
MAX_GAUSSIAN_BLUR_KERNEL_PAIRS :: 32
// Gaussian_Blur_Primitive is the GPU-side per-primitive storage layout. Mirrors the GLSL std430
// struct in shaders/source/backdrop_blur.vert. Field order is chosen so std430 alignment
// rules pack the struct to a clean 48-byte natural layout (no implicit padding): vec4
// members come first (16-byte aligned at any offset), then vec2, then scalars. The total is
// a multiple of 16 so the std430 array stride matches size_of(...) exactly.
//
// Gaussian blur primitives are RRect-only: rectangles, rounded rectangles, and circles
// (via uniform_radii) are all expressible. Rotation is intentionally omitted — backdrop
// sampling is in screen space, so a rotated mask over a stationary blur sample would look
// visually wrong. iOS, CSS backdrop-filter, and Flutter BackdropFilter all enforce this
// implicitly; we enforce it explicitly by leaving no rotation field.
//
// Outline is also intentionally omitted. A specialized edge effect (e.g. liquid-glass-style
// refraction outlines) would be implemented as a dedicated primitive type with its own
// pipeline rather than tacked onto this one as a flag bit.
//INTERNAL
Gaussian_Blur_Primitive :: struct {
// Field comments read "byte offset: byte size — meaning". Field order is layout-critical:
// it must stay in sync with the GLSL struct, so do not reorder.
bounds: [4]f32, // 0: 16 — world-space quad (min_xy, max_xy); logical pixels, scaled by dpi in compute_backdrop_group_work_region
radii: [4]f32, // 16: 16 — per-corner radii in physical pixels (BR, TR, BL, TL)
half_size: [2]f32, // 32: 8 — RRect half extents (physical px)
half_feather: f32, // 40: 4 — feather_px * 0.5 (SDF anti-aliasing)
color: Color, // 44: 4 — tint, packed RGBA u8x4
}
// Compile-time guard: the CPU-side struct must stay exactly 48 bytes (3 × 16) so that
// size_of(...) equals the storage-buffer array stride used when sizing and uploading
// `primitive_buffer`.
#assert(size_of(Gaussian_Blur_Primitive) == 48)
// Vertex uniforms for the unified blur PSO (mode 0 = H-blur, mode 1 = V-composite).
// Matches the GLSL Uniforms block in shaders/source/backdrop_blur.vert. The downsample
// PSO has no vertex uniforms.
//INTERNAL
Gaussian_Blur_Vert_Uniforms :: struct {
// Field comments read "byte offset: byte size — meaning". Filled and pushed to vertex
// uniform slot 0 by `push_backdrop_vert_globals`, which leaves `_pad0` zero-initialised.
// The listed offsets add up to 80 bytes; unlike Gaussian_Blur_Primitive there is no
// #assert on the size here — NOTE(review): assumes matrix[4, 4]f32 adds no extra
// alignment padding; confirm.
projection: matrix[4, 4]f32, // 0: 64 — screen-space ortho (mode 1 only; mode 0 ignores)
dpi_scale: f32, // 64: 4 — copied from GLOB.dpi_scaling
mode: u32, // 68: 4 — 0 = H-blur fullscreen tri; 1 = V-composite instanced quads
_pad0: [2]f32, // 72: 8 — std140 vec4 alignment pad
}
// Fragment uniforms for the downsample PSO. Matches Uniforms block in
// shaders/source/backdrop_downsample.frag.
//INTERNAL
Gaussian_Blur_Downsample_Frag_Uniforms :: struct {
// Field comments read "byte offset: byte size — meaning"; the listed fields total 16 bytes.
// Filled and pushed to fragment uniform slot 0 by `push_backdrop_downsample_frag_globals`,
// which leaves `_pad0` zero-initialised.
inv_source_size: [2]f32, // 0: 8 — 1.0 / source_texture pixel dimensions (full-res)
downsample_factor: u32, // 8: 4 — 1, 2, or 4 (selects identity / 1-tap / 4-tap path in shader)
_pad0: u32, // 12: 4 — pads the block to 16 bytes
}
// Fragment uniforms for the unified blur PSO (mode 0 + mode 1). Matches the GLSL Uniforms
// block in shaders/source/backdrop_blur.frag. The kernel array holds the linear-sampling
// pair coefficients computed CPU-side via `compute_blur_kernel`.
//INTERNAL
Gaussian_Blur_Frag_Uniforms :: struct {
// Field comments read "byte offset: byte size — meaning"; the listed fields total
// 32 + 512 = 544 bytes. Pushed as-is to fragment uniform slot 0 by
// `push_backdrop_blur_frag_globals`; the caller fills every field.
// NOTE(review): the bracket description above `run_backdrop_bracket` runs V-blur as a
// mode-0 pass with a vertical direction and uses mode 1 purely for the composite. The
// "V-composite" wording on `mode` and `direction` below may predate that split —
// confirm against backdrop_blur.frag.
inv_working_size: [2]f32, // 0: 8 — 1.0 / working-resolution texture dimensions
pair_count: u32, // 8: 4 — number of (weight, offset) pairs; pair[0] is center
mode: u32, // 12: 4 — 0 = H-blur, 1 = V-composite (must match vert mode)
direction: [2]f32, // 16: 8 — (1,0) for H-blur, (0,1) for V-composite
inv_downsample_factor: f32, // 24: 4 — 1.0 / downsample_factor (mode 1 only; mode 0 ignores)
_pad0: f32, // 28: 4 — keeps `kernel` at a 16-byte-aligned offset
kernel: [MAX_GAUSSIAN_BLUR_KERNEL_PAIRS][4]f32, // 32: 512 — .x = weight, .y = offset (texels); .z/.w are written as 0 by compute_blur_kernel
}
//----- Kernel computation ----------------------------------
// Compute Gaussian blur kernel weights with the linear-sampling pair adjustment.
// Adapted from RAD Debugger's r_d3d11_g_blur_shader_src CPU-side coefficient generation
// and Daniel Rákos's "Efficient Gaussian blur with linear sampling" article.
//
// The trick: bilinear sampling lets us fetch (1-t)*pixel[i] + t*pixel[i+1] with a single
// texture lookup. So for any pair of adjacent discrete weights w0, w1 we can collapse them
// into one bilinear fetch with weight w = w0+w1 sampled at offset i + w1/w. This halves the
// fragment-shader sample count for a given kernel radius.
//
// Output: `kernel[0]` is the center weight (offset 0), and `kernel[1..pair_count-1]` each
// hold one paired tap (sampled symmetrically as ±offset in the shader). The shader iterates
// `i in [1, pair_count)` and does two texture fetches per pair — one at +offset, one at
// -offset — for a total of 1 + 2*(pair_count-1) bilinear fetches per fragment.
//
// `sigma` is the true Gaussian standard deviation in the kernel's working-space units
// (working-resolution texels, after the caller has converted from logical pixels via
// dpi_scaling and the downsample factor). The kernel extent reaches ±3σ, capturing 99.7% of
// the Gaussian's mass; weights beyond that contribute imperceptibly. sigma <= 0 produces a
// degenerate kernel `{1, 0}` that acts as a sharp pass-through. After the loop, the discrete
// weights are normalized so they sum to 1.0 (truncating at ±3σ loses a tiny amount of mass;
// we renormalize to preserve overall image brightness).
//
// Note on the parameter contract: this routine takes σ directly and derives the tap count
// from it, rather than the inverse (RAD Debugger's algorithm passes a tap count and derives
// `stdev = (blur_count-1)/2`). Taking σ directly matches what callers expect when they read
// "gaussian_sigma" — passing tap count under that name was a footgun.
//INTERNAL
compute_blur_kernel :: proc(
    sigma: f32,
    kernel: ^[MAX_GAUSSIAN_BLUR_KERNEL_PAIRS][4]f32,
) -> (
    pair_count: u32,
) {
    // Fills `kernel[0 ..< pair_count]` with (weight, offset, 0, 0) entries and returns
    // pair_count (always >= 1 and <= MAX_GAUSSIAN_BLUR_KERNEL_PAIRS). Entries at and beyond
    // pair_count are left untouched.

    // Largest per-side discrete tap count (center included) that fits the pair budget:
    // kernel[0] holds the center and every further pair folds two discrete taps.
    MAX_DISCRETE_TAPS :: (MAX_GAUSSIAN_BLUR_KERNEL_PAIRS - 1) * 2 + 1

    // Degenerate input → sharp pass-through. Written as `!(sigma > 0)` rather than
    // `sigma <= 0` so that NaN takes this path too instead of poisoning every weight.
    if !(sigma > 0) {
        kernel[0] = {1, 0, 0, 0}
        return 1
    }

    // Per-side discrete tap count: ceil(3*sigma) + 1 (center + 3σ reach on each side),
    // capped at the storage budget. The cap is applied while the value is still a float:
    // converting an out-of-range float (huge or infinite sigma) to u32 first would not give
    // a dependable result. For any sigma > 0 the reach is at least 1, so discrete_taps >= 2
    // here and no separate "too small" branch is needed.
    reach := math.ceil(3 * sigma)
    discrete_taps := u32(MAX_DISCRETE_TAPS)
    if reach < f32(MAX_DISCRETE_TAPS - 1) {
        discrete_taps = u32(reach) + 1
    }

    // Discrete weights[i] = exp(-i² / (2σ²)). The 1/(σ√(2π)) prefactor cancels in the
    // normalization below, so it is skipped. The center weight is exp(0) = 1 by definition;
    // assigning it directly avoids evaluating 0/0 when 2σ² underflows to zero for a
    // vanishingly small sigma.
    weights: [MAX_GAUSSIAN_BLUR_KERNEL_PAIRS * 2]f32 = {}
    two_sigma_sq := 2 * sigma * sigma
    weights[0] = 1
    total: f32 = 1
    for i in 1 ..< discrete_taps {
        x := f32(i)
        w := math.exp(-x * x / two_sigma_sq)
        weights[i] = w
        // Off-center taps are sampled on both sides, so they count twice.
        total += 2 * w
    }

    // Normalize so the kernel sums to 1.0 across the full symmetric extent. `total` is at
    // least the center weight (1), so the division is always safe.
    inv_total := 1.0 / total
    for i in 0 ..< discrete_taps do weights[i] *= inv_total

    // Linear-sampling pair adjustment: adjacent weights w0 = weights[i], w1 = weights[i+1]
    // collapse into one bilinear fetch with weight w = w0 + w1 at offset i + w1/w.
    // `weights` is sized 2*MAX so that `weights[i+1]` is in bounds for every odd i below
    // discrete_taps; when discrete_taps is even, the last pair reads a zero there.
    kernel[0] = {weights[0], 0, 0, 0}
    pair_count = 1
    for i := u32(1); i < discrete_taps; i += 2 {
        w0 := weights[i]
        w1 := weights[i + 1]
        w := w0 + w1
        // Guard against a div-by-zero where both adjacent weights underflow to 0 (only
        // happens at the tail of a very tight kernel; numerically degenerate but legal).
        offset := f32(i)
        if w > 0 do offset = f32(i) + w1 / w
        kernel[pair_count] = {w, offset, 0, 0}
        pair_count += 1
    }
    return pair_count
}
// Pick a downsample factor for a given sigma. See the file-header comment for the table and
// rationale. Returned values: {1, 2, 4}.
//INTERNAL
compute_backdrop_downsample_factor :: proc(sigma_logical: f32) -> u32 {
    // Thresholds apply to sigma in physical pixels: <= 4 → 1, <= 8 → 2, anything else → 4.
    sigma_phys := GLOB.dpi_scaling * sigma_logical
    if sigma_phys <= 4 do return 1
    if sigma_phys <= 8 do return 2
    return 4
}
//----- Uniform push helpers ----------------------------------
// Push the Gaussian_Blur_Vert_Uniforms block to the vertex stage at slot 0.
//INTERNAL
push_backdrop_vert_globals :: proc(cmd_buffer: ^sdl.GPUCommandBuffer, width: f32, height: f32, mode: u32) {
    // Orthographic projection over x in [0, width] and y in [0, height] (top = 0, bottom = height).
    projection := ortho_rh(left = 0.0, top = 0.0, right = width, bottom = height, near = -1.0, far = 1.0)

    // Start from a zeroed block so the trailing pad stays 0, then fill the live fields.
    block: Gaussian_Blur_Vert_Uniforms
    block.projection = projection
    block.dpi_scale = GLOB.dpi_scaling
    block.mode = mode

    sdl.PushGPUVertexUniformData(cmd_buffer, 0, &block, size_of(Gaussian_Blur_Vert_Uniforms))
}
// Push the Gaussian_Blur_Downsample_Frag_Uniforms block to the fragment stage at slot 0.
//INTERNAL
push_backdrop_downsample_frag_globals :: proc(
    cmd_buffer: ^sdl.GPUCommandBuffer,
    source_width, source_height: u32,
    downsample_factor: u32,
) {
    // Reciprocal of the source size, one component per axis.
    inv_width := 1.0 / f32(source_width)
    inv_height := 1.0 / f32(source_height)

    // Zeroed block first so the pad field stays 0.
    block: Gaussian_Blur_Downsample_Frag_Uniforms
    block.inv_source_size = {inv_width, inv_height}
    block.downsample_factor = downsample_factor

    sdl.PushGPUFragmentUniformData(cmd_buffer, 0, &block, size_of(Gaussian_Blur_Downsample_Frag_Uniforms))
}
// Push the Gaussian_Blur_Frag_Uniforms block (kernel + pass mode/direction) to the fragment stage at slot 0.
//INTERNAL
push_backdrop_blur_frag_globals :: proc(
    cmd_buffer: ^sdl.GPUCommandBuffer,
    uniforms: ^Gaussian_Blur_Frag_Uniforms,
) {
    // The caller owns and fills the block; it is forwarded whole, kernel array included.
    BLOCK_BYTES :: size_of(Gaussian_Blur_Frag_Uniforms)
    sdl.PushGPUFragmentUniformData(cmd_buffer, 0, uniforms, BLOCK_BYTES)
}
//----- Storage-buffer upload ----------------------------------
// Upload all Gaussian_Blur_Primitive instances staged this frame to the backdrop subsystem's storage
// buffer. Mirrors the SDF primitive upload in core_2d.odin's `upload`. Called from
// `end()` inside the same copy pass that uploads vertices/indices/SDF primitives.
//INTERNAL
upload_backdrop_primitives :: proc(device: ^sdl.GPUDevice, pass: ^sdl.GPUCopyPass) {
    // Nothing staged this frame → nothing to upload.
    count := u32(len(GLOB.tmp_gaussian_blur_primitives))
    if count == 0 {
        return
    }
    byte_len := count * size_of(Gaussian_Blur_Primitive)

    // Make sure the buffer can hold this frame's primitives. The transfer/gpu handles are
    // read through the pointer only after this call, since growing may replace them.
    buffer := &GLOB.backdrop.primitive_buffer
    grow_buffer_if_needed(device, buffer, byte_len, sdl.GPUBufferUsageFlags{.GRAPHICS_STORAGE_READ})

    // Stage: map the transfer buffer, copy the CPU-side array in, unmap.
    mapped := sdl.MapGPUTransferBuffer(device, buffer.transfer, false)
    if mapped == nil {
        log.panicf("Failed to map backdrop primitive transfer buffer: %s", sdl.GetError())
    }
    mem.copy(mapped, raw_data(GLOB.tmp_gaussian_blur_primitives), int(byte_len))
    sdl.UnmapGPUTransferBuffer(device, buffer.transfer)

    // Record the transfer → GPU copy into the caller's copy pass, starting at offset 0.
    source := sdl.GPUTransferBufferLocation{transfer_buffer = buffer.transfer}
    destination := sdl.GPUBufferRegion{buffer = buffer.gpu, offset = 0, size = byte_len}
    sdl.UploadToGPUBuffer(pass, source, destination, false)
}
//----- Bracket scheduler ----------------------------------
// Compute the union AABB of the backdrop primitives in a contiguous-same-sigma sub-batch run
// (one "sigma group"), expanded by 6 sigmas of blur reach (the kernel weight beyond 3σ is
// negligible; halo of 6σ covers both the H-blur reads from downsample and the V-blur reads
// from h_blur, since each pass extends its kernel another 3σ from its output position).
// Returns a viewport in physical pixels for the full-resolution render target; the caller
// divides by the chosen downsample factor for the working-resolution passes.
//
// Per-group (rather than per-layer) because the adaptive downsample picks a different factor
// per sigma, and the kernel reach is also per-sigma. A tighter region per group means less
// fragment work in the downsample and H-blur passes.
//INTERNAL
compute_backdrop_group_work_region :: proc(
    group_start, group_end: u32,
    sigma_logical: f32,
    swapchain_width, swapchain_height: u32,
) -> (
    region_x, region_y, region_w, region_h: u32,
) {
    // Parameters:
    //   group_start, group_end  — half-open sub-batch index range of the sigma group.
    //   sigma_logical           — the group's blur sigma in logical pixels.
    //   swapchain_width/height  — render-target size in physical pixels (clamp bounds).
    // Returns the work region as origin + size in whole physical pixels, or all zeros when
    // the group contains no backdrop primitive or the region lies entirely off-target.
    dpi := GLOB.dpi_scaling

    // Union AABB of every backdrop primitive in the group, in logical pixels (prim.bounds
    // is min_xy, max_xy in world space).
    has_any := false
    min_x: f32 = 0
    min_y: f32 = 0
    max_x: f32 = 0
    max_y: f32 = 0
    for i in group_start ..< group_end {
        batch := GLOB.tmp_sub_batches[i]
        if batch.kind != .Backdrop do continue
        for p in batch.offset ..< batch.offset + batch.count {
            prim := GLOB.tmp_gaussian_blur_primitives[p]
            if !has_any {
                min_x = prim.bounds[0]
                min_y = prim.bounds[1]
                max_x = prim.bounds[2]
                max_y = prim.bounds[3]
                has_any = true
            } else {
                if prim.bounds[0] < min_x do min_x = prim.bounds[0]
                if prim.bounds[1] < min_y do min_y = prim.bounds[1]
                if prim.bounds[2] > max_x do max_x = prim.bounds[2]
                if prim.bounds[3] > max_y do max_y = prim.bounds[3]
            }
        }
    }
    if !has_any do return 0, 0, 0, 0

    // Halo = 6σ. The bracket runs two sequential blur passes (H then V). H reads downsample
    // at ±3σ from its output; V reads h_blur at ±3σ from its output. So for V outputs at
    // primitive_AABB to be valid, h_blur must be valid at primitive_AABB ±3σ, which requires
    // the downsample valid at primitive_AABB ±6σ.
    halo_logical := 6.0 * sigma_logical
    min_x -= halo_logical
    min_y -= halo_logical
    max_x += halo_logical
    max_y += halo_logical

    // Convert to physical pixels, snap OUTWARD to whole pixels (floor the min edge, ceil the
    // max edge), then clamp to the swapchain. Snapping outward guarantees that every pixel
    // the float rectangle touches is inside the region. Truncating the origin and the float
    // width independently could stop up to two pixels short — for example dropping the last
    // screen column when the max edge is clamped to the swapchain size while the min edge is
    // fractional — and could even produce a zero-sized region for a sub-pixel rectangle.
    phys_min_x := math.max(math.floor(min_x * dpi), 0)
    phys_min_y := math.max(math.floor(min_y * dpi), 0)
    phys_max_x := math.min(math.ceil(max_x * dpi), f32(swapchain_width))
    phys_max_y := math.min(math.ceil(max_y * dpi), f32(swapchain_height))
    if phys_max_x <= phys_min_x || phys_max_y <= phys_min_y do return 0, 0, 0, 0

    // All four edges are now whole numbers inside [0, swapchain size], so the conversions are
    // exact and the integer subtraction cannot underflow.
    region_x = u32(phys_min_x)
    region_y = u32(phys_min_y)
    region_w = u32(phys_max_x) - region_x
    region_h = u32(phys_max_y) - region_y
    return
}
// Run one bracket over a contiguous range of pure-backdrop sub-batches. Assumes:
// - source_texture currently holds the pre-bracket frame contents (everything submitted
// ahead of this bracket on the same layer has already been rendered).
// - The caller has invoked ensure_backdrop_textures with current swapchain dimensions.
// - The half-open range `[sub_batch_start, sub_batch_end)` is non-empty and every
// sub-batch in it has kind == .Backdrop. The caller (draw_layer) guarantees this by
// splitting the layer into runs.
//
// Per-sigma-group execution. The bracket walks the range in submission order, grouping
// contiguous-same-sigma .Backdrop sub-batches. For each group:
// 1. Pick a downsample factor using compute_backdrop_downsample_factor.
// 2. Compute that group's work region (primitives' AABB + 6σ halo, clamped).
// 3. Downsample: source_texture → downsample_texture, viewport-limited to
// work_region/factor. Writes into a sub-rect of the working texture.
// 4. H-blur (mode 0, direction=H): downsample_texture → h_blur_texture, same viewport.
// 5. V-blur (mode 0, direction=V): h_blur_texture → downsample_texture (ping-pong reuse;
// downsample_texture's data is no longer needed). Same viewport.
// 6. Composite (mode 1): downsample_texture (now holds H+V blur) → source_texture, full-
// target viewport, per-primitive SDF discard handles masking and applies the tint.
// Each sub-batch in the group issues an instanced draw under its own scissor (sub-
// batches inherit scissor state from the surrounding ScissorStart/End at submission).
//
// V-blur is run as its own working→working pass rather than folded into the composite. The
// folded variant produces a horizontal-vs-vertical asymmetry artifact (horizontal source
// features end up looking sharper than vertical ones inside the panel). Matching V's
// structure exactly to H's restores symmetry.
//
// On exit, source_texture contains the pre-bracket contents plus all backdrop primitives in
// this range composited on top.
//INTERNAL
run_backdrop_bracket :: proc(
	cmd_buffer: ^sdl.GPUCommandBuffer,
	sub_batch_start: u32,
	sub_batch_end: u32,
	swapchain_width, swapchain_height: u32,
) {
	// Walks `[sub_batch_start, sub_batch_end)` in submission order, splitting it into runs of
	// .Backdrop sub-batches that share a sigma. Each run records four render passes on
	// `cmd_buffer`: downsample, H-blur, V-blur, composite. Runs whose work region is empty
	// record nothing.
	pipeline := &GLOB.backdrop

	full_viewport := sdl.GPUViewport {
		x         = 0,
		y         = 0,
		w         = f32(swapchain_width),
		h         = f32(swapchain_height),
		min_depth = 0,
		max_depth = 1,
	}

	// Each per-group factor=N pass writes only to a sub-rect of the working textures, of
	// dimensions (work_region_phys / N), via viewport-limited rendering.
	layer_end := sub_batch_end
	i := sub_batch_start
	for i < layer_end {
		// Caller guarantees this range is pure backdrop sub-batches.
		assert(GLOB.tmp_sub_batches[i].kind == .Backdrop, "non-backdrop sub-batch inside bracket range")
		batch := GLOB.tmp_sub_batches[i]

		// Find the contiguous run of .Backdrop sub-batches with this sigma. The kind check
		// ends the run at a stray non-backdrop sub-batch, so it becomes the head of the next
		// iteration and trips the assert above instead of being drawn as part of this group.
		sigma := batch.gaussian_sigma
		group_start := i
		group_end := i + 1
		for group_end < layer_end {
			candidate := GLOB.tmp_sub_batches[group_end]
			if candidate.kind != .Backdrop || candidate.gaussian_sigma != sigma do break
			group_end += 1
		}

		// Pick downsample factor for this group.
		downsample_factor := compute_backdrop_downsample_factor(sigma)

		// Compute this group's work region (primitive AABB + 6σ halo, in physical pixels).
		region_x, region_y, region_w, region_h := compute_backdrop_group_work_region(
			group_start,
			group_end,
			sigma,
			swapchain_width,
			swapchain_height,
		)
		if region_w == 0 || region_h == 0 {
			i = group_end
			continue
		}

		// Convert the work region to working-resolution coords. The origin rounds down and
		// the far edge rounds up, so the working rect always covers the physical region.
		// Rounding the width on its own can fall one texel short when region_x is not a
		// multiple of the factor.
		working_x := region_x / downsample_factor
		working_y := region_y / downsample_factor
		working_x_end := (region_x + region_w + downsample_factor - 1) / downsample_factor
		working_y_end := (region_y + region_h + downsample_factor - 1) / downsample_factor

		// A factor-N downsample of the full frame occupies only cached_size / N texels per
		// axis of the working texture. Clamp the far edge to that extent. Comparing edges
		// (rather than subtracting the origin from the extent) avoids unsigned wrap-around if
		// the origin ever lies at or past the extent.
		wt_w := pipeline.cached_width / downsample_factor
		wt_h := pipeline.cached_height / downsample_factor
		working_x_end = min(working_x_end, wt_w)
		working_y_end = min(working_y_end, wt_h)
		if working_x >= working_x_end || working_y >= working_y_end {
			i = group_end
			continue
		}
		working_w := working_x_end - working_x
		working_h := working_y_end - working_y

		working_viewport := sdl.GPUViewport {
			x         = f32(working_x),
			y         = f32(working_y),
			w         = f32(working_w),
			h         = f32(working_h),
			min_depth = 0,
			max_depth = 1,
		}
		working_scissor := sdl.Rect {
			x = i32(working_x),
			y = i32(working_y),
			w = i32(working_w),
			h = i32(working_h),
		}

		// inv_working_size is always relative to the actual texture extent. At factor>1 we're
		// only using a sub-rect, but the texture coords are still divided by the full
		// texture's dimensions because that's what gl_FragCoord operates on.
		inv_working_size := [2]f32{1.0 / f32(pipeline.cached_width), 1.0 / f32(pipeline.cached_height)}

		// Convert the user's logical-pixel sigma into the kernel's working space.
		// sigma_working_texels = sigma_logical * dpi_scaling / downsample_factor.
		effective_sigma := sigma * GLOB.dpi_scaling / f32(downsample_factor)
		frag_uniforms := Gaussian_Blur_Frag_Uniforms {
			inv_working_size      = inv_working_size,
			inv_downsample_factor = 1.0 / f32(downsample_factor),
		}
		frag_uniforms.pair_count = compute_blur_kernel(effective_sigma, &frag_uniforms.kernel)

		//----- Downsample (source_texture → downsample_texture, viewport-limited) ----------
		{
			pass := sdl.BeginGPURenderPass(
				cmd_buffer,
				&sdl.GPUColorTargetInfo {
					texture = pipeline.downsample_texture,
					load_op = .DONT_CARE,
					store_op = .STORE,
					cycle = true,
				},
				1,
				nil,
			)
			sdl.BindGPUGraphicsPipeline(pass, pipeline.downsample_pipeline)
			sdl.SetGPUViewport(pass, working_viewport)
			sdl.SetGPUScissor(pass, working_scissor)
			push_backdrop_downsample_frag_globals(
				cmd_buffer,
				pipeline.cached_width,
				pipeline.cached_height,
				downsample_factor,
			)
			sdl.BindGPUFragmentSamplers(
				pass,
				0,
				&sdl.GPUTextureSamplerBinding{texture = pipeline.source_texture, sampler = pipeline.sampler},
				1,
			)
			sdl.DrawGPUPrimitives(pass, 3, 1, 0, 0)
			sdl.EndGPURenderPass(pass)
		}

		//----- H-blur (mode 0, direction=H): downsample_texture → h_blur_texture --------
		{
			frag_uniforms.mode = 0
			frag_uniforms.direction = {1, 0}
			pass := sdl.BeginGPURenderPass(
				cmd_buffer,
				&sdl.GPUColorTargetInfo {
					texture = pipeline.h_blur_texture,
					load_op = .DONT_CARE,
					store_op = .STORE,
					cycle = true,
				},
				1,
				nil,
			)
			sdl.BindGPUGraphicsPipeline(pass, pipeline.blur_pipeline)
			sdl.SetGPUViewport(pass, working_viewport)
			sdl.SetGPUScissor(pass, working_scissor)
			// Mode 0's vertex shader is a fullscreen triangle that ignores `projection`; pass
			// the standard ortho anyway so the same uniform block works for both modes.
			push_backdrop_vert_globals(cmd_buffer, f32(swapchain_width), f32(swapchain_height), 0)
			push_backdrop_blur_frag_globals(cmd_buffer, &frag_uniforms)
			// The blur PSO is declared with num_storage_buffers = 1 (mode 1 reads it). SDL3 GPU
			// validation requires the binding to be present for *any* draw on this PSO, even
			// though mode 0's shader path doesn't actually read it. Bind it here too.
			sdl.BindGPUVertexStorageBuffers(pass, 0, ([^]^sdl.GPUBuffer)(&pipeline.primitive_buffer.gpu), 1)
			sdl.BindGPUFragmentSamplers(
				pass,
				0,
				&sdl.GPUTextureSamplerBinding{texture = pipeline.downsample_texture, sampler = pipeline.sampler},
				1,
			)
			sdl.DrawGPUPrimitives(pass, 3, 1, 0, 0)
			sdl.EndGPURenderPass(pass)
		}

		//----- V-blur (mode 0, direction=V): h_blur_texture → downsample_texture --------
		// Ping-pong reuse: downsample_texture's data is no longer needed once H-blur has
		// produced its output, so we reuse it as the V-blur target. Saves allocating a third
		// working texture.
		{
			frag_uniforms.mode = 0
			frag_uniforms.direction = {0, 1}
			pass := sdl.BeginGPURenderPass(
				cmd_buffer,
				&sdl.GPUColorTargetInfo {
					texture = pipeline.downsample_texture,
					load_op = .DONT_CARE,
					store_op = .STORE,
					cycle = true,
				},
				1,
				nil,
			)
			sdl.BindGPUGraphicsPipeline(pass, pipeline.blur_pipeline)
			sdl.SetGPUViewport(pass, working_viewport)
			sdl.SetGPUScissor(pass, working_scissor)
			push_backdrop_vert_globals(cmd_buffer, f32(swapchain_width), f32(swapchain_height), 0)
			push_backdrop_blur_frag_globals(cmd_buffer, &frag_uniforms)
			// Same PSO as H-blur, so the storage-buffer binding is required here as well.
			sdl.BindGPUVertexStorageBuffers(pass, 0, ([^]^sdl.GPUBuffer)(&pipeline.primitive_buffer.gpu), 1)
			sdl.BindGPUFragmentSamplers(
				pass,
				0,
				&sdl.GPUTextureSamplerBinding{texture = pipeline.h_blur_texture, sampler = pipeline.sampler},
				1,
			)
			sdl.DrawGPUPrimitives(pass, 3, 1, 0, 0)
			sdl.EndGPURenderPass(pass)
		}

		//----- Composite (mode 1): downsample_texture (now holds H+V blur) → source_texture --
		// No kernel applied here — the working texture is already fully blurred. The shader just
		// upsamples (via bilinear filtering on the read), applies the SDF mask, and applies the
		// tint. One render pass for the whole sigma group; each sub-batch issues its own draw
		// call because non-contiguous-but-same-sigma sub-batches couldn't coalesce upstream.
		//
		// Per-sub-batch scissor: sub-batches inherit scissor state from ScissorStart/End that
		// surrounded their submission. Switching scissors mid-pass is cheap; what matters is
		// that the composite respects the same clipping the caller set up.
		{
			frag_uniforms.mode = 1
			// direction is unused in mode 1 but keep it set so reading the uniform doesn't see
			// undefined data on platforms that care about that.
			frag_uniforms.direction = {0, 0}
			pass := sdl.BeginGPURenderPass(
				cmd_buffer,
				&sdl.GPUColorTargetInfo{texture = pipeline.source_texture, load_op = .LOAD, store_op = .STORE},
				1,
				nil,
			)
			sdl.BindGPUGraphicsPipeline(pass, pipeline.blur_pipeline)
			sdl.SetGPUViewport(pass, full_viewport)
			push_backdrop_vert_globals(cmd_buffer, f32(swapchain_width), f32(swapchain_height), 1)
			push_backdrop_blur_frag_globals(cmd_buffer, &frag_uniforms)
			sdl.BindGPUVertexStorageBuffers(pass, 0, ([^]^sdl.GPUBuffer)(&pipeline.primitive_buffer.gpu), 1)
			sdl.BindGPUFragmentSamplers(
				pass,
				0,
				&sdl.GPUTextureSamplerBinding{texture = pipeline.downsample_texture, sampler = pipeline.sampler},
				1,
			)

			// Only re-issue SetGPUScissor when the rect actually changes between sub-batches.
			current_scissor: sdl.Rect = {0, 0, 0, 0}
			scissor_set := false
			for j in group_start ..< group_end {
				grp := GLOB.tmp_sub_batches[j]
				sub_batch_scissor := find_scissor_for_sub_batch(j)
				if !scissor_set || sub_batch_scissor != current_scissor {
					sdl.SetGPUScissor(pass, sub_batch_scissor)
					current_scissor = sub_batch_scissor
					scissor_set = true
				}
				// 6 vertices per instance; the sub-batch's offset is passed as first_instance.
				sdl.DrawGPUPrimitives(pass, 6, grp.count, 0, grp.offset)
			}
			sdl.EndGPURenderPass(pass)
		}

		i = group_end
	}
}
//----- Primitive builders ----------------------------------
// Build a Gaussian_Blur_Primitive with bounds, radii, and feather computed from rectangle
// geometry. The caller sets `color` (tint) on the returned primitive before submitting.
//
// No rotation, no outline — gaussian blur primitives are intentionally limited to axis-aligned
// RRects. Rotation breaks screen-space blur sampling visually; outline would be a specialized
// edge effect that belongs in its own primitive type.
//INTERNAL
build_backdrop_primitive :: proc(
	rect: Rectangle,
	radii: Rectangle_Radii,
	feather_px: f32,
) -> Gaussian_Blur_Primitive {
	dpi := GLOB.dpi_scaling

	// A corner radius can never exceed half of the rectangle's shorter side.
	radius_limit := min(rect.width, rect.height) * 0.5
	top_left := clamp(radii.top_left, 0, radius_limit)
	top_right := clamp(radii.top_right, 0, radius_limit)
	bottom_right := clamp(radii.bottom_right, 0, radius_limit)
	bottom_left := clamp(radii.bottom_left, 0, radius_limit)

	// bounds is grown by half the feather on every side. The feather is divided by the DPI
	// scale so the padding is in the same logical units as the rectangle.
	feather_half := feather_px * 0.5
	bounds_pad := feather_half / dpi

	half_w := rect.width * 0.5
	half_h := rect.height * 0.5
	cx := rect.x + half_w
	cy := rect.y + half_h

	result: Gaussian_Blur_Primitive
	result.bounds = {
		cx - half_w - bounds_pad,
		cy - half_h - bounds_pad,
		cx + half_w + bounds_pad,
		cy + half_h + bounds_pad,
	}
	// Radii ordering matches the shader's sdRoundedBox swizzle:
	//   (p.x > 0) ? r.xy : r.zw   picks right-vs-left half
	//   then (p.y > 0) ? rxy.x : rxy.y   picks bottom-vs-top within that half
	// So slot 0 = bottom-right, slot 1 = top-right, slot 2 = bottom-left, slot 3 = top-left.
	result.radii = {bottom_right * dpi, top_right * dpi, bottom_left * dpi, top_left * dpi}
	// half_size and radii are scaled by the DPI factor; half_feather is stored unscaled.
	result.half_size = {half_w * dpi, half_h * dpi}
	result.half_feather = feather_half
	return result
}
// Append a Gaussian_Blur_Primitive to the staging array and emit a .Backdrop sub-batch
// carrying the requested gaussian_sigma. Sub-batch coalescing in append_or_extend_sub_batch
// will merge contiguous backdrops that share a sigma into a single instanced draw.
//INTERNAL
prepare_backdrop_primitive :: proc(layer: ^Layer, prim: Gaussian_Blur_Primitive, gaussian_sigma: f32) {
	// The primitive lands at the current end of the staging array; that index is the
	// sub-batch's offset.
	staged_index := u32(len(GLOB.tmp_gaussian_blur_primitives))
	append(&GLOB.tmp_gaussian_blur_primitives, prim)

	// The sub-batch is recorded against the last scissor in the layer's scissor range.
	last_scissor_index := layer.scissor_start + layer.scissor_len - 1
	active_scissor := &GLOB.scissors[last_scissor_index]
	append_or_extend_sub_batch(
		active_scissor,
		layer,
		.Backdrop,
		offset = staged_index,
		count = 1,
		gaussian_sigma = gaussian_sigma,
	)
}
//----- Public API ----------------------------------
// Draw a rectangle whose interior samples a Gaussian-blurred snapshot of the framebuffer
// behind it. RRect-only — covers rectangles, rounded rectangles, and circles via
// uniform_radii.
//
// `gaussian_sigma` is the Gaussian standard deviation in logical pixels. Typical UI range is
// 4..24. sigma <= 0 produces a sharp framebuffer mirror (no blur).
//
// `tint` controls the color of the frosted glass:
// - tint.rgb is the tint color.
// - tint.a is the tint *mix strength*, NOT panel opacity. The panel is always fully
// opaque inside its mask (matching real frosted glass and iOS UIBlurEffect / CSS
// backdrop-filter). At alpha=0 the user sees the pure blur unchanged; at alpha=255
// the blur is fully multiplied by tint.rgb. Intermediate values lerp between the two.
// - For a translucent panel layered over content, draw a separate translucent rect on
// top instead — the backdrop's job is to deliver the blur, not to blend with what's
// beneath it.
//
// Backdrop primitives have no rotation: backdrop sampling is in screen space, so a rotated
// mask over a stationary blur sample would look visually wrong. iOS UIVisualEffectView,
// CSS backdrop-filter, and Flutter BackdropFilter all enforce this implicitly; we enforce
// it explicitly by leaving no rotation parameter.
//
// Within a single layer, primitives sharing the same `gaussian_sigma` share one H+V blur
// pass pair via sub-batch coalescing. Primitives with different sigmas in the same layer
// trigger separate blur passes (cost scales with the number of unique sigmas).
//
// Must be called inside a `begin_backdrop` / `end_backdrop` scope (or use `backdrop_scope`).
backdrop_blur :: proc(
	layer: ^Layer,
	rect: Rectangle,
	gaussian_sigma: f32,
	tint: Color = DFT_TINT,
	radii: Rectangle_Radii = {},
	feather_px: f32 = DFT_FEATHER_PX,
) {
	// Geometry comes from the shared builder; the tint is the only field set here before the
	// primitive is staged on the layer.
	primitive := build_backdrop_primitive(rect, radii, feather_px)
	primitive.color = tint
	prepare_backdrop_primitive(layer, primitive, gaussian_sigma)
}