package draw import "core:log" import "core:math" import "core:mem" import sdl "vendor:sdl3" // This file hosts the backdrop subsystem: any visual effect that samples the current // framebuffer as input. Today the only implemented effect is Gaussian blur (frosted glass); // future effects (refraction, mirror, etc.) will live here too. // // The file is split into two top-level sections: // // 1. Shared backdrop infrastructure — bracket coordination, source_texture lifecycle, // sub-batch scanners. These are general to any backdrop effect: every backdrop effect // needs a snapshot of the framebuffer (source_texture) and needs to participate in the // bracket render-pass-boundary scheduling. When a second effect is added, its // per-effect resources go in their own section like the Gaussian blur one below; this // shared section stays. // // 2. Gaussian blur — the only effect implemented today. Owns its own PSOs, working // textures (downsample / h_blur), per-primitive storage layout, kernel math, and // bracket-runner inner loop. None of this is shared with future backdrop effects: a // refraction shader would have its own PSO, its own primitive struct, and likely // wouldn't need the downsample/h_blur intermediates at all. // // The `Backdrop` struct currently holds resources from both categories; field-group // comments inside it mark which are which. When a second effect lands the struct will be // split, but doing that pre-emptively means inventing a per-effect dispatch protocol on // speculation. Better to keep the conflation visible (and labeled) until concrete needs // shape the design. 
// ---------------------------------------------------------------------------------------------------------------------
// ----- Shared backdrop infrastructure ------------
// ---------------------------------------------------------------------------------------------------------------------

// All GPU-side state owned by the backdrop subsystem. The device-level objects (pipelines,
// sampler, primitive buffer) are built once by `create_backdrop`; the three textures are
// allocated lazily by `ensure_backdrop_textures` and stay nil until its first call.
//INTERNAL
Backdrop :: struct {
	// -- Shared across all backdrop effects --

	// When any backdrop draw exists this frame, the entire frame renders into source_texture
	// instead of the swapchain. Acts as the bracket's snapshot input by virtue of already
	// containing the pre-bracket frame. Copied to the swapchain at frame end.
	source_texture: ^sdl.GPUTexture,

	// Cached pixel dimensions for resize-detection in `ensure_backdrop_textures`.
	cached_width: u32,
	cached_height: u32,

	// Linear-clamp sampler used for sampling source_texture (and Gaussian blur's working
	// textures). Linear filtering is required by the Gaussian linear-sampling pair trick;
	// any future backdrop effect that samples source_texture with bilinear interpolation
	// can reuse this sampler. Clamp avoids edge-bleed at work-region boundaries.
	sampler: ^sdl.GPUSampler,

	// -- Gaussian blur effect --

	// Two graphics pipelines. The downsample PSO is a single-bilinear-sample fullscreen pass;
	// the blur PSO is mode-branched (H-blur fullscreen + V-composite instanced) and shares
	// one shader program for both modes via a uniform `mode` selector.
	downsample_pipeline: ^sdl.GPUGraphicsPipeline,
	blur_pipeline: ^sdl.GPUGraphicsPipeline,

	// Per-instance Gaussian_Blur_Primitive storage buffer. Grows on demand via grow_buffer_if_needed.
	// All backdrop primitives across all layers in a frame share this single buffer; sub-batches
	// reference into it by offset.
	primitive_buffer: Buffer,

	// Working textures, allocated once at swapchain resolution and recreated only on resize.
	// Both are sized at full swapchain resolution and single-sample. Larger downsample
	// factors fill only a sub-rect via viewport-limited rendering (see the adaptive
	// downsampling comment at the top of the Gaussian blur section below).
	// Roles, following the pass list documented on `run_backdrop_bracket`:
	//   downsample_texture — written by the downsample PSO and later overwritten by the V-blur
	//                        pass (ping-pong reuse). Read by the H-blur pass (mode 0) and by
	//                        the composite (mode 1).
	//   h_blur_texture     — written by the H-blur pass (mode 0). Read by the V-blur pass
	//                        (mode 0).
	downsample_texture: ^sdl.GPUTexture,
	h_blur_texture: ^sdl.GPUTexture,
}

// Build the device-level resources of the backdrop subsystem: four shader modules, the
// downsample and blur graphics pipelines, the primitive storage buffer, and the sampler.
// The working textures are NOT created here; `ensure_backdrop_textures` allocates them.
//
// Returns `ok = false` on any failure. Shader and pipeline failures are logged here via
// log.errorf; a `create_buffer` failure is propagated by `or_return`. In every failure case
// the deferred block at the top releases whatever had been created up to that point.
//INTERNAL
create_backdrop :: proc(device: ^sdl.GPUDevice, window: ^sdl.Window) -> (pipeline: Backdrop, ok: bool) {
	// On failure, clean up any partially-created resources.
	// Runs on every return path (including `or_return`) and keys off the named result `ok`.
	defer if !ok {
		if pipeline.sampler != nil do sdl.ReleaseGPUSampler(device, pipeline.sampler)
		if pipeline.primitive_buffer.gpu != nil do destroy_buffer(device, &pipeline.primitive_buffer)
		if pipeline.blur_pipeline != nil do sdl.ReleaseGPUGraphicsPipeline(device, pipeline.blur_pipeline)
		if pipeline.downsample_pipeline != nil do sdl.ReleaseGPUGraphicsPipeline(device, pipeline.downsample_pipeline)
	}

	// Bail out early when the device cannot consume the shader format this build embeds.
	active_shader_formats := sdl.GetGPUShaderFormats(device)
	if PLATFORM_SHADER_FORMAT_FLAG not_in active_shader_formats {
		log.errorf(
			"backdrop: no embedded shader matches active GPU formats; build supports %v but device reports %v",
			PLATFORM_SHADER_FORMAT,
			active_shader_formats,
		)
		return pipeline, false
	}

	// Both pipelines target the swapchain format; `ensure_backdrop_textures` takes the format
	// as a parameter, so the caller is presumably expected to pass the same one — verify.
	swapchain_format := sdl.GetGPUSwapchainTextureFormat(device, window)

	//----- Shader modules ----------------------------------
	// Each shader handle is released by a `defer` registered right after its nil check, so
	// all four are released on every exit path of this procedure, success or failure.

	fullscreen_vert := sdl.CreateGPUShader(
		device,
		sdl.GPUShaderCreateInfo {
			code_size = len(BACKDROP_FULLSCREEN_VERT_RAW),
			code = raw_data(BACKDROP_FULLSCREEN_VERT_RAW),
			entrypoint = SHADER_ENTRY,
			format = {PLATFORM_SHADER_FORMAT_FLAG},
			stage = .VERTEX,
		},
	)
	if fullscreen_vert == nil {
		log.errorf("Could not create backdrop fullscreen vertex shader: %s", sdl.GetError())
		return pipeline, false
	}
	defer sdl.ReleaseGPUShader(device, fullscreen_vert)

	// Downsample fragment stage: one sampler plus one uniform block.
	downsample_frag := sdl.CreateGPUShader(
		device,
		sdl.GPUShaderCreateInfo {
			code_size = len(BACKDROP_DOWNSAMPLE_FRAG_RAW),
			code = raw_data(BACKDROP_DOWNSAMPLE_FRAG_RAW),
			entrypoint = SHADER_ENTRY,
			format = {PLATFORM_SHADER_FORMAT_FLAG},
			stage = .FRAGMENT,
			num_samplers = 1,
			num_uniform_buffers = 1,
		},
	)
	if downsample_frag == nil {
		log.errorf("Could not create backdrop downsample fragment shader: %s", sdl.GetError())
		return pipeline, false
	}
	defer sdl.ReleaseGPUShader(device, downsample_frag)

	// Blur vertex stage: one uniform block plus one storage buffer (the primitive array).
	blur_vert := sdl.CreateGPUShader(
		device,
		sdl.GPUShaderCreateInfo {
			code_size = len(BACKDROP_BLUR_VERT_RAW),
			code = raw_data(BACKDROP_BLUR_VERT_RAW),
			entrypoint = SHADER_ENTRY,
			format = {PLATFORM_SHADER_FORMAT_FLAG},
			stage = .VERTEX,
			num_uniform_buffers = 1,
			num_storage_buffers = 1,
		},
	)
	if blur_vert == nil {
		log.errorf("Could not create backdrop blur vertex shader: %s", sdl.GetError())
		return pipeline, false
	}
	defer sdl.ReleaseGPUShader(device, blur_vert)

	// Blur fragment stage: one sampler plus one uniform block (kernel + mode/direction).
	blur_frag := sdl.CreateGPUShader(
		device,
		sdl.GPUShaderCreateInfo {
			code_size = len(BACKDROP_BLUR_FRAG_RAW),
			code = raw_data(BACKDROP_BLUR_FRAG_RAW),
			entrypoint = SHADER_ENTRY,
			format = {PLATFORM_SHADER_FORMAT_FLAG},
			stage = .FRAGMENT,
			num_samplers = 1,
			num_uniform_buffers = 1,
		},
	)
	if blur_frag == nil {
		log.errorf("Could not create backdrop blur fragment shader: %s", sdl.GetError())
		return pipeline, false
	}
	defer sdl.ReleaseGPUShader(device, blur_frag)

	//----- Downsample PSO ----------------------------------
	// Single bilinear sample, blend disabled. No vertex buffer (gl_VertexIndex 0..2 emits the
	// fullscreen triangle). Single-sample target (working textures are never MSAA).
	downsample_target := sdl.GPUColorTargetDescription {
		format = swapchain_format,
		blend_state = sdl.GPUColorTargetBlendState{enable_blend = false},
	}
	pipeline.downsample_pipeline = sdl.CreateGPUGraphicsPipeline(
		device,
		sdl.GPUGraphicsPipelineCreateInfo {
			vertex_shader = fullscreen_vert,
			fragment_shader = downsample_frag,
			primitive_type = .TRIANGLELIST,
			multisample_state = sdl.GPUMultisampleState{sample_count = ._1},
			target_info = sdl.GPUGraphicsPipelineTargetInfo {
				color_target_descriptions = &downsample_target,
				num_color_targets = 1,
			},
		},
	)
	if pipeline.downsample_pipeline == nil {
		log.errorf("Failed to create backdrop downsample graphics pipeline: %s", sdl.GetError())
		return pipeline, false
	}

	//----- Blur PSO (H-blur + V-composite, mode-branched) --------------
	// Premultiplied-over blend matching the main pipeline. No vertex buffer (mode 0 uses
	// gl_VertexIndex 0..2 fullscreen tri; mode 1 uses gl_VertexIndex 0..5 unit-quad +
	// gl_InstanceIndex into the storage buffer).
	//
	// Single-sample throughout: levlib does not support MSAA (see init's doc comment in
	// draw.odin). The whole frame renders to single-sample targets, so sample_count = ._1
	// matches both mode 0 (writes h_blur_texture) and mode 1 (writes source_texture).
	blur_target := sdl.GPUColorTargetDescription {
		format = swapchain_format,
		blend_state = sdl.GPUColorTargetBlendState {
			enable_blend = true,
			enable_color_write_mask = true,
			src_color_blendfactor = .ONE,
			dst_color_blendfactor = .ONE_MINUS_SRC_ALPHA,
			color_blend_op = .ADD,
			src_alpha_blendfactor = .ONE,
			dst_alpha_blendfactor = .ONE_MINUS_SRC_ALPHA,
			alpha_blend_op = .ADD,
			color_write_mask = sdl.GPUColorComponentFlags{.R, .G, .B, .A},
		},
	}
	pipeline.blur_pipeline = sdl.CreateGPUGraphicsPipeline(
		device,
		sdl.GPUGraphicsPipelineCreateInfo {
			vertex_shader = blur_vert,
			fragment_shader = blur_frag,
			primitive_type = .TRIANGLELIST,
			multisample_state = sdl.GPUMultisampleState{sample_count = ._1},
			target_info = sdl.GPUGraphicsPipelineTargetInfo {
				color_target_descriptions = &blur_target,
				num_color_targets = 1,
			},
		},
	)
	if pipeline.blur_pipeline == nil {
		log.errorf("Failed to create backdrop blur graphics pipeline: %s", sdl.GetError())
		return pipeline, false
	}

	//----- Storage buffer for Gaussian_Blur_Primitive instances -------------
	// Initial capacity is BUFFER_INIT_SIZE primitives; `or_return` leaves `ok` false on
	// failure, which triggers the deferred cleanup above.
	pipeline.primitive_buffer = create_buffer(
		device,
		size_of(Gaussian_Blur_Primitive) * BUFFER_INIT_SIZE,
		sdl.GPUBufferUsageFlags{.GRAPHICS_STORAGE_READ},
	) or_return

	//----- Sampler ----------------------------------
	// Linear min/mag/mip filtering with clamp-to-edge addressing on all three axes.
	pipeline.sampler = sdl.CreateGPUSampler(
		device,
		sdl.GPUSamplerCreateInfo {
			min_filter = .LINEAR,
			mag_filter = .LINEAR,
			mipmap_mode = .LINEAR,
			address_mode_u = .CLAMP_TO_EDGE,
			address_mode_v = .CLAMP_TO_EDGE,
			address_mode_w = .CLAMP_TO_EDGE,
		},
	)
	if pipeline.sampler == nil {
		log.errorf("Could not create backdrop GPU sampler: %s", sdl.GetError())
		return pipeline, false
	}

	log.debug("Done creating backdrop subsystem")
	return pipeline, true
}

// Release every GPU resource referenced by `pipeline`: the three textures, the sampler, the
// primitive buffer, and both graphics pipelines. Raw handles are nil-checked first;
// `primitive_buffer` is handed to `destroy_buffer` unconditionally. The handle fields are not
// reset to nil afterwards, so the struct must not be used again after this call.
//INTERNAL
destroy_backdrop :: proc(device: ^sdl.GPUDevice, pipeline: ^Backdrop) {
	if pipeline.h_blur_texture != nil do sdl.ReleaseGPUTexture(device, pipeline.h_blur_texture)
	if pipeline.downsample_texture != nil do sdl.ReleaseGPUTexture(device, pipeline.downsample_texture)
	if pipeline.source_texture != nil do sdl.ReleaseGPUTexture(device, pipeline.source_texture)
	if pipeline.sampler != nil do sdl.ReleaseGPUSampler(device, pipeline.sampler)
	destroy_buffer(device, &pipeline.primitive_buffer)
	if pipeline.blur_pipeline != nil do sdl.ReleaseGPUGraphicsPipeline(device, pipeline.blur_pipeline)
	if pipeline.downsample_pipeline != nil do sdl.ReleaseGPUGraphicsPipeline(device, pipeline.downsample_pipeline)
}

//----- Working texture management ----------------------------------

// Allocate (or reallocate, on resize) the three working textures that the backdrop bracket
// uses. All three are sized at full swapchain resolution, single-sample, share the swapchain
// format, and need {.COLOR_TARGET, .SAMPLER} usage so they can be written by render passes
// and read by subsequent passes.
//
// `source_texture` is shared infrastructure (used by every backdrop effect).
// `downsample_texture` and `h_blur_texture` are Gaussian-blur-specific intermediates; a
// future backdrop effect with no downsample/blur prep would skip them.
//
// Recreates on dimension change only — same-size frames hit the early-out and skip GPU
// resource churn. Note that the early-out compares dimensions only: a call with the same
// size but a different `format` keeps the existing textures.
//
// Operates on the global `GLOB.backdrop`. Any texture allocation failure is fatal
// (log.panicf).
//INTERNAL
ensure_backdrop_textures :: proc(device: ^sdl.GPUDevice, format: sdl.GPUTextureFormat, width, height: u32) {
	pipeline := &GLOB.backdrop
	if pipeline.source_texture != nil && pipeline.cached_width == width && pipeline.cached_height == height {
		return
	}

	// Free any prior allocations (handles resize and the very-first call where these are nil).
	// Each handle is reset to nil right after release so no stale pointer is left in the struct.
	if pipeline.h_blur_texture != nil {
		sdl.ReleaseGPUTexture(device, pipeline.h_blur_texture)
		pipeline.h_blur_texture = nil
	}
	if pipeline.downsample_texture != nil {
		sdl.ReleaseGPUTexture(device, pipeline.downsample_texture)
		pipeline.downsample_texture = nil
	}
	if pipeline.source_texture != nil {
		sdl.ReleaseGPUTexture(device, pipeline.source_texture)
		pipeline.source_texture = nil
	}

	// Working textures are sized at full swapchain resolution to support factor=1 (no downsample
	// for small σ, where any 2:1 round-trip would visibly soften the output). Larger factors just
	// write to a sub-rect via viewport-limited rendering. See the adaptive downsample comment
	// at the top of the Gaussian blur section.
	working_width := width
	working_height := height

	pipeline.source_texture = sdl.CreateGPUTexture(
		device,
		sdl.GPUTextureCreateInfo {
			type = .D2,
			format = format,
			usage = {.COLOR_TARGET, .SAMPLER},
			width = width,
			height = height,
			layer_count_or_depth = 1,
			num_levels = 1,
			sample_count = ._1,
		},
	)
	if pipeline.source_texture == nil {
		log.panicf("Failed to create backdrop source texture (%dx%d): %s", width, height, sdl.GetError())
	}

	pipeline.downsample_texture = sdl.CreateGPUTexture(
		device,
		sdl.GPUTextureCreateInfo {
			type = .D2,
			format = format,
			usage = {.COLOR_TARGET, .SAMPLER},
			width = working_width,
			height = working_height,
			layer_count_or_depth = 1,
			num_levels = 1,
			sample_count = ._1,
		},
	)
	if pipeline.downsample_texture == nil {
		log.panicf(
			"Failed to create backdrop downsample texture (%dx%d): %s",
			working_width,
			working_height,
			sdl.GetError(),
		)
	}

	pipeline.h_blur_texture = sdl.CreateGPUTexture(
		device,
		sdl.GPUTextureCreateInfo {
			type = .D2,
			format = format,
			usage = {.COLOR_TARGET, .SAMPLER},
			width = working_width,
			height = working_height,
			layer_count_or_depth = 1,
			num_levels = 1,
			sample_count = ._1,
		},
	)
	if pipeline.h_blur_texture == nil {
		log.panicf(
			"Failed to create backdrop h_blur texture (%dx%d): %s",
			working_width,
			working_height,
			sdl.GetError(),
		)
	}

	// Record the new size only after all three allocations succeeded (failures panic above).
	pipeline.cached_width = width
	pipeline.cached_height = height
}
//----- Frame / layer scanners ----------------------------------

// Reports whether at least one sub-batch staged this frame (across all layers) has kind
// .Backdrop. `end()` calls this once up front to decide whether the whole frame must be
// routed through source_texture. The scan is linear in the number of sub-batches and stops
// at the first match, so the usual cost is small.
//INTERNAL
frame_has_backdrop :: proc() -> bool {
	for index in 0 ..< len(GLOB.tmp_sub_batches) {
		if GLOB.tmp_sub_batches[index].kind == .Backdrop {
			return true
		}
	}
	return false
}

// Find the scissor that owns a given sub-batch index by linear scan over GLOB.scissors.
// Used by `run_backdrop_bracket`'s composite pass when the bracket loses its layer-pointer
// context: per-sub-batch scissor lookup is required to honor scissors set up upstream by
// `prepare_clay_batch`'s ScissorStart handling. O(scissors) per sub-batch is acceptable
// because scissor counts are small (single digits in typical UI frames).
//
// Panics if no scissor owns the index. The renderer's invariant is that the scissor list
// forms a contiguous, disjoint cover over `[0, len(tmp_sub_batches))` because every
// sub-batch is created via `append_or_extend_sub_batch` (which increments the active
// scissor's `sub_batch_len` in lockstep with the global array's growth) and scissors are
// only created at the current end-of-array. A miss here means that invariant is broken —
// either by a future code change that bypasses `append_or_extend_sub_batch`, by a scissor
// constructed with the wrong `sub_batch_start`, or by external corruption — and silent
// degradation would mask the bug. The panic message includes the offending index and the
// scissor list shape so the failure is locatable.
//INTERNAL
find_scissor_for_sub_batch :: proc(sub_batch_index: u32) -> sdl.Rect {
	// Each scissor owns the half-open index range [sub_batch_start, sub_batch_start + sub_batch_len).
	// Skip candidates whose range lies entirely before or after the requested index; the first
	// one left standing is the owner.
	for candidate in GLOB.scissors {
		first_owned := candidate.sub_batch_start
		if sub_batch_index < first_owned do continue
		if sub_batch_index >= first_owned + candidate.sub_batch_len do continue
		return candidate.bounds
	}

	// No owner: the scissor-cover invariant is broken. Fail loudly rather than guess a rect.
	log.panicf(
		"find_scissor_for_sub_batch: no scissor owns sub-batch index %d (scissor count=%d, total sub-batches=%d); " +
		"the scissor list must form a contiguous cover over all sub-batches",
		sub_batch_index,
		len(GLOB.scissors),
		len(GLOB.tmp_sub_batches),
	)
}

// ---------------------------------------------------------------------------------------------------------------------
// ----- Gaussian blur ------------
// ---------------------------------------------------------------------------------------------------------------------

// Adaptive downsample design (Flutter-style).
//
// The bracket picks a downsample factor per-sigma-group, not as a global constant. The choice
// is driven by Flutter's `CalculateScale` formula in
// impeller/entity/contents/filters/gaussian_blur_filter_contents.cc (originally from Skia's
// GrBlurUtils): downsample so that the sigma in working-resolution pixels stays in the
// 2..4 range. This keeps the kernel reach wide enough to hide high-frequency artifacts from
// the bilinear upsample at the composite, while keeping the kernel's discrete tap count
// small (≤3σ reach → ≈12 paired taps).
//
// The full table, in physical pixels (sigma_logical * dpi_scaling):
//
//   sigma_phys ≤ 4 → factor = 1 (no downsample; source is sampled directly)
//   sigma_phys ≤ 8 → factor = 2
//   sigma_phys > 8 → factor = 4 (capped)
//
// Capped at factor=4 to favor visual quality over bandwidth at the high end. Larger factors
// (8 and 16) would lose more high-frequency detail than the kernel can mask even with the
// H+V split, and the bandwidth saving is small (the work region also shrinks quadratically,
// so most of the savings are already captured at factor=4).
//
// Working textures are sized at full swapchain resolution to support factor=1. Larger factors
// just write to a smaller sub-rect via viewport-limited rendering. Memory cost: full-res
// working textures (2 textures, RGBA8) is roughly 16 MB at 1080p, 64 MB at 4K. On modern
// GPUs this is well within budget; on Mali Valhall SBCs it's negligible against unified-
// memory headroom.
//
// The shaders read the factor as a uniform. The downsample shader has three paths (factor=1
// identity, factor=2 single bilinear tap, factor>=4 four bilinear taps with offsets scaling
// by factor/4). The V-composite mode of backdrop_blur.frag uses inv_downsample_factor to
// scale full-res frag coords down to working-res UV.

//----- GPU types ----------------------------------

// Maximum number of (weight, offset) pairs in a single blur kernel. Each pair represents
// the linear-sampling pair adjustment (one bilinear fetch covering two adjacent texels);
// pair[0] is the center weight with offset 0. With 32 pairs the kernel holds up to 63
// discrete taps per side (1 center + 31 paired taps × 2 texels each), i.e. a reach of 62
// texels on either side of the center — enough for sigma values well past the 4..24 typical
// UI range. Must match MAX_KERNEL_PAIRS in shaders/source/backdrop_blur.frag.
//INTERNAL
MAX_GAUSSIAN_BLUR_KERNEL_PAIRS :: 32

// Gaussian_Blur_Primitive is the GPU-side per-primitive storage layout. Mirrors the GLSL std430
// struct in shaders/source/backdrop_blur.vert. Field order is chosen so std430 alignment
// rules pack the struct to a clean 48-byte natural layout (no implicit padding): vec4
// members come first (16-byte aligned at any offset), then vec2, then scalars. The total is
// a multiple of 16 so the std430 array stride matches size_of(...) exactly.
//
// Gaussian blur primitives are RRect-only: rectangles, rounded rectangles, and circles
// (via uniform_radii) are all expressible. Rotation is intentionally omitted — backdrop
// sampling is in screen space, so a rotated mask over a stationary blur sample would look
// visually wrong. iOS, CSS backdrop-filter, and Flutter BackdropFilter all enforce this
// implicitly; we enforce it explicitly by leaving no rotation field.
//
// Outline is also intentionally omitted. A specialized edge effect (e.g. liquid-glass-style
// refraction outlines) would be implemented as a dedicated primitive type with its own
// pipeline rather than tacked onto this one as a flag bit.
//INTERNAL
Gaussian_Blur_Primitive :: struct {
	bounds:       [4]f32, // 0:  16 — world-space quad (min_xy, max_xy)
	radii:        [4]f32, // 16: 16 — per-corner radii in physical pixels (BR, TR, BL, TL)
	half_size:    [2]f32, // 32: 8  — RRect half extents (physical px)
	half_feather: f32,    // 40: 4  — feather_px * 0.5 (SDF anti-aliasing)
	color:        Color,  // 44: 4  — tint, packed RGBA u8x4
}
#assert(size_of(Gaussian_Blur_Primitive) == 48)

// Vertex uniforms for the unified blur PSO (mode 0 = H-blur, mode 1 = V-composite).
// Matches the GLSL Uniforms block in shaders/source/backdrop_blur.vert. The downsample
// PSO has no vertex uniforms.
//INTERNAL
Gaussian_Blur_Vert_Uniforms :: struct {
	projection: matrix[4, 4]f32, // 0:  64 — screen-space ortho (mode 1 only; mode 0 ignores)
	dpi_scale:  f32,             // 64: 4
	mode:       u32,             // 68: 4 — 0 = H-blur fullscreen tri; 1 = V-composite instanced quads
	_pad0:      [2]f32,          // 72: 8 — std140 vec4 alignment pad
}

// Fragment uniforms for the downsample PSO. Matches Uniforms block in
// shaders/source/backdrop_downsample.frag.
//INTERNAL
Gaussian_Blur_Downsample_Frag_Uniforms :: struct {
	inv_source_size:   [2]f32, // 0:  8 — 1.0 / source_texture pixel dimensions (full-res)
	downsample_factor: u32,    // 8:  4 — 1, 2, or 4 (selects identity / 1-tap / 4-tap path in shader)
	_pad0:             u32,    // 12: 4
}

// Fragment uniforms for the unified blur PSO (mode 0 + mode 1). Matches the GLSL Uniforms
// block in shaders/source/backdrop_blur.frag. The kernel array holds the linear-sampling
// pair coefficients computed CPU-side via `compute_blur_kernel`.
//INTERNAL
Gaussian_Blur_Frag_Uniforms :: struct {
	inv_working_size:      [2]f32, // 0:  8 — 1.0 / working-resolution texture dimensions
	pair_count:            u32,    // 8:  4 — number of (weight, offset) pairs; pair[0] is center
	mode:                  u32,    // 12: 4 — 0 = H-blur, 1 = V-composite (must match vert mode)
	direction:             [2]f32, // 16: 8 — (1,0) for H-blur, (0,1) for V-composite
	inv_downsample_factor: f32,    // 24: 4 — 1.0 / downsample_factor (mode 1 only; mode 0 ignores)
	_pad0:                 f32,    // 28: 4
	kernel:                [MAX_GAUSSIAN_BLUR_KERNEL_PAIRS][4]f32, // 32: 512 — .x = weight, .y = offset (texels)
}

//----- Kernel computation ----------------------------------

// Compute Gaussian blur kernel weights with the linear-sampling pair adjustment.
// Adapted from RAD Debugger's r_d3d11_g_blur_shader_src CPU-side coefficient generation
// and Daniel Rákos's "Efficient Gaussian blur with linear sampling" article.
//
// The trick: bilinear sampling lets us fetch (1-t)*pixel[i] + t*pixel[i+1] with a single
// texture lookup. So for any pair of adjacent discrete weights w0, w1 we can collapse them
// into one bilinear fetch with weight w = w0+w1 sampled at offset i + w1/w. This halves the
// fragment-shader sample count for a given kernel radius.
//
// Output: `kernel[0]` is the center weight (offset 0), and `kernel[1..pair_count-1]` each
// hold one paired tap (sampled symmetrically as ±offset in the shader). The shader iterates
// `i in [1, pair_count)` and does two texture fetches per pair — one at +offset, one at
// -offset — for a total of 1 + 2*(pair_count-1) bilinear fetches per fragment. Only entries
// `[0, pair_count)` are written; the rest of `kernel` is left untouched.
//
// `sigma` is the true Gaussian standard deviation in the kernel's working-space units
// (working-resolution texels, after the caller has converted from logical pixels via
// dpi_scaling and the downsample factor). The kernel extent reaches ±3σ, capturing 99.7% of
// the Gaussian's mass; weights beyond that contribute imperceptibly. The discrete weights
// are normalized so they sum to 1.0 (truncating at ±3σ loses a tiny amount of mass; we
// renormalize to preserve overall image brightness).
//
// Degenerate inputs: a sigma that is <= 0, NaN, or so small that σ² underflows to zero
// produces the pass-through kernel `{1, 0}` (pair_count = 1). A very large or infinite
// sigma is clamped to the storage budget instead of being converted to an integer first,
// so it yields the widest kernel the array can hold rather than an out-of-range conversion.
//
// Note on the parameter contract: this routine takes σ directly and derives the tap count
// from it, rather than the inverse (RAD Debugger's algorithm passes a tap count and derives
// `stdev = (blur_count-1)/2`). Taking σ directly matches what callers expect when they read
// "gaussian_sigma" — passing tap count under that name was a footgun.
//INTERNAL
compute_blur_kernel :: proc(
	sigma: f32,
	kernel: ^[MAX_GAUSSIAN_BLUR_KERNEL_PAIRS][4]f32,
) -> (
	pair_count: u32,
) {
	// Denominator of the Gaussian exponent. Computed up front so an underflow to zero (which
	// would make the center weight exp(-0/0) = NaN) is rejected together with non-positive
	// and NaN sigmas. The guard is written `!(sigma > 0)` rather than `sigma <= 0` because
	// every ordered comparison against NaN is false.
	two_sigma_sq := 2 * sigma * sigma
	if !(sigma > 0) || two_sigma_sq == 0 {
		kernel[0] = {1, 0, 0, 0}
		return 1
	}

	// Per-side discrete tap count: ceil(3*sigma) + 1 (center + 3σ reach on each side).
	// Cap at the storage budget. With MAX_GAUSSIAN_BLUR_KERNEL_PAIRS=32 each pair collapses 2
	// discrete taps via linear-sampling, so max discrete taps per side = 1 + 31*2 = 63.
	// The cap is applied in float space BEFORE converting to u32, so a huge or infinite sigma
	// never reaches an out-of-range float→integer conversion. Because sigma > 0 here, the
	// reach is at least 1 and the tap count is always >= 2.
	max_taps := u32(MAX_GAUSSIAN_BLUR_KERNEL_PAIRS - 1) * 2 + 1
	reach := math.ceil(3 * sigma)
	discrete_taps := max_taps
	if reach < f32(max_taps) do discrete_taps = u32(reach) + 1

	// Compute discrete weights[i] = exp(-i² / (2σ²)). The inv_root prefactor cancels in the
	// final normalization, so we skip it.
	weights: [MAX_GAUSSIAN_BLUR_KERNEL_PAIRS * 2]f32 = {}
	total: f32 = 0
	for i in 0 ..< discrete_taps {
		x := f32(i)
		weights[i] = math.exp(-x * x / two_sigma_sq)
		// weights[0] is the center; weights[1..] are sampled on both sides, so they count twice.
		total += weights[i] if i == 0 else 2 * weights[i]
	}

	// Normalize so the kernel sums to exactly 1.0 across the full ±3σ extent.
	if total > 0 {
		inv_total := 1.0 / total
		for i in 0 ..< discrete_taps do weights[i] *= inv_total
	}

	// Linear-sampling pair adjustment: weights[1] and weights[2] collapse to one bilinear
	// fetch with weight w = w0+w1 at offset i + w1/w. `weights` is sized 2*MAX so that
	// `weights[i+1]` access on odd i up to discrete_taps-1 is always in bounds; when
	// discrete_taps is even the final partner slot was never written and is still 0.
	kernel[0] = {weights[0], 0, 0, 0}
	pair_count = 1
	for i := u32(1); i < discrete_taps; i += 2 {
		w0 := weights[i]
		w1 := weights[i + 1]
		w := w0 + w1
		// Guard against a div-by-zero where both adjacent weights underflow to 0 (only happens
		// at the tail of a very tight kernel; numerically-degenerate but legal).
		offset := f32(i)
		if w > 0 do offset = f32(i) + w1 / w
		kernel[pair_count] = {w, offset, 0, 0}
		pair_count += 1
	}
	return pair_count
}

// Pick a downsample factor for a given sigma (in logical pixels; converted to physical
// pixels here via GLOB.dpi_scaling). See the adaptive downsample comment at the top of the
// Gaussian blur section for the table and rationale. Returned values: {1, 2, 4}.
//INTERNAL
compute_backdrop_downsample_factor :: proc(sigma_logical: f32) -> u32 {
	sigma_phys := sigma_logical * GLOB.dpi_scaling
	switch {
	case sigma_phys <= 4:
		return 1
	case sigma_phys <= 8:
		return 2
	case:
		return 4
	}
}

//----- Uniform push helpers ----------------------------------

// Push the Gaussian_Blur_Vert_Uniforms block to the vertex stage at slot 0. `width` and
// `height` define the orthographic projection's right/bottom extents; `mode` selects
// H-blur (0) or V-composite (1). The padding field is left zero-initialized.
//INTERNAL
push_backdrop_vert_globals :: proc(cmd_buffer: ^sdl.GPUCommandBuffer, width: f32, height: f32, mode: u32) {
	uniforms := Gaussian_Blur_Vert_Uniforms {
		projection = ortho_rh(left = 0.0, top = 0.0, right = width, bottom = height, near = -1.0, far = 1.0),
		dpi_scale  = GLOB.dpi_scaling,
		mode       = mode,
	}
	sdl.PushGPUVertexUniformData(cmd_buffer, 0, &uniforms, size_of(Gaussian_Blur_Vert_Uniforms))
}

// Push the Gaussian_Blur_Downsample_Frag_Uniforms block to the fragment stage at slot 0.
//INTERNAL
push_backdrop_downsample_frag_globals :: proc(
	cmd_buffer: ^sdl.GPUCommandBuffer,
	source_width, source_height: u32,
	downsample_factor: u32,
) {
	// Reciprocal of the full-resolution source dimensions, one component per axis.
	inv_width := 1.0 / f32(source_width)
	inv_height := 1.0 / f32(source_height)

	// Start from a zeroed block so the padding field stays 0, then fill the live fields.
	block: Gaussian_Blur_Downsample_Frag_Uniforms
	block.inv_source_size = {inv_width, inv_height}
	block.downsample_factor = downsample_factor

	sdl.PushGPUFragmentUniformData(cmd_buffer, 0, &block, size_of(Gaussian_Blur_Downsample_Frag_Uniforms))
}

// Hand a caller-prepared Gaussian_Blur_Frag_Uniforms block (kernel plus pass mode/direction)
// to the fragment stage at uniform slot 0. The block is pushed as-is; nothing is modified.
//INTERNAL
push_backdrop_blur_frag_globals :: proc(
	cmd_buffer: ^sdl.GPUCommandBuffer,
	uniforms: ^Gaussian_Blur_Frag_Uniforms,
) {
	FRAGMENT_UNIFORM_SLOT :: 0
	BLOCK_BYTES :: size_of(Gaussian_Blur_Frag_Uniforms)
	sdl.PushGPUFragmentUniformData(cmd_buffer, FRAGMENT_UNIFORM_SLOT, uniforms, BLOCK_BYTES)
}

//----- Storage-buffer upload ----------------------------------

// Upload all Gaussian_Blur_Primitive instances staged this frame to the backdrop subsystem's storage
// buffer. Mirrors the SDF primitive upload in core_2d.odin's `upload`. Called from
// `end()` inside the same copy pass that uploads vertices/indices/SDF primitives.
//INTERNAL
upload_backdrop_primitives :: proc(device: ^sdl.GPUDevice, pass: ^sdl.GPUCopyPass) {
	// Nothing staged this frame: skip the map/copy/upload entirely.
	staged_count := u32(len(GLOB.tmp_gaussian_blur_primitives))
	if staged_count == 0 {
		return
	}
	byte_count := staged_count * size_of(Gaussian_Blur_Primitive)

	// Work through a pointer so the handles read below are whatever the buffer holds after
	// the grow call.
	storage := &GLOB.backdrop.primitive_buffer
	grow_buffer_if_needed(device, storage, byte_count, sdl.GPUBufferUsageFlags{.GRAPHICS_STORAGE_READ})

	// Stage the CPU-side primitive array into the mapped transfer buffer.
	mapped := sdl.MapGPUTransferBuffer(device, storage.transfer, false)
	if mapped == nil {
		log.panicf("Failed to map backdrop primitive transfer buffer: %s", sdl.GetError())
	}
	mem.copy(mapped, raw_data(GLOB.tmp_gaussian_blur_primitives), int(byte_count))
	sdl.UnmapGPUTransferBuffer(device, storage.transfer)

	// Record the transfer → GPU copy for the whole staged range, starting at offset 0.
	upload_source := sdl.GPUTransferBufferLocation{transfer_buffer = storage.transfer}
	upload_target := sdl.GPUBufferRegion{buffer = storage.gpu, offset = 0, size = byte_count}
	sdl.UploadToGPUBuffer(pass, upload_source, upload_target, false)
}

//----- Bracket scheduler ----------------------------------

// Compute the union AABB of the backdrop primitives in a contiguous-same-sigma sub-batch run
// (one "sigma group"), expanded by 6 sigmas of blur reach (the kernel weight beyond 3σ is
// negligible; halo of 6σ covers both the H-blur reads from downsample and the V-blur reads
// from h_blur, since each pass extends its kernel another 3σ from its output position).
// Returns a viewport in physical pixels for the full-resolution render target; the caller
// divides by the chosen downsample factor for the working-resolution passes.
//
// Per-group (rather than per-layer) because the adaptive downsample picks a different factor
// per sigma, and the kernel reach is also per-sigma. A tighter region per group means less
// fragment work in the downsample and H-blur passes.
// Parameters:
//   group_start, group_end            — half-open sub-batch index range of the sigma group.
//   sigma_logical                     — the group's blur sigma in logical pixels.
//   swapchain_width, swapchain_height — render-target size in physical pixels (clamp bounds).
// Returns the region origin and size in whole physical pixels, or all zeros when the group
// contains no backdrop primitives or the clamped region is empty. The region is snapped
// OUTWARD to the pixel grid (floor of the min edge, ceil of the max edge) so it always fully
// contains the fractional bounds; truncating the float width instead would drop up to two
// pixels at the right/bottom edge.
//INTERNAL
compute_backdrop_group_work_region :: proc(
	group_start, group_end: u32,
	sigma_logical: f32,
	swapchain_width, swapchain_height: u32,
) -> (
	region_x, region_y, region_w, region_h: u32,
) {
	dpi := GLOB.dpi_scaling

	// Union of all backdrop primitive bounds in the group. `has_any` distinguishes "no
	// primitive seen yet" from a genuine bound at the origin.
	has_any := false
	min_x: f32 = 0
	min_y: f32 = 0
	max_x: f32 = 0
	max_y: f32 = 0
	for i in group_start ..< group_end {
		batch := GLOB.tmp_sub_batches[i]
		if batch.kind != .Backdrop do continue
		for p in batch.offset ..< batch.offset + batch.count {
			prim := GLOB.tmp_gaussian_blur_primitives[p]
			// prim.bounds is in logical pixels (world space).
			if !has_any {
				min_x = prim.bounds[0]
				min_y = prim.bounds[1]
				max_x = prim.bounds[2]
				max_y = prim.bounds[3]
				has_any = true
			} else {
				if prim.bounds[0] < min_x do min_x = prim.bounds[0]
				if prim.bounds[1] < min_y do min_y = prim.bounds[1]
				if prim.bounds[2] > max_x do max_x = prim.bounds[2]
				if prim.bounds[3] > max_y do max_y = prim.bounds[3]
			}
		}
	}
	if !has_any do return 0, 0, 0, 0

	// Halo = 6σ. The bracket runs two sequential blur passes (H then V). H reads downsample
	// at ±3σ from its output; V reads h_blur at ±3σ from its output. So for V outputs at
	// primitive_AABB to be valid, h_blur must be valid at primitive_AABB ±3σ, which requires
	// the downsample valid at primitive_AABB ±6σ.
	halo_logical := 6.0 * sigma_logical
	min_x -= halo_logical
	min_y -= halo_logical
	max_x += halo_logical
	max_y += halo_logical

	// Convert to physical pixels and clamp to swapchain bounds.
	phys_min_x := math.max(min_x * dpi, 0)
	phys_min_y := math.max(min_y * dpi, 0)
	phys_max_x := math.min(max_x * dpi, f32(swapchain_width))
	phys_max_y := math.min(max_y * dpi, f32(swapchain_height))
	if phys_max_x <= phys_min_x || phys_max_y <= phys_min_y do return 0, 0, 0, 0

	// Snap outward to whole pixels. The min edges are >= 0 and the max edges are <= the
	// (integer-valued) swapchain extents, so the snapped values stay inside the target and
	// the differences below are non-negative whole numbers.
	snapped_min_x := math.floor(phys_min_x)
	snapped_min_y := math.floor(phys_min_y)
	snapped_max_x := math.ceil(phys_max_x)
	snapped_max_y := math.ceil(phys_max_y)

	region_x = u32(snapped_min_x)
	region_y = u32(snapped_min_y)
	region_w = u32(snapped_max_x - snapped_min_x)
	region_h = u32(snapped_max_y - snapped_min_y)
	return
}

// Run one bracket over a contiguous range of pure-backdrop sub-batches. Assumes:
//   - source_texture currently holds the pre-bracket frame contents (everything submitted
//     ahead of this bracket on the same layer has already been rendered).
//   - The caller has invoked ensure_backdrop_textures with current swapchain dimensions.
//   - The half-open range `[sub_batch_start, sub_batch_end)` is non-empty and every
//     sub-batch in it has kind == .Backdrop. The caller (draw_layer) guarantees this by
//     splitting the layer into runs.
//
// Per-sigma-group execution. The bracket walks the range in submission order, grouping
// contiguous-same-sigma .Backdrop sub-batches. For each group:
//   1. Pick a downsample factor using compute_backdrop_downsample_factor.
//   2. Compute that group's work region (primitives' AABB + 6σ halo, clamped).
//   3. Downsample: source_texture → downsample_texture, viewport-limited to
//      work_region/factor. Writes into a sub-rect of the working texture.
//   4. H-blur (mode 0, direction=H): downsample_texture → h_blur_texture, same viewport.
//   5. V-blur (mode 0, direction=V): h_blur_texture → downsample_texture (ping-pong reuse;
//      downsample_texture's data is no longer needed). Same viewport.
//   6. Composite (mode 1): downsample_texture (now holds H+V blur) → source_texture, full-
//      target viewport, per-primitive SDF discard handles masking and applies the tint.
//      Each sub-batch in the group issues an instanced draw under its own scissor (sub-
//      batches inherit scissor state from the surrounding ScissorStart/End at submission).
//
// V-blur is run as its own working→working pass rather than folded into the composite. The
// folded variant produces a horizontal-vs-vertical asymmetry artifact (horizontal source
// features end up looking sharper than vertical ones inside the panel). Matching V's
// structure exactly to H's restores symmetry.
//
// On exit, source_texture contains the pre-bracket contents plus all backdrop primitives in
// this range composited on top.
//INTERNAL run_backdrop_bracket :: proc( cmd_buffer: ^sdl.GPUCommandBuffer, sub_batch_start: u32, sub_batch_end: u32, swapchain_width, swapchain_height: u32, ) { pipeline := &GLOB.backdrop full_viewport := sdl.GPUViewport { x = 0, y = 0, w = f32(swapchain_width), h = f32(swapchain_height), min_depth = 0, max_depth = 1, } // Working textures are at full swapchain resolution. Each per-group factor=N pass writes // only to a sub-rect of dimensions (work_region_phys / N), via viewport-limited rendering. layer_end := sub_batch_end i := sub_batch_start for i < layer_end { // Caller guarantees this range is pure backdrop sub-batches. assert(GLOB.tmp_sub_batches[i].kind == .Backdrop, "non-backdrop sub-batch inside bracket range") batch := GLOB.tmp_sub_batches[i] // Find the contiguous run of .Backdrop sub-batches with this sigma. sigma := batch.gaussian_sigma group_start := i group_end := i + 1 for group_end < layer_end { if GLOB.tmp_sub_batches[group_end].gaussian_sigma != sigma do break group_end += 1 } // Pick downsample factor for this group. downsample_factor := compute_backdrop_downsample_factor(sigma) // Compute this group's work region (primitive AABB + 6σ halo, in physical pixels). region_x, region_y, region_w, region_h := compute_backdrop_group_work_region( group_start, group_end, sigma, swapchain_width, swapchain_height, ) if region_w == 0 || region_h == 0 { i = group_end continue } // Convert work region to working-resolution coords (divide by factor, ceil-round-up). working_x := region_x / downsample_factor working_y := region_y / downsample_factor working_w := (region_w + downsample_factor - 1) / downsample_factor working_h := (region_h + downsample_factor - 1) / downsample_factor // Working textures are sized at min factor (2). At factor=4 we have only half the texture // area available in each axis. Clamp to the texture extent for either case. 
wt_w := pipeline.cached_width / downsample_factor wt_h := pipeline.cached_height / downsample_factor if working_x + working_w > wt_w do working_w = wt_w - working_x if working_y + working_h > wt_h do working_h = wt_h - working_y if working_w == 0 || working_h == 0 { i = group_end continue } working_viewport := sdl.GPUViewport { x = f32(working_x), y = f32(working_y), w = f32(working_w), h = f32(working_h), min_depth = 0, max_depth = 1, } working_scissor := sdl.Rect { x = i32(working_x), y = i32(working_y), w = i32(working_w), h = i32(working_h), } // inv_working_size is always relative to the actual texture extent (full swapchain res). // At factor>1 we're only using a sub-rect, but the texture coords are still divided by the // full texture's dimensions because that's what gl_FragCoord operates on. inv_working_size := [2]f32{1.0 / f32(pipeline.cached_width), 1.0 / f32(pipeline.cached_height)} // Convert the user's logical-pixel sigma into the kernel's working space. // sigma_working_texels = sigma_logical * dpi_scaling / downsample_factor. 
effective_sigma := sigma * GLOB.dpi_scaling / f32(downsample_factor) frag_uniforms := Gaussian_Blur_Frag_Uniforms { inv_working_size = inv_working_size, inv_downsample_factor = 1.0 / f32(downsample_factor), } frag_uniforms.pair_count = compute_blur_kernel(effective_sigma, &frag_uniforms.kernel) //----- Downsample (source_texture → downsample_texture, viewport-limited) ---------- { pass := sdl.BeginGPURenderPass( cmd_buffer, &sdl.GPUColorTargetInfo { texture = pipeline.downsample_texture, load_op = .DONT_CARE, store_op = .STORE, cycle = true, }, 1, nil, ) sdl.BindGPUGraphicsPipeline(pass, pipeline.downsample_pipeline) sdl.SetGPUViewport(pass, working_viewport) sdl.SetGPUScissor(pass, working_scissor) push_backdrop_downsample_frag_globals( cmd_buffer, pipeline.cached_width, pipeline.cached_height, downsample_factor, ) sdl.BindGPUFragmentSamplers( pass, 0, &sdl.GPUTextureSamplerBinding{texture = pipeline.source_texture, sampler = pipeline.sampler}, 1, ) sdl.DrawGPUPrimitives(pass, 3, 1, 0, 0) sdl.EndGPURenderPass(pass) } //----- H-blur (mode 0, direction=H): downsample_texture → h_blur_texture -------- { frag_uniforms.mode = 0 frag_uniforms.direction = {1, 0} pass := sdl.BeginGPURenderPass( cmd_buffer, &sdl.GPUColorTargetInfo { texture = pipeline.h_blur_texture, load_op = .DONT_CARE, store_op = .STORE, cycle = true, }, 1, nil, ) sdl.BindGPUGraphicsPipeline(pass, pipeline.blur_pipeline) sdl.SetGPUViewport(pass, working_viewport) sdl.SetGPUScissor(pass, working_scissor) // Mode 0's vertex shader is a fullscreen triangle that ignores `projection`; pass // the standard ortho anyway so the same uniform block works for both modes. push_backdrop_vert_globals(cmd_buffer, f32(swapchain_width), f32(swapchain_height), 0) push_backdrop_blur_frag_globals(cmd_buffer, &frag_uniforms) // The blur PSO is declared with num_storage_buffers = 1 (mode 1 reads it). 
SDL3 GPU // validation requires the binding to be present for *any* draw on this PSO, even // though mode 0's shader path doesn't actually read it. Bind it here too. sdl.BindGPUVertexStorageBuffers(pass, 0, ([^]^sdl.GPUBuffer)(&pipeline.primitive_buffer.gpu), 1) sdl.BindGPUFragmentSamplers( pass, 0, &sdl.GPUTextureSamplerBinding{texture = pipeline.downsample_texture, sampler = pipeline.sampler}, 1, ) sdl.DrawGPUPrimitives(pass, 3, 1, 0, 0) sdl.EndGPURenderPass(pass) } //----- V-blur (mode 0, direction=V): h_blur_texture → downsample_texture -------- // Ping-pong reuse: downsample_texture's data is no longer needed once H-blur has // produced its output, so we reuse it as the V-blur target. Saves allocating a third // working texture. { frag_uniforms.mode = 0 frag_uniforms.direction = {0, 1} pass := sdl.BeginGPURenderPass( cmd_buffer, &sdl.GPUColorTargetInfo { texture = pipeline.downsample_texture, load_op = .DONT_CARE, store_op = .STORE, cycle = true, }, 1, nil, ) sdl.BindGPUGraphicsPipeline(pass, pipeline.blur_pipeline) sdl.SetGPUViewport(pass, working_viewport) sdl.SetGPUScissor(pass, working_scissor) push_backdrop_vert_globals(cmd_buffer, f32(swapchain_width), f32(swapchain_height), 0) push_backdrop_blur_frag_globals(cmd_buffer, &frag_uniforms) sdl.BindGPUVertexStorageBuffers(pass, 0, ([^]^sdl.GPUBuffer)(&pipeline.primitive_buffer.gpu), 1) sdl.BindGPUFragmentSamplers( pass, 0, &sdl.GPUTextureSamplerBinding{texture = pipeline.h_blur_texture, sampler = pipeline.sampler}, 1, ) sdl.DrawGPUPrimitives(pass, 3, 1, 0, 0) sdl.EndGPURenderPass(pass) } //----- Composite (mode 1): downsample_texture (now holds H+V blur) → source_texture -- // No kernel applied here — the working texture is already fully blurred. The shader just // upsamples (via bilinear filtering on the read), applies the SDF mask, and applies the // tint. 
One render pass for the whole sigma group; each sub-batch issues its own draw // call because non-contiguous-but-same-sigma sub-batches couldn't coalesce upstream. // // Per-sub-batch scissor: sub-batches inherit scissor state from ScissorStart/End that // surrounded their submission. Switching scissors mid-pass is cheap; what matters is // that the composite respects the same clipping the caller set up. { frag_uniforms.mode = 1 // direction is unused in mode 1 but keep it set so reading the uniform doesn't see // undefined data on platforms that care about that. frag_uniforms.direction = {0, 0} pass := sdl.BeginGPURenderPass( cmd_buffer, &sdl.GPUColorTargetInfo{texture = pipeline.source_texture, load_op = .LOAD, store_op = .STORE}, 1, nil, ) sdl.BindGPUGraphicsPipeline(pass, pipeline.blur_pipeline) sdl.SetGPUViewport(pass, full_viewport) push_backdrop_vert_globals(cmd_buffer, f32(swapchain_width), f32(swapchain_height), 1) push_backdrop_blur_frag_globals(cmd_buffer, &frag_uniforms) sdl.BindGPUVertexStorageBuffers(pass, 0, ([^]^sdl.GPUBuffer)(&pipeline.primitive_buffer.gpu), 1) sdl.BindGPUFragmentSamplers( pass, 0, &sdl.GPUTextureSamplerBinding{texture = pipeline.downsample_texture, sampler = pipeline.sampler}, 1, ) current_scissor: sdl.Rect = {0, 0, 0, 0} scissor_set := false for j in group_start ..< group_end { grp := GLOB.tmp_sub_batches[j] sub_batch_scissor := find_scissor_for_sub_batch(j) if !scissor_set || sub_batch_scissor != current_scissor { sdl.SetGPUScissor(pass, sub_batch_scissor) current_scissor = sub_batch_scissor scissor_set = true } sdl.DrawGPUPrimitives(pass, 6, grp.count, 0, grp.offset) } sdl.EndGPURenderPass(pass) } i = group_end } } //----- Primitive builders ---------------------------------- // Build a Gaussian_Blur_Primitive with bounds, radii, and feather computed from rectangle // geometry. The caller sets `color` (tint) on the returned primitive before submitting. 
//
// No rotation, no outline — gaussian blur primitives are intentionally limited to axis-aligned
// RRects. Rotation breaks screen-space blur sampling visually; outline would be a specialized
// edge effect that belongs in its own primitive type.
//INTERNAL
build_backdrop_primitive :: proc(
	rect: Rectangle,
	radii: Rectangle_Radii,
	feather_px: f32,
) -> Gaussian_Blur_Primitive {
	dpi := GLOB.dpi_scaling

	// A corner radius can never exceed half of the rectangle's shorter side.
	radius_limit := min(rect.width, rect.height) * 0.5
	top_left := clamp(radii.top_left, 0, radius_limit)
	top_right := clamp(radii.top_right, 0, radius_limit)
	bottom_right := clamp(radii.bottom_right, 0, radius_limit)
	bottom_left := clamp(radii.bottom_left, 0, radius_limit)

	// Half the feather is stored on the primitive as-is; divided by the DPI scale it becomes
	// the outward padding applied to the (unscaled) bounds.
	feather_half := feather_px * 0.5
	pad := feather_half / dpi

	half_w := rect.width * 0.5
	half_h := rect.height * 0.5
	cx := rect.x + half_w
	cy := rect.y + half_h

	prim: Gaussian_Blur_Primitive

	// Bounds stay in the rectangle's own units: (min_x, min_y, max_x, max_y), padded.
	prim.bounds = {cx - half_w - pad, cy - half_h - pad, cx + half_w + pad, cy + half_h + pad}

	// Radii ordering matches the shader's sdRoundedBox swizzle:
	//   (p.x > 0) ? r.xy : r.zw     picks right-vs-left half
	//   then (p.y > 0) ? rxy.x : rxy.y   picks bottom-vs-top within that half
	// So slot 0 = bottom-right, slot 1 = top-right, slot 2 = bottom-left, slot 3 = top-left.
	// Radii and half_size are multiplied by the DPI scale; bounds are not.
	prim.radii = {bottom_right * dpi, top_right * dpi, bottom_left * dpi, top_left * dpi}
	prim.half_size = {half_w * dpi, half_h * dpi}
	prim.half_feather = feather_half

	return prim
}

// Append a Gaussian_Blur_Primitive to the staging array and emit a .Backdrop sub-batch
// carrying the requested gaussian_sigma. Sub-batch coalescing in append_or_extend_sub_batch
// will merge contiguous backdrops that share a sigma into a single instanced draw.
//INTERNAL
prepare_backdrop_primitive :: proc(layer: ^Layer, prim: Gaussian_Blur_Primitive, gaussian_sigma: f32) {
	// The primitive lands at the current end of the staging array, so its index is the
	// array length captured before the append.
	staged_index := u32(len(GLOB.tmp_gaussian_blur_primitives))
	append(&GLOB.tmp_gaussian_blur_primitives, prim)

	// The sub-batch is recorded against the last scissor in this layer's scissor range.
	last_scissor_index := layer.scissor_start + layer.scissor_len - 1
	active_scissor := &GLOB.scissors[last_scissor_index]

	append_or_extend_sub_batch(
		active_scissor,
		layer,
		.Backdrop,
		offset = staged_index,
		count = 1,
		gaussian_sigma = gaussian_sigma,
	)
}

//----- Public API ----------------------------------

// Draw a rectangle whose interior samples a Gaussian-blurred snapshot of the framebuffer
// behind it. RRect-only — covers rectangles, rounded rectangles, and circles via
// uniform_radii.
//
// `gaussian_sigma` is the Gaussian standard deviation in logical pixels. Typical UI range is
// 4..24. sigma <= 0 produces a sharp framebuffer mirror (no blur).
//
// `tint` controls the color of the frosted glass:
//   - tint.rgb is the tint color.
//   - tint.a is the tint *mix strength*, NOT panel opacity. The panel is always fully
//     opaque inside its mask (matching real frosted glass and iOS UIBlurEffect / CSS
//     backdrop-filter). At alpha=0 the user sees the pure blur unchanged; at alpha=255
//     the blur is fully multiplied by tint.rgb. Intermediate values lerp between the two.
//   - For a translucent panel layered over content, draw a separate translucent rect on
//     top instead — the backdrop's job is to deliver the blur, not to blend with what's
//     beneath it.
//
// Backdrop primitives have no rotation: backdrop sampling is in screen space, so a rotated
// mask over a stationary blur sample would look visually wrong. iOS UIVisualEffectView,
// CSS backdrop-filter, and Flutter BackdropFilter all enforce this implicitly; we enforce
// it explicitly by leaving no rotation parameter.
//
// Within a single layer, primitives sharing the same `gaussian_sigma` share one H+V blur
// pass pair via sub-batch coalescing.
// Primitives with different sigmas in the same layer
// trigger separate blur passes (cost scales with the number of unique sigmas).
//
// Must be called inside a `begin_backdrop` / `end_backdrop` scope (or use `backdrop_scope`).
backdrop_blur :: proc(
	layer: ^Layer,
	rect: Rectangle,
	gaussian_sigma: f32,
	tint: Color = DFT_TINT,
	radii: Rectangle_Radii = {},
	feather_px: f32 = DFT_FEATHER_PX,
) {
	// Geometry comes from the builder; the tint is the only field filled in here before the
	// primitive is staged together with its sigma.
	tinted_primitive := build_backdrop_primitive(rect, radii, feather_px)
	tinted_primitive.color = tint

	prepare_backdrop_primitive(layer, tinted_primitive, gaussian_sigma)
}