diff --git a/.zed/tasks.json b/.zed/tasks.json index e08acae..8b14508 100644 --- a/.zed/tasks.json +++ b/.zed/tasks.json @@ -70,6 +70,11 @@ "command": "odin run draw/examples -debug -out=out/debug/draw-examples -- hellope-custom", "cwd": "$ZED_WORKTREE_ROOT", }, + { + "label": "Run draw textures example", + "command": "odin run draw/examples -debug -out=out/debug/draw-examples -- textures", + "cwd": "$ZED_WORKTREE_ROOT", + }, { "label": "Run qrcode basic example", "command": "odin run qrcode/examples -debug -out=out/debug/qrcode-examples -- basic", diff --git a/draw/README.md b/draw/README.md index 1066a7e..5eeabf2 100644 --- a/draw/README.md +++ b/draw/README.md @@ -47,99 +47,107 @@ primitives and effects can be added to the library without architectural changes ### Overview: three pipelines -The 2D renderer will use three GPU pipelines, split by **register pressure compatibility** and -**render-state requirements**: +The 2D renderer uses three GPU pipelines, split by **register pressure** (main vs effects) and +**render-pass structure** (everything vs backdrop): -1. **Main pipeline** — shapes (SDF and tessellated) and text. Low register footprint (~18–22 - registers per thread). Runs at high GPU occupancy. Handles 90%+ of all fragments in a typical - frame. +1. **Main pipeline** — shapes (SDF and tessellated), text, and textured rectangles. Low register + footprint (~18–24 registers per thread). Runs at full GPU occupancy on every architecture. + Handles 90%+ of all fragments in a typical frame. 2. **Effects pipeline** — drop shadows, inner shadows, outer glow, and similar ALU-bound blur effects. Medium register footprint (~48–60 registers). Each effects primitive includes the base shape's SDF so that it can draw both the effect and the shape in a single fragment pass, avoiding - redundant overdraw. + redundant overdraw. Separated from the main pipeline to protect main-pipeline occupancy on + low-end hardware (see register analysis below). -3. 
**Backdrop-effects pipeline** — frosted glass, refraction, and any effect that samples the current - render target as input. High register footprint (~70–80 registers) and structurally requires a - `CopyGPUTextureToTexture` from the render target before drawing. Separated both for register - pressure and because the texture-copy requirement forces a render-pass-level state change. +3. **Backdrop pipeline** — frosted glass, refraction, and any effect that samples the current render + target as input. Implemented as a multi-pass sequence (downsample, separable blur, composite), + where each individual pass has a low-to-medium register footprint (~15–40 registers). Separated + from the other pipelines because it structurally requires ending the current render pass and + copying the render target before any backdrop-sampling fragment can execute — a + command-buffer-level boundary that cannot be avoided regardless of shader complexity. A typical UI frame with no effects uses 1 pipeline bind and 0 switches. A frame with drop shadows uses 2 pipelines and 1 switch. A frame with shadows and frosted glass uses all 3 pipelines and 2 -switches plus 1 texture copy. At ~5μs per pipeline bind on modern APIs, worst-case switching overhead -is under 0.15% of an 8.3ms (120 FPS) frame budget. +switches plus 1 texture copy. At ~1–5μs per pipeline bind on modern APIs, worst-case switching +overhead is negligible relative to an 8.3ms (120 FPS) frame budget. ### Why three pipelines, not one or seven The natural question is whether we should use a single unified pipeline (fewer state changes, simpler code) or many per-primitive-type pipelines (no branching overhead, lean per-shader register usage). -The dominant cost factor is **GPU register pressure**, not pipeline switching overhead or fragment -shader branching. A GPU shader core has a fixed register pool shared among all concurrent threads. 
The -compiler allocates registers pessimistically based on the worst-case path through the shader. If the -shader contains both a 20-register RRect SDF and a 72-register frosted-glass blur, _every_ fragment -— even trivial RRects — is allocated 72 registers. This directly reduces **occupancy** (the number of -warps that can run simultaneously), which reduces the GPU's ability to hide memory latency. +#### Main/effects split: register pressure -Concrete occupancy analysis on modern NVIDIA SMs, which have 65,536 32-bit registers and a -hardware-imposed maximum thread count per SM that varies by architecture (Volta/A100: 2,048; -consumer Ampere/Ada: 1,536). Occupancy is register-limited only when `65536 / regs_per_thread` falls -below the hardware thread cap; above that cap, occupancy is 100% regardless of register count. +A GPU shader core has a fixed register pool shared among all concurrent threads. The compiler +allocates registers pessimistically based on the worst-case path through the shader. If the shader +contains both a 20-register RRect SDF and a 48-register drop-shadow blur, _every_ fragment — even +trivial RRects — is allocated 48 registers. This directly reduces **occupancy** (the number of +warps/wavefronts that can run simultaneously), which reduces the GPU's ability to hide memory +latency. -On consumer Ampere/Ada GPUs (RTX 30xx/40xx, max 1,536 threads per SM): +Each GPU architecture has a **register cliff** — a threshold above which occupancy starts dropping. +Below the cliff, adding registers has zero occupancy cost. 
-| Register allocation | Reg-limited threads | Actual (hw-capped) | Occupancy | -| ------------------------- | ------------------- | ------------------ | --------- | -| 20 regs (RRect only) | 3,276 | 1,536 | 100% | -| 32 regs | 2,048 | 1,536 | 100% | -| 48 regs (+ drop shadow) | 1,365 | 1,365 | ~89% | -| 72 regs (+ frosted glass) | 910 | 910 | ~59% | +On consumer Ampere/Ada GPUs (RTX 30xx/40xx, 65,536 regs/SM, max 1,536 threads/SM, cliff at ~43 regs): -On Volta/A100 GPUs (max 2,048 threads per SM): +| Register allocation | Reg-limited threads | Actual (hw-capped) | Occupancy | +| ----------------------- | ------------------- | ------------------ | --------- | +| 20 regs (main pipeline) | 3,276 | 1,536 | 100% | +| 32 regs | 2,048 | 1,536 | 100% | +| 48 regs (effects) | 1,365 | 1,365 | ~89% | -| Register allocation | Reg-limited threads | Actual (hw-capped) | Occupancy | -| ------------------------- | ------------------- | ------------------ | --------- | -| 20 regs (RRect only) | 3,276 | 2,048 | 100% | -| 32 regs | 2,048 | 2,048 | 100% | -| 48 regs (+ drop shadow) | 1,365 | 1,365 | ~67% | -| 72 regs (+ frosted glass) | 910 | 910 | ~44% | +On Volta/A100 GPUs (65,536 regs/SM, max 2,048 threads/SM, cliff at ~32 regs): -The register cliff — where occupancy begins dropping — starts at ~43 regs/thread on consumer -Ampere/Ada (65536 / 1536) and ~32 regs/thread on Volta/A100 (65536 / 2048). Below the cliff, -adding registers has zero occupancy cost. +| Register allocation | Reg-limited threads | Actual (hw-capped) | Occupancy | +| ----------------------- | ------------------- | ------------------ | --------- | +| 20 regs (main pipeline) | 3,276 | 2,048 | 100% | +| 32 regs | 2,048 | 2,048 | 100% | +| 48 regs (effects) | 1,365 | 1,365 | ~67% | -The impact of reduced occupancy depends on whether the shader is memory-latency-bound (where -occupancy is critical for hiding latency) or ALU-bound (where it matters less). 
For the -backdrop-effects pipeline's frosted-glass shader, which performs multiple dependent texture reads, -59% occupancy (consumer) or 44% occupancy (Volta) meaningfully reduces the GPU's ability to hide -texture latency — roughly a 1.7× to 2.3× throughput reduction compared to full occupancy. At 4K with -1.5× overdraw (~12.4M fragments), if the main pipeline's fragment work at full occupancy takes ~2ms, -a single unified shader containing the glass branch would push it to ~3.4–4.6ms depending on -architecture. This is a per-frame multiplier, not a per-primitive cost — it applies even when the -heavy branch is never taken, because the compiler allocates registers for the worst-case path. +On low-end mobile (ARM Mali Bifrost/Valhall, 64 regs/thread, cliff fixed at 32 regs): -**Note on Apple M3+ GPUs:** Apple's M3 GPU architecture introduces Dynamic Caching (register file -virtualization), which allocates registers dynamically at runtime based on actual usage rather than -worst-case declared usage. This significantly reduces the static register-pressure-to-occupancy -penalty described above. The tier split remains useful on Apple hardware for other reasons (keeping -the backdrop texture-copy out of the main render pass, isolating blur ALU complexity), but the -register-pressure argument specifically weakens on M3 and later. +| Register allocation | Occupancy | +| -------------------- | -------------------------- | +| 0–32 regs (main) | 100% (full thread count) | +| 33–64 regs (effects) | ~50% (thread count halves) | -The three-pipeline split groups primitives by register footprint so that: +Mali's cliff at 32 registers is the binding constraint. On desktop the occupancy difference between +20 and 48 registers is modest (89–100%); on Mali it is a hard 2× throughput reduction. The +main/effects split protects 90%+ of a frame's fragments (shapes, text, textures) from the effects +pipeline's register cost. 
-- Main pipeline (~20 regs): all fragments run at full occupancy on every architecture. -- Effects pipeline (~48–55 regs): shadow/glow fragments run at 67–89% occupancy depending on - architecture; unavoidable given the blur math complexity. -- Backdrop-effects pipeline (~72–75 regs): glass fragments run at 44–59% occupancy; also - unavoidable, and structurally separated anyway by the texture-copy requirement. +For the effects pipeline's drop-shadow shader — erf-approximation blur math with several texture +fetches — 50% occupancy on Mali roughly halves throughput. At 4K with 1.5× overdraw (~12.4M +fragments), a single unified shader containing the shadow branch would cost ~4ms instead of ~2ms on +low-end mobile. This is a per-frame multiplier even when the heavy branch is never taken, because the +compiler allocates registers for the worst-case path. -This avoids the register-pressure tax of a single unified shader while keeping pipeline count minimal -(3 vs. Zed GPUI's 7). The effects that drag occupancy down are isolated to the fragments that -actually need them. Crucially, all shape kinds within the main pipeline (SDF, tessellated, text) -cluster at 12–24 registers — well below the register cliff on every architecture — so unifying them -costs nothing in occupancy. +All main-pipeline members (SDF shapes, tessellated geometry, text, textured rectangles) cluster at +12–24 registers — below the cliff on every architecture — so unifying them costs nothing in +occupancy. -**Why not per-primitive-type pipelines (GPUI's approach)?** Zed's GPUI uses 7 separate shader pairs: +**Note on Apple M3+ GPUs:** Apple's M3 introduces Dynamic Caching (register file virtualization), +which allocates registers at runtime based on actual usage rather than worst-case. This weakens the +static register-pressure argument on M3 and later, but the split remains useful for isolating blur +ALU complexity and keeping the backdrop texture-copy out of the main render pass. 
+ +#### Backdrop split: render-pass structure + +The backdrop pipeline (frosted glass, refraction, mirror surfaces) is separated for a structural +reason unrelated to register pressure. Before any backdrop-sampling fragment can execute, the current +render target must be copied to a separate texture via `CopyGPUTextureToTexture` — a +command-buffer-level operation that requires ending the current render pass. This boundary exists +regardless of shader complexity and cannot be optimized away. + +The backdrop pipeline's individual shader passes (downsample, separable blur, composite) are +register-light (~15–40 regs each), so merging them into the effects pipeline would cause no occupancy +problem. But the render-pass boundary makes merging structurally impossible — effects draws happen +inside the main render pass, backdrop draws happen inside their own bracketed pass sequence. + +#### Why not per-primitive-type pipelines (GPUI's approach) + +Zed's GPUI uses 7 separate shader pairs: quad, shadow, underline, monochrome sprite, polychrome sprite, path, surface. This eliminates all branching and gives each shader minimal register usage. Three concrete costs make this approach wrong for our use case: @@ -151,7 +159,7 @@ typical UI frame with 15 scissors and 3–4 primitive kinds per scissor, per-kin ~45–60 draw calls and pipeline binds; our unified approach produces ~15–20 draw calls and 1–5 pipeline binds. At ~5μs each for CPU-side command encoding on modern APIs, per-kind splitting adds 375–500μs of CPU overhead per frame — **4.5–6% of an 8.3ms (120 FPS) budget** — with no -compensating GPU-side benefit, because the register-pressure savings within the simple-SDF tier are +compensating GPU-side benefit, because the register-pressure savings within the simple-SDF range are negligible (all members cluster at 12–22 registers). 
**Z-order preservation forces the API to expose layers.** With a single pipeline drawing all kinds @@ -190,8 +198,8 @@ in submission order: ~60 boundary warps at ~80 extra instructions each), unified divergence costs ~13μs — still 3.5× cheaper than the pipeline-switching alternative. -The split we _do_ perform (main / effects / backdrop-effects) is motivated by register-pressure tier -boundaries where occupancy drops are significant at 4K (see numbers above). Within a tier, unified is +The split we _do_ perform (main / effects / backdrop) is motivated by register-pressure boundaries +and structural render-pass requirements (see analysis above). Within a pipeline, unified is strictly better by every measure: fewer draw calls, simpler Z-order, lower CPU overhead, and negligible GPU-side branching cost. @@ -483,25 +491,40 @@ Wallace's variant) and vger-rs. - Vello's implementation of blurred rounded rectangle as a gradient type: https://github.com/linebender/vello/pull/665 -### Backdrop-effects pipeline +### Backdrop pipeline -The backdrop-effects pipeline handles effects that sample the current render target as input: frosted -glass, refraction, mirror surfaces. It is structurally separated from the effects pipeline for two -reasons: +The backdrop pipeline handles effects that sample the current render target as input: frosted glass, +refraction, mirror surfaces. It is separated from the effects pipeline for a structural reason, not +register pressure. -1. **Render-state requirement.** Before any backdrop-sampling fragment can run, the current render - target must be copied to a separate texture via `CopyGPUTextureToTexture`. This is a command- - buffer-level operation that cannot happen mid-render-pass. The copy naturally creates a pipeline - boundary. +**Render-pass boundary.** Before any backdrop-sampling fragment can run, the current render target +must be copied to a separate texture via `CopyGPUTextureToTexture`. 
This is a command-buffer-level +operation that cannot happen mid-render-pass. The copy naturally creates a pipeline boundary that no +amount of shader optimization can eliminate — it is a fundamental requirement of sampling a surface +while also writing to it. -2. **Register pressure.** Backdrop-sampling shaders read from a texture with Gaussian kernel weights - (multiple texture fetches per fragment), pushing register usage to ~70–80. Including this in the - effects pipeline would reduce occupancy for all shadow/glow fragments from ~30% to ~20%, costing - measurable throughput on the common case. +**Multi-pass implementation.** Backdrop effects are implemented as separable multi-pass sequences +(downsample → horizontal blur → vertical blur → composite), following the standard approach used by +iOS `UIVisualEffectView`, Android `RenderEffect`, and Flutter's `BackdropFilter`. Each individual +pass has a low-to-medium register footprint (~15–40 registers), always below the effects pipeline's +~48–60. The multi-pass approach avoids the monolithic 70+ register shader that a single-pass +Gaussian blur would require, making backdrop effects viable on low-end mobile GPUs (including +Mali-G31 and VideoCore VI) where per-thread register limits are tight. -The backdrop-effects pipeline binds a secondary sampler pointing at the captured backdrop texture. When -no backdrop effects are present in a frame, this pipeline is never bound and the texture copy never -happens — zero cost. +**Bracketed execution.** All backdrop draws in a frame share a single bracketed region of the command +buffer: end the current render pass, copy the render target, execute all backdrop sub-passes, then +resume normal drawing. The entry/exit cost (texture copy + render-pass break) is paid once per frame +regardless of how many backdrop effects are visible. When no backdrop effects are present, the bracket +is never entered and the texture copy never happens — zero cost. 
+ +**Why not split the backdrop sub-passes into separate pipelines?** The individual passes range from +~15 to ~40 registers, which does cross Mali's 32-register cliff. However, the register-pressure argument +that justifies the main/effects split does not apply here. The main/effects split protects the +_common path_ (90%+ of frame fragments) from the uncommon path's register cost. Inside the backdrop +pipeline there is no common-vs-uncommon distinction — if backdrop effects are active, every sub-pass +runs; if not, none run. The backdrop pipeline either executes as a complete unit or not at all. +Additionally, backdrop effects cover a small fraction of the frame's total fragments (~5% at typical +UI scales), so the occupancy variation within the bracket has negligible impact on frame time. ### Vertex layout @@ -524,19 +547,21 @@ The `Primitive` struct for SDF shapes lives in the storage buffer, not in vertex ``` Primitive :: struct { - kind: Shape_Kind, // 0: enum u8 - flags: Shape_Flags, // 1: bit_set[Shape_Flag; u8] - _pad: u16, // 2: reserved - bounds: [4]f32, // 4: min_x, min_y, max_x, max_y - color: Color, // 20: u8x4 - _pad2: [3]u8, // 24: alignment - params: Shape_Params, // 28: raw union, 32 bytes + bounds: [4]f32, // 0: min_x, min_y, max_x, max_y + color: Color, // 16: u8x4, unpacked in shader via unpackUnorm4x8 + kind_flags: u32, // 20: (kind as u32) | (flags as u32 << 8) + rotation: f32, // 24: shader self-rotation in radians + _pad: f32, // 28: alignment + params: Shape_Params, // 32: raw union, 32 bytes (two vec4s of shape-specific data) + uv_rect: [4]f32, // 64: texture UV sub-region (u_min, v_min, u_max, v_max) } -// Total: 60 bytes (padded to 64 for GPU alignment) +// Total: 80 bytes (std430 aligned) ``` `Shape_Params` is a `#raw_union` with named variants per shape kind (`rrect`, `circle`, `segment`, -etc.), ensuring type safety on the CPU side and zero-cost reinterpretation on the GPU side. 
+etc.), ensuring type safety on the CPU side and zero-cost reinterpretation on the GPU side. The +`uv_rect` field is used by textured SDF primitives (Shape_Flag.Textured); non-textured primitives +leave it zeroed. ### Draw submission order @@ -547,7 +572,7 @@ Within each scissor region, draws are issued in submission order to preserve the 2. Bind **main pipeline, tessellated mode** → draw all queued tessellated vertices (non-indexed for shapes, indexed for text). Pipeline state unchanged from today. 3. Bind **main pipeline, SDF mode** → draw all queued SDF primitives (instanced, one draw call). -4. If backdrop effects are present: copy render target, bind **backdrop-effects pipeline** → draw +4. If backdrop effects are present: copy render target, bind **backdrop pipeline** → draw backdrop primitives. The exact ordering within a scissor may be refined based on actual Z-ordering requirements. The key @@ -647,7 +672,7 @@ register-pressure analysis from the pipeline-strategy section above shows this i so both run at 100% occupancy. The remaining ALU difference (~15 extra instructions for the SDF evaluation) amounts to ~20μs at 4K — below noise. Meanwhile, splitting into a separate pipeline would add ~1–5μs per pipeline bind on the CPU side per scissor, matching or exceeding the GPU-side -savings. Within the main tier, unified remains strictly better. +savings. Within the main pipeline, unified remains strictly better. The naming convention follows the existing shape API: `rectangle_texture` and `rectangle_texture_corners` sit alongside `rectangle` and `rectangle_corners`, mirroring the @@ -725,6 +750,35 @@ The following are plumbed in the descriptor but not implemented in phase 1: expectation is that 3D rendering will use dedicated pipelines (separate from the 2D pipelines) sharing GPU resources (textures, samplers, command buffer lifecycle) with the 2D renderer. +## Multi-window support + +The renderer currently assumes a single window via the global `GLOB` state. 
Multi-window support is +deferred but anticipated. When revisited, the RAD Debugger's bucket + pass-list model +(`src/draw/draw.h`, `src/draw/draw.c` in EpicGamesExt/raddebugger) is worth studying as a reference. + +RAD separates draw submission from rendering via **buckets**. A `DR_Bucket` is an explicit handle +that accumulates an ordered list of render passes (`R_PassList`). The user creates a bucket, pushes +it onto a thread-local stack, issues draw calls (which target the top-of-stack bucket), then submits +the bucket to a specific window. Multiple buckets can exist simultaneously — one per window, or one +per UI panel that gets composited into a parent bucket via `dr_sub_bucket`. Implicit draw parameters +(clip rect, 2D transform, sampler mode, transparency) are managed via push/pop stacks scoped to each +bucket, so different windows can have independent clip and transform state without interference. + +The key properties this gives RAD: + +- **Per-window isolation.** Each window builds its own bucket with its own pass list and state stacks. + No global contention. +- **Thread-parallel building.** Each thread has its own draw context and arena. Multiple threads can + build buckets concurrently, then submit them to the render backend sequentially. +- **Compositing.** A pre-built bucket (e.g., a tooltip or overlay) can be injected into another + bucket with a transform applied, without rebuilding its draw calls. + +For our library, the likely adaptation would be replacing the single `GLOB` with a per-window draw +context that users create and pass to `begin`/`end`, while keeping the explicit-parameter draw call +style rather than adopting RAD's implicit state stacks. Texture and sampler resources would remain +global (shared across windows), with only the per-frame staging buffers and layer/scissor state +becoming per-context. + ## Building shaders GLSL shader sources live in `shaders/source/`. 
Compiled outputs (SPIR-V and Metal Shading Language) diff --git a/draw/draw.odin b/draw/draw.odin index 0cb0f82..0fb4934 100644 --- a/draw/draw.odin +++ b/draw/draw.odin @@ -63,15 +63,17 @@ Rectangle :: struct { } Sub_Batch_Kind :: enum u8 { - Shapes, // non-indexed, white texture, mode 0 + Shapes, // non-indexed, white texture or user texture, mode 0 Text, // indexed, atlas texture, mode 0 - SDF, // instanced unit quad, white texture, mode 1 + SDF, // instanced unit quad, white texture or user texture, mode 1 } Sub_Batch :: struct { - kind: Sub_Batch_Kind, - offset: u32, // Shapes: vertex offset; Text: text_batch index; SDF: primitive index - count: u32, // Shapes: vertex count; Text: always 1; SDF: primitive count + kind: Sub_Batch_Kind, + offset: u32, // Shapes: vertex offset; Text: text_batch index; SDF: primitive index + count: u32, // Shapes: vertex count; Text: always 1; SDF: primitive count + texture_id: Texture_Id, + sampler: Sampler_Preset, } Layer :: struct { @@ -95,35 +97,60 @@ Scissor :: struct { GLOB: Global Global :: struct { - odin_context: runtime.Context, - pipeline_2d_base: Pipeline_2D_Base, - text_cache: Text_Cache, - layers: [dynamic]Layer, - scissors: [dynamic]Scissor, - tmp_shape_verts: [dynamic]Vertex, - tmp_text_verts: [dynamic]Vertex, - tmp_text_indices: [dynamic]c.int, - tmp_text_batches: [dynamic]TextBatch, - tmp_primitives: [dynamic]Primitive, - tmp_sub_batches: [dynamic]Sub_Batch, - tmp_uncached_text: [dynamic]^sdl_ttf.Text, // Uncached TTF_Text objects to destroy after end() - clay_memory: [^]u8, - msaa_texture: ^sdl.GPUTexture, - curr_layer_index: uint, - max_layers: int, - max_scissors: int, - max_shape_verts: int, - max_text_verts: int, - max_text_indices: int, - max_text_batches: int, - max_primitives: int, - max_sub_batches: int, - dpi_scaling: f32, - msaa_width: u32, - msaa_height: u32, - sample_count: sdl.GPUSampleCount, - clay_z_index: i16, - cleared: bool, + // -- Per-frame staging (hottest — touched by every 
prepare/upload/clear cycle) -- + tmp_shape_verts: [dynamic]Vertex, // Tessellated shape vertices staged for GPU upload. + tmp_text_verts: [dynamic]Vertex, // Text vertices staged for GPU upload. + tmp_text_indices: [dynamic]c.int, // Text index buffer staged for GPU upload. + tmp_text_batches: [dynamic]TextBatch, // Text atlas batch metadata for indexed drawing. + tmp_primitives: [dynamic]Primitive, // SDF primitives staged for GPU storage buffer upload. + tmp_sub_batches: [dynamic]Sub_Batch, // Sub-batch records that drive draw call dispatch. + tmp_uncached_text: [dynamic]^sdl_ttf.Text, // Uncached TTF_Text objects destroyed after end() submits. + layers: [dynamic]Layer, // Draw layers, each with its own scissor stack. + scissors: [dynamic]Scissor, // Scissor rects that clip drawing within each layer. + + // -- Per-frame scalars (accessed during prepare and draw_layer) -- + curr_layer_index: uint, // Index of the currently active layer. + dpi_scaling: f32, // Window DPI scale factor applied to all pixel coordinates. + clay_z_index: i16, // Tracks z-index for layer splitting during Clay batch processing. + cleared: bool, // Whether the render target has been cleared this frame. + + // -- Pipeline (accessed every draw_layer call) -- + pipeline_2d_base: Pipeline_2D_Base, // The unified 2D GPU pipeline (shaders, buffers, samplers). + device: ^sdl.GPUDevice, // GPU device handle, stored at init. + samplers: [SAMPLER_PRESET_COUNT]^sdl.GPUSampler, // Lazily-created sampler objects, one per Sampler_Preset. + + // -- Deferred release (processed once per frame at frame boundary) -- + pending_texture_releases: [dynamic]Texture_Id, // Deferred GPU texture releases, processed next frame. + pending_text_releases: [dynamic]^sdl_ttf.Text, // Deferred TTF_Text destroys, processed next frame. + + // -- Textures (registration is occasional, binding is per draw call) -- + texture_slots: [dynamic]Texture_Slot, // Registered texture slots indexed by Texture_Id. 
+ texture_free_list: [dynamic]u32, // Recycled slot indices available for reuse. + + // -- MSAA (once per frame in end()) -- + msaa_texture: ^sdl.GPUTexture, // Intermediate render target for multi-sample resolve. + msaa_width: u32, // Cached width to detect when MSAA texture needs recreation. + msaa_height: u32, // Cached height to detect when MSAA texture needs recreation. + sample_count: sdl.GPUSampleCount, // Sample count chosen at init (._1 means MSAA disabled). + + // -- Clay (once per frame in prepare_clay_batch) -- + clay_memory: [^]u8, // Raw memory block backing Clay's internal arena. + + // -- Text (occasional — font registration and text cache lookups) -- + text_cache: Text_Cache, // Font registry, SDL_ttf engine, and cached TTF_Text objects. + + // -- Resize tracking (cold — checked once per frame in resize_global) -- + max_layers: int, // High-water marks for dynamic array shrink heuristic. + max_scissors: int, + max_shape_verts: int, + max_text_verts: int, + max_text_indices: int, + max_text_batches: int, + max_primitives: int, + max_sub_batches: int, + + // -- Init-only (coldest — set once at init, never written again) -- + odin_context: runtime.Context, // Odin context captured at init for use in callbacks. 
} Init_Options :: struct { @@ -168,22 +195,30 @@ init :: proc( } GLOB = Global { - layers = make([dynamic]Layer, 0, INITIAL_LAYER_SIZE, allocator = allocator), - scissors = make([dynamic]Scissor, 0, INITIAL_SCISSOR_SIZE, allocator = allocator), - tmp_shape_verts = make([dynamic]Vertex, 0, BUFFER_INIT_SIZE, allocator = allocator), - tmp_text_verts = make([dynamic]Vertex, 0, BUFFER_INIT_SIZE, allocator = allocator), - tmp_text_indices = make([dynamic]c.int, 0, BUFFER_INIT_SIZE, allocator = allocator), - tmp_text_batches = make([dynamic]TextBatch, 0, BUFFER_INIT_SIZE, allocator = allocator), - tmp_primitives = make([dynamic]Primitive, 0, BUFFER_INIT_SIZE, allocator = allocator), - tmp_sub_batches = make([dynamic]Sub_Batch, 0, BUFFER_INIT_SIZE, allocator = allocator), - tmp_uncached_text = make([dynamic]^sdl_ttf.Text, 0, 16, allocator = allocator), - odin_context = odin_context, - dpi_scaling = sdl.GetWindowDisplayScale(window), - clay_memory = make([^]u8, min_memory_size, allocator = allocator), - sample_count = resolved_sample_count, - pipeline_2d_base = pipeline, - text_cache = text_cache, + layers = make([dynamic]Layer, 0, INITIAL_LAYER_SIZE, allocator = allocator), + scissors = make([dynamic]Scissor, 0, INITIAL_SCISSOR_SIZE, allocator = allocator), + tmp_shape_verts = make([dynamic]Vertex, 0, BUFFER_INIT_SIZE, allocator = allocator), + tmp_text_verts = make([dynamic]Vertex, 0, BUFFER_INIT_SIZE, allocator = allocator), + tmp_text_indices = make([dynamic]c.int, 0, BUFFER_INIT_SIZE, allocator = allocator), + tmp_text_batches = make([dynamic]TextBatch, 0, BUFFER_INIT_SIZE, allocator = allocator), + tmp_primitives = make([dynamic]Primitive, 0, BUFFER_INIT_SIZE, allocator = allocator), + tmp_sub_batches = make([dynamic]Sub_Batch, 0, BUFFER_INIT_SIZE, allocator = allocator), + tmp_uncached_text = make([dynamic]^sdl_ttf.Text, 0, 16, allocator = allocator), + device = device, + texture_slots = make([dynamic]Texture_Slot, 0, 16, allocator = allocator), + texture_free_list = 
make([dynamic]u32, 0, 16, allocator = allocator), + pending_texture_releases = make([dynamic]Texture_Id, 0, 16, allocator = allocator), + pending_text_releases = make([dynamic]^sdl_ttf.Text, 0, 16, allocator = allocator), + odin_context = odin_context, + dpi_scaling = sdl.GetWindowDisplayScale(window), + clay_memory = make([^]u8, min_memory_size, allocator = allocator), + sample_count = resolved_sample_count, + pipeline_2d_base = pipeline, + text_cache = text_cache, } + + // Reserve slot 0 for INVALID_TEXTURE + append(&GLOB.texture_slots, Texture_Slot{}) log.debug("Window DPI scaling:", GLOB.dpi_scaling) arena := clay.CreateArenaWithCapacityAndMemory(min_memory_size, GLOB.clay_memory) window_width, window_height: c.int @@ -230,12 +265,23 @@ destroy :: proc(device: ^sdl.GPUDevice, allocator := context.allocator) { if GLOB.msaa_texture != nil { sdl.ReleaseGPUTexture(device, GLOB.msaa_texture) } + process_pending_texture_releases() + destroy_all_textures() + destroy_sampler_pool() + for ttf_text in GLOB.pending_text_releases do sdl_ttf.DestroyText(ttf_text) + delete(GLOB.pending_text_releases) destroy_pipeline_2d_base(device, &GLOB.pipeline_2d_base) destroy_text_cache() } // Internal clear_global :: proc() { + // Process deferred texture releases from the previous frame + process_pending_texture_releases() + // Process deferred text releases from the previous frame + for ttf_text in GLOB.pending_text_releases do sdl_ttf.DestroyText(ttf_text) + clear(&GLOB.pending_text_releases) + GLOB.curr_layer_index = 0 GLOB.clay_z_index = 0 GLOB.cleared = false @@ -455,15 +501,24 @@ append_or_extend_sub_batch :: proc( kind: Sub_Batch_Kind, offset: u32, count: u32, + texture_id: Texture_Id = INVALID_TEXTURE, + sampler: Sampler_Preset = .Linear_Clamp, ) { if scissor.sub_batch_len > 0 { last := &GLOB.tmp_sub_batches[scissor.sub_batch_start + scissor.sub_batch_len - 1] - if last.kind == kind && kind != .Text && last.offset + last.count == offset { + if last.kind == kind && + kind != 
.Text && + last.offset + last.count == offset && + last.texture_id == texture_id && + last.sampler == sampler { last.count += count return } } - append(&GLOB.tmp_sub_batches, Sub_Batch{kind = kind, offset = offset, count = count}) + append( + &GLOB.tmp_sub_batches, + Sub_Batch{kind = kind, offset = offset, count = count, texture_id = texture_id, sampler = sampler}, + ) scissor.sub_batch_len += 1 layer.sub_batch_len += 1 } @@ -554,6 +609,46 @@ prepare_clay_batch :: proc( ) prepare_text(layer, Text{sdl_text, {bounds.x, bounds.y}, color_from_clay(render_data.textColor)}) case clay.RenderCommandType.Image: + render_data := render_command.renderData.image + if render_data.imageData == nil do continue + img_data := (^Clay_Image_Data)(render_data.imageData)^ + cr := render_data.cornerRadius + radii := [4]f32{cr.topLeft, cr.topRight, cr.bottomRight, cr.bottomLeft} + + // Background color behind the image (Clay allows it) + bg := color_from_clay(render_data.backgroundColor) + if bg[3] > 0 { + if radii == {0, 0, 0, 0} { + rectangle(layer, bounds, bg) + } else { + rectangle_corners(layer, bounds, radii, bg) + } + } + + // Compute fit UVs + uv, sampler, inner := fit_params(img_data.fit, bounds, img_data.texture_id) + + // Draw the image — route by cornerRadius + if radii == {0, 0, 0, 0} { + rectangle_texture( + layer, + inner, + img_data.texture_id, + tint = img_data.tint, + uv_rect = uv, + sampler = sampler, + ) + } else { + rectangle_texture_corners( + layer, + inner, + radii, + img_data.texture_id, + tint = img_data.tint, + uv_rect = uv, + sampler = sampler, + ) + } case clay.RenderCommandType.ScissorStart: if bounds.width == 0 || bounds.height == 0 do continue diff --git a/draw/draw_qr/draw_qr.odin b/draw/draw_qr/draw_qr.odin new file mode 100644 index 0000000..9fb3a0f --- /dev/null +++ b/draw/draw_qr/draw_qr.odin @@ -0,0 +1,78 @@ +package draw_qr + +import draw ".." +import "../../qrcode" + +// A registered QR code texture, ready for display via draw.rectangle_texture. 
+QR :: struct { + texture_id: draw.Texture_Id, + size: int, // modules per side (e.g. 21..177) +} + +// Encode text as a QR code and register the result as an R8 texture. +// Sampling is chosen per draw call (draw.rectangle_texture defaults to .Linear_Clamp); pass `sampler = .Nearest_Clamp` for sharp module edges. +// Returns ok=false if encoding or registration fails. +@(require_results) +create_from_text :: proc( + text: string, + ecl: qrcode.Ecc = .Low, + min_version: int = qrcode.VERSION_MIN, + max_version: int = qrcode.VERSION_MAX, + mask: Maybe(qrcode.Mask) = nil, + boost_ecl: bool = true, +) -> ( + qr: QR, + ok: bool, +) { + qrcode_buf: [qrcode.BUFFER_LEN_MAX]u8 + encode_ok := qrcode.encode(text, qrcode_buf[:], ecl, min_version, max_version, mask, boost_ecl) + if !encode_ok do return {}, false + return create(qrcode_buf[:]) +} + +// Register an already-encoded QR code buffer as an R8 texture (one byte per module, size x size). +// qrcode_buf must be the output of qrcode.encode (byte 0 = side length, remaining = bit-packed modules). Returns ok=false when qrcode.get_size reports 0 or registration fails. +@(require_results) +create :: proc(qrcode_buf: []u8) -> (qr: QR, ok: bool) { + size := qrcode.get_size(qrcode_buf) + if size == 0 do return {}, false + + // Build R8 pixel buffer in the temp allocator, row-major: 0 = light module, 255 = dark module + pixels := make([]u8, size * size, context.temp_allocator) + for y in 0 ..< size { + for x in 0 ..< size { + pixels[y * size + x] = 255 if qrcode.get_module(qrcode_buf, x, y) else 0 + } + } + + id, reg_ok := draw.register_texture( + draw.Texture_Desc { + width = u32(size), + height = u32(size), + depth_or_layers = 1, + type = .D2, + format = .R8_UNORM, + usage = {.SAMPLER}, + mip_levels = 1, + kind = .Static, + }, + pixels, + ) + if !reg_ok do return {}, false + + return QR{texture_id = id, size = size}, true +} + +// Unregister the texture with draw and reset `qr` (texture_id = INVALID_TEXTURE, size = 0) so the stale id is not reused. +destroy :: proc(qr: ^QR) { + draw.unregister_texture(qr.texture_id) + qr.texture_id = draw.INVALID_TEXTURE + qr.size = 0 +} + +// Convenience: build a Clay_Image_Data for embedding a QR in Clay layouts. 
+// Uses Nearest_Clamp sampling (set via Sampler_Preset at draw time, not here) and Fit mode +// to preserve the QR's square aspect ratio. +clay_image :: proc(qr: QR, tint: draw.Color = draw.WHITE) -> draw.Clay_Image_Data { + return draw.clay_image_data(qr.texture_id, fit = .Fit, tint = tint) +} diff --git a/draw/examples/hellope.odin b/draw/examples/hellope.odin index 08026da..eb945bd 100644 --- a/draw/examples/hellope.odin +++ b/draw/examples/hellope.odin @@ -78,10 +78,11 @@ hellope_shapes :: proc() { draw.ellipse(base_layer, {410, 340}, 50, 30, {255, 200, 50, 255}, rotation = spin_angle) // Circle orbiting a point (moon orbiting planet) + // Convention B: center = pivot point (planet), origin = offset from moon center to pivot. + // Moon's visual center at rotation=0: planet_pos - origin = (100, 450) - (0, 40) = (100, 410). planet_pos := [2]f32{100, 450} - moon_pos := planet_pos + {0, -40} draw.circle(base_layer, planet_pos, 8, {200, 200, 200, 255}) // planet (stationary) - draw.circle(base_layer, moon_pos, 5, {100, 150, 255, 255}, origin = {0, 40}, rotation = spin_angle) // moon orbiting + draw.circle(base_layer, planet_pos, 5, {100, 150, 255, 255}, origin = {0, 40}, rotation = spin_angle) // moon orbiting // Ring arc rotating in place draw.ring(base_layer, {250, 450}, 15, 30, 0, 270, {100, 100, 220, 255}, rotation = spin_angle) diff --git a/draw/examples/main.odin b/draw/examples/main.odin index f8107eb..e3ee109 100644 --- a/draw/examples/main.odin +++ b/draw/examples/main.odin @@ -57,7 +57,7 @@ main :: proc() { args := os.args if len(args) < 2 { fmt.eprintln("Usage: examples ") - fmt.eprintln("Available examples: hellope-shapes, hellope-text, hellope-clay, hellope-custom") + fmt.eprintln("Available examples: hellope-shapes, hellope-text, hellope-clay, hellope-custom, textures") os.exit(1) } @@ -66,9 +66,10 @@ main :: proc() { case "hellope-custom": hellope_custom() case "hellope-shapes": hellope_shapes() case "hellope-text": hellope_text() + case "textures": 
textures() case: fmt.eprintf("Unknown example: %v\n", args[1]) - fmt.eprintln("Available examples: hellope-shapes, hellope-text, hellope-clay, hellope-custom") + fmt.eprintln("Available examples: hellope-shapes, hellope-text, hellope-clay, hellope-custom, textures") os.exit(1) } } diff --git a/draw/examples/textures.odin b/draw/examples/textures.odin new file mode 100644 index 0000000..ca53ba3 --- /dev/null +++ b/draw/examples/textures.odin @@ -0,0 +1,285 @@ +package examples + +import "../../draw" +import "../../draw/draw_qr" +import "core:math" +import "core:os" +import sdl "vendor:sdl3" + +textures :: proc() { + if !sdl.Init({.VIDEO}) do os.exit(1) + window := sdl.CreateWindow("Textures", 800, 600, {.HIGH_PIXEL_DENSITY}) + gpu := sdl.CreateGPUDevice(draw.PLATFORM_SHADER_FORMAT, true, nil) + if !sdl.ClaimWindowForGPUDevice(gpu, window) do os.exit(1) + if !draw.init(gpu, window) do os.exit(1) + JETBRAINS_MONO_REGULAR = draw.register_font(JETBRAINS_MONO_REGULAR_RAW) + + FONT_SIZE :: u16(14) + LABEL_OFFSET :: f32(8) // gap between item and its label + + // ------------------------------------------------------------------------- + // Procedural checkerboard texture (8x8, RGBA8) + // ------------------------------------------------------------------------- + checker_size :: 8 + checker_pixels: [checker_size * checker_size * 4]u8 + for y in 0 ..< checker_size { + for x in 0 ..< checker_size { + i := (y * checker_size + x) * 4 + is_dark := ((x + y) % 2) == 0 + val: u8 = 40 if is_dark else 220 + checker_pixels[i + 0] = val // R + checker_pixels[i + 1] = val / 2 // G — slight color tint + checker_pixels[i + 2] = val // B + checker_pixels[i + 3] = 255 // A + } + } + checker_texture, _ := draw.register_texture( + draw.Texture_Desc { + width = checker_size, + height = checker_size, + depth_or_layers = 1, + type = .D2, + format = .R8G8B8A8_UNORM, + usage = {.SAMPLER}, + mip_levels = 1, + }, + checker_pixels[:], + ) + defer draw.unregister_texture(checker_texture) + + // 
------------------------------------------------------------------------- + // Non-square gradient stripe texture (16x8, RGBA8) for fit mode demos + // ------------------------------------------------------------------------- + stripe_w :: 16 + stripe_h :: 8 + stripe_pixels: [stripe_w * stripe_h * 4]u8 + for y in 0 ..< stripe_h { + for x in 0 ..< stripe_w { + i := (y * stripe_w + x) * 4 + stripe_pixels[i + 0] = u8(x * 255 / (stripe_w - 1)) // R gradient left→right + stripe_pixels[i + 1] = u8(y * 255 / (stripe_h - 1)) // G gradient top→bottom + stripe_pixels[i + 2] = 128 // B constant + stripe_pixels[i + 3] = 255 // A + } + } + stripe_texture, _ := draw.register_texture( + draw.Texture_Desc { + width = stripe_w, + height = stripe_h, + depth_or_layers = 1, + type = .D2, + format = .R8G8B8A8_UNORM, + usage = {.SAMPLER}, + mip_levels = 1, + }, + stripe_pixels[:], + ) + defer draw.unregister_texture(stripe_texture) + + // ------------------------------------------------------------------------- + // QR code texture (R8_UNORM — see rendering note below) + // ------------------------------------------------------------------------- + qr, _ := draw_qr.create_from_text("https://odin-lang.org/") + defer draw_qr.destroy(&qr) + + spin_angle: f32 = 0 + + for { + defer free_all(context.temp_allocator) + ev: sdl.Event + for sdl.PollEvent(&ev) { + if ev.type == .QUIT do return + } + spin_angle += 1 + + base_layer := draw.begin({width = 800, height = 600}) + + // Background + draw.rectangle(base_layer, {0, 0, 800, 600}, {30, 30, 30, 255}) + + // ===================================================================== + // Row 1: Sampler presets (y=30) + // ===================================================================== + ROW1_Y :: f32(30) + ITEM_SIZE :: f32(120) + COL1 :: f32(30) + COL2 :: f32(180) + COL3 :: f32(330) + COL4 :: f32(480) + + // Nearest (sharp pixel edges) + draw.rectangle_texture( + base_layer, + {COL1, ROW1_Y, ITEM_SIZE, ITEM_SIZE}, + checker_texture, + sampler = 
.Nearest_Clamp, + ) + draw.text( + base_layer, + "Nearest", + {COL1, ROW1_Y + ITEM_SIZE + LABEL_OFFSET}, + JETBRAINS_MONO_REGULAR, + FONT_SIZE, + color = draw.WHITE, + ) + + // Linear (bilinear blur) + draw.rectangle_texture( + base_layer, + {COL2, ROW1_Y, ITEM_SIZE, ITEM_SIZE}, + checker_texture, + sampler = .Linear_Clamp, + ) + draw.text( + base_layer, + "Linear", + {COL2, ROW1_Y + ITEM_SIZE + LABEL_OFFSET}, + JETBRAINS_MONO_REGULAR, + FONT_SIZE, + color = draw.WHITE, + ) + + // Tiled (4x repeat) + draw.rectangle_texture( + base_layer, + {COL3, ROW1_Y, ITEM_SIZE, ITEM_SIZE}, + checker_texture, + sampler = .Nearest_Repeat, + uv_rect = {0, 0, 4, 4}, + ) + draw.text( + base_layer, + "Tiled 4x", + {COL3, ROW1_Y + ITEM_SIZE + LABEL_OFFSET}, + JETBRAINS_MONO_REGULAR, + FONT_SIZE, + color = draw.WHITE, + ) + + // ===================================================================== + // Row 2: QR code, Rounded, Rotating (y=190) + // ===================================================================== + ROW2_Y :: f32(190) + + // QR code (R8_UNORM texture, nearest sampling) + // NOTE: R8_UNORM samples as (r, 0, 0, 1) in Metal's default swizzle. + // With WHITE tint: dark modules (R=1) → red, light modules (R=0) → black. + // The result is a red-on-black QR code. The white bg rect below is + // occluded by the fully-opaque texture but kept for illustration. 
+ draw.rectangle(base_layer, {COL1, ROW2_Y, ITEM_SIZE, ITEM_SIZE}, {255, 255, 255, 255}) // white bg + draw.rectangle_texture( + base_layer, + {COL1, ROW2_Y, ITEM_SIZE, ITEM_SIZE}, + qr.texture_id, + sampler = .Nearest_Clamp, + ) + draw.text( + base_layer, + "QR Code", + {COL1, ROW2_Y + ITEM_SIZE + LABEL_OFFSET}, + JETBRAINS_MONO_REGULAR, + FONT_SIZE, + color = draw.WHITE, + ) + + // Rounded corners + draw.rectangle_texture( + base_layer, + {COL2, ROW2_Y, ITEM_SIZE, ITEM_SIZE}, + checker_texture, + sampler = .Nearest_Clamp, + roundness = 0.3, + ) + draw.text( + base_layer, + "Rounded", + {COL2, ROW2_Y + ITEM_SIZE + LABEL_OFFSET}, + JETBRAINS_MONO_REGULAR, + FONT_SIZE, + color = draw.WHITE, + ) + + // Rotating + rot_rect := draw.Rectangle{COL3, ROW2_Y, ITEM_SIZE, ITEM_SIZE} + draw.rectangle_texture( + base_layer, + rot_rect, + checker_texture, + sampler = .Nearest_Clamp, + origin = draw.center_of(rot_rect), + rotation = spin_angle, + ) + draw.text( + base_layer, + "Rotating", + {COL3, ROW2_Y + ITEM_SIZE + LABEL_OFFSET}, + JETBRAINS_MONO_REGULAR, + FONT_SIZE, + color = draw.WHITE, + ) + + // ===================================================================== + // Row 3: Fit modes + Per-corner radii (y=360) + // ===================================================================== + ROW3_Y :: f32(360) + FIT_SIZE :: f32(120) // square target rect + + // Stretch + uv_s, sampler_s, inner_s := draw.fit_params(.Stretch, {COL1, ROW3_Y, FIT_SIZE, FIT_SIZE}, stripe_texture) + draw.rectangle(base_layer, {COL1, ROW3_Y, FIT_SIZE, FIT_SIZE}, {60, 60, 60, 255}) // bg + draw.rectangle_texture(base_layer, inner_s, stripe_texture, uv_rect = uv_s, sampler = sampler_s) + draw.text( + base_layer, + "Stretch", + {COL1, ROW3_Y + FIT_SIZE + LABEL_OFFSET}, + JETBRAINS_MONO_REGULAR, + FONT_SIZE, + color = draw.WHITE, + ) + + // Fill (center-crop) + uv_f, sampler_f, inner_f := draw.fit_params(.Fill, {COL2, ROW3_Y, FIT_SIZE, FIT_SIZE}, stripe_texture) + draw.rectangle(base_layer, {COL2, 
ROW3_Y, FIT_SIZE, FIT_SIZE}, {60, 60, 60, 255}) + draw.rectangle_texture(base_layer, inner_f, stripe_texture, uv_rect = uv_f, sampler = sampler_f) + draw.text( + base_layer, + "Fill", + {COL2, ROW3_Y + FIT_SIZE + LABEL_OFFSET}, + JETBRAINS_MONO_REGULAR, + FONT_SIZE, + color = draw.WHITE, + ) + + // Fit (letterbox) + uv_ft, sampler_ft, inner_ft := draw.fit_params(.Fit, {COL3, ROW3_Y, FIT_SIZE, FIT_SIZE}, stripe_texture) + draw.rectangle(base_layer, {COL3, ROW3_Y, FIT_SIZE, FIT_SIZE}, {60, 60, 60, 255}) // visible margin bg + draw.rectangle_texture(base_layer, inner_ft, stripe_texture, uv_rect = uv_ft, sampler = sampler_ft) + draw.text( + base_layer, + "Fit", + {COL3, ROW3_Y + FIT_SIZE + LABEL_OFFSET}, + JETBRAINS_MONO_REGULAR, + FONT_SIZE, + color = draw.WHITE, + ) + + // Per-corner radii + draw.rectangle_texture_corners( + base_layer, + {COL4, ROW3_Y, FIT_SIZE, FIT_SIZE}, + {20, 0, 20, 0}, + checker_texture, + sampler = .Nearest_Clamp, + ) + draw.text( + base_layer, + "Per-corner", + {COL4, ROW3_Y + FIT_SIZE + LABEL_OFFSET}, + JETBRAINS_MONO_REGULAR, + FONT_SIZE, + color = draw.WHITE, + ) + + draw.end(gpu, window) + } +} diff --git a/draw/pipeline_2d_base.odin b/draw/pipeline_2d_base.odin index 7b27ca2..a69facb 100644 --- a/draw/pipeline_2d_base.odin +++ b/draw/pipeline_2d_base.odin @@ -35,6 +35,7 @@ Shape_Kind :: enum u8 { Shape_Flag :: enum u8 { Stroke, + Textured, } Shape_Flags :: bit_set[Shape_Flag;u8] @@ -106,9 +107,10 @@ Primitive :: struct { rotation: f32, // 24: shader self-rotation in radians (used by RRect, Ellipse) _pad: f32, // 28: alignment to vec4 boundary params: Shape_Params, // 32: two vec4s of shape params + uv_rect: [4]f32, // 64: u_min, v_min, u_max, v_max (default {0,0,1,1}) } -#assert(size_of(Primitive) == 64) +#assert(size_of(Primitive) == 80) pack_kind_flags :: #force_inline proc(kind: Shape_Kind, flags: Shape_Flags) -> u32 { return u32(kind) | (u32(transmute(u8)flags) << 8) @@ -566,6 +568,7 @@ draw_layer :: proc( current_mode: Draw_Mode = 
.Tessellated current_vert_buf := main_vert_buf current_atlas: ^sdl.GPUTexture + current_sampler := sampler // Text vertices live after shape vertices in the GPU vertex buffer text_vertex_gpu_base := u32(len(GLOB.tmp_shape_verts)) @@ -584,14 +587,24 @@ draw_layer :: proc( sdl.BindGPUVertexBuffers(render_pass, 0, &sdl.GPUBufferBinding{buffer = main_vert_buf, offset = 0}, 1) current_vert_buf = main_vert_buf } - if current_atlas != white_texture { + // Determine texture and sampler for this batch + batch_texture: ^sdl.GPUTexture = white_texture + batch_sampler: ^sdl.GPUSampler = sampler + if batch.texture_id != INVALID_TEXTURE { + if bound_texture := texture_gpu_handle(batch.texture_id); bound_texture != nil { + batch_texture = bound_texture + } + batch_sampler = get_sampler(batch.sampler) + } + if current_atlas != batch_texture || current_sampler != batch_sampler { sdl.BindGPUFragmentSamplers( render_pass, 0, - &sdl.GPUTextureSamplerBinding{texture = white_texture, sampler = sampler}, + &sdl.GPUTextureSamplerBinding{texture = batch_texture, sampler = batch_sampler}, 1, ) - current_atlas = white_texture + current_atlas = batch_texture + current_sampler = batch_sampler } sdl.DrawGPUPrimitives(render_pass, batch.count, 1, batch.offset, 0) @@ -632,14 +645,24 @@ draw_layer :: proc( sdl.BindGPUVertexBuffers(render_pass, 0, &sdl.GPUBufferBinding{buffer = unit_quad, offset = 0}, 1) current_vert_buf = unit_quad } - if current_atlas != white_texture { + // Determine texture and sampler for this batch + batch_texture: ^sdl.GPUTexture = white_texture + batch_sampler: ^sdl.GPUSampler = sampler + if batch.texture_id != INVALID_TEXTURE { + if bound_texture := texture_gpu_handle(batch.texture_id); bound_texture != nil { + batch_texture = bound_texture + } + batch_sampler = get_sampler(batch.sampler) + } + if current_atlas != batch_texture || current_sampler != batch_sampler { sdl.BindGPUFragmentSamplers( render_pass, 0, - &sdl.GPUTextureSamplerBinding{texture = white_texture, sampler 
= sampler}, + &sdl.GPUTextureSamplerBinding{texture = batch_texture, sampler = batch_sampler}, 1, ) - current_atlas = white_texture + current_atlas = batch_texture + current_sampler = batch_sampler } sdl.DrawGPUPrimitives(render_pass, 6, batch.count, 0, batch.offset) } diff --git a/draw/shaders/generated/base_2d.frag.metal b/draw/shaders/generated/base_2d.frag.metal index e03eb46..7a4b934 100644 --- a/draw/shaders/generated/base_2d.frag.metal +++ b/draw/shaders/generated/base_2d.frag.metal @@ -25,6 +25,7 @@ struct main0_in float4 f_params2 [[user(locn3)]]; uint f_kind_flags [[user(locn4)]]; float f_rotation [[user(locn5), flat]]; + float4 f_uv_rect [[user(locn6), flat]]; }; static inline __attribute__((always_inline)) @@ -69,6 +70,12 @@ float sdf_stroke(thread const float& d, thread const float& stroke_width) return abs(d) - (stroke_width * 0.5); } +static inline __attribute__((always_inline)) +float sdf_alpha(thread const float& d, thread const float& soft) +{ + return 1.0 - smoothstep(-soft, soft, d); +} + static inline __attribute__((always_inline)) float sdCircle(thread const float2& p, thread const float& r) { @@ -127,12 +134,6 @@ float sdSegment(thread const float2& p, thread const float2& a, thread const flo return length(pa - (ba * h)); } -static inline __attribute__((always_inline)) -float sdf_alpha(thread const float& d, thread const float& soft) -{ - return 1.0 - smoothstep(-soft, soft, d); -} - fragment main0_out main0(main0_in in [[stage_in]], texture2d tex [[texture(0)]], sampler texSmplr [[sampler(0)]]) { main0_out out = {}; @@ -169,6 +170,25 @@ fragment main0_out main0(main0_in in [[stage_in]], texture2d tex [[textur float param_6 = stroke_px; d = sdf_stroke(param_5, param_6); } + float4 shape_color = in.f_color; + if ((flags & 2u) != 0u) + { + float2 p_for_uv = in.f_local_or_uv; + if (in.f_rotation != 0.0) + { + float2 param_7 = p_for_uv; + float param_8 = in.f_rotation; + p_for_uv = apply_rotation(param_7, param_8); + } + float2 local_uv = 
((p_for_uv / b) * 0.5) + float2(0.5); + float2 uv = mix(in.f_uv_rect.xy, in.f_uv_rect.zw, local_uv); + shape_color *= tex.sample(texSmplr, uv); + } + float param_9 = d; + float param_10 = soft; + float alpha = sdf_alpha(param_9, param_10); + out.out_color = float4(shape_color.xyz, shape_color.w * alpha); + return out; } else { @@ -177,14 +197,14 @@ fragment main0_out main0(main0_in in [[stage_in]], texture2d tex [[textur float radius = in.f_params.x; soft = fast::max(in.f_params.y, 1.0); float stroke_px_1 = in.f_params.z; - float2 param_7 = in.f_local_or_uv; - float param_8 = radius; - d = sdCircle(param_7, param_8); + float2 param_11 = in.f_local_or_uv; + float param_12 = radius; + d = sdCircle(param_11, param_12); if ((flags & 1u) != 0u) { - float param_9 = d; - float param_10 = stroke_px_1; - d = sdf_stroke(param_9, param_10); + float param_13 = d; + float param_14 = stroke_px_1; + d = sdf_stroke(param_13, param_14); } } else @@ -197,19 +217,19 @@ fragment main0_out main0(main0_in in [[stage_in]], texture2d tex [[textur float2 p_local_1 = in.f_local_or_uv; if (in.f_rotation != 0.0) { - float2 param_11 = p_local_1; - float param_12 = in.f_rotation; - p_local_1 = apply_rotation(param_11, param_12); + float2 param_15 = p_local_1; + float param_16 = in.f_rotation; + p_local_1 = apply_rotation(param_15, param_16); } - float2 param_13 = p_local_1; - float2 param_14 = ab; - float _560 = sdEllipse(param_13, param_14); - d = _560; + float2 param_17 = p_local_1; + float2 param_18 = ab; + float _616 = sdEllipse(param_17, param_18); + d = _616; if ((flags & 1u) != 0u) { - float param_15 = d; - float param_16 = stroke_px_2; - d = sdf_stroke(param_15, param_16); + float param_19 = d; + float param_20 = stroke_px_2; + d = sdf_stroke(param_19, param_20); } } else @@ -220,10 +240,10 @@ fragment main0_out main0(main0_in in [[stage_in]], texture2d tex [[textur float2 b_1 = in.f_params.zw; float width = in.f_params2.x; soft = fast::max(in.f_params2.y, 1.0); - float2 param_17 = 
in.f_local_or_uv; - float2 param_18 = a; - float2 param_19 = b_1; - d = sdSegment(param_17, param_18, param_19) - (width * 0.5); + float2 param_21 = in.f_local_or_uv; + float2 param_22 = a; + float2 param_23 = b_1; + d = sdSegment(param_21, param_22, param_23) - (width * 0.5); } else { @@ -243,16 +263,16 @@ fragment main0_out main0(main0_in in [[stage_in]], texture2d tex [[textur } float ang_start = mod(start_rad, 6.283185482025146484375); float ang_end = mod(end_rad, 6.283185482025146484375); - float _654; + float _710; if (ang_end > ang_start) { - _654 = float((angle >= ang_start) && (angle <= ang_end)); + _710 = float((angle >= ang_start) && (angle <= ang_end)); } else { - _654 = float((angle >= ang_start) || (angle <= ang_end)); + _710 = float((angle >= ang_start) || (angle <= ang_end)); } - float in_arc = _654; + float in_arc = _710; if (abs(ang_end - ang_start) >= 6.282185077667236328125) { in_arc = 1.0; @@ -277,9 +297,9 @@ fragment main0_out main0(main0_in in [[stage_in]], texture2d tex [[textur d = (length(p) * cos(bn)) - radius_1; if ((flags & 1u) != 0u) { - float param_20 = d; - float param_21 = stroke_px_3; - d = sdf_stroke(param_20, param_21); + float param_24 = d; + float param_25 = stroke_px_3; + d = sdf_stroke(param_24, param_25); } } } @@ -287,10 +307,9 @@ fragment main0_out main0(main0_in in [[stage_in]], texture2d tex [[textur } } } - float param_22 = d; - float param_23 = soft; - float alpha = sdf_alpha(param_22, param_23); - out.out_color = float4(in.f_color.xyz, in.f_color.w * alpha); + float param_26 = d; + float param_27 = soft; + float alpha_1 = sdf_alpha(param_26, param_27); + out.out_color = float4(in.f_color.xyz, in.f_color.w * alpha_1); return out; } - diff --git a/draw/shaders/generated/base_2d.frag.spv b/draw/shaders/generated/base_2d.frag.spv index 3917929..c1411b3 100644 Binary files a/draw/shaders/generated/base_2d.frag.spv and b/draw/shaders/generated/base_2d.frag.spv differ diff --git a/draw/shaders/generated/base_2d.vert.metal 
b/draw/shaders/generated/base_2d.vert.metal index b24ba01..75fa3b4 100644 --- a/draw/shaders/generated/base_2d.vert.metal +++ b/draw/shaders/generated/base_2d.vert.metal @@ -19,6 +19,7 @@ struct Primitive float _pad; float4 params; float4 params2; + float4 uv_rect; }; struct Primitive_1 @@ -30,6 +31,7 @@ struct Primitive_1 float _pad; float4 params; float4 params2; + float4 uv_rect; }; struct Primitives @@ -45,6 +47,7 @@ struct main0_out float4 f_params2 [[user(locn3)]]; uint f_kind_flags [[user(locn4)]]; float f_rotation [[user(locn5)]]; + float4 f_uv_rect [[user(locn6)]]; float4 gl_Position [[position]]; }; @@ -55,7 +58,7 @@ struct main0_in float4 v_color [[attribute(2)]]; }; -vertex main0_out main0(main0_in in [[stage_in]], constant Uniforms& _12 [[buffer(0)]], const device Primitives& _72 [[buffer(1)]], uint gl_InstanceIndex [[instance_id]]) +vertex main0_out main0(main0_in in [[stage_in]], constant Uniforms& _12 [[buffer(0)]], const device Primitives& _74 [[buffer(1)]], uint gl_InstanceIndex [[instance_id]]) { main0_out out = {}; if (_12.mode == 0u) @@ -66,18 +69,20 @@ vertex main0_out main0(main0_in in [[stage_in]], constant Uniforms& _12 [[buffer out.f_params2 = float4(0.0); out.f_kind_flags = 0u; out.f_rotation = 0.0; + out.f_uv_rect = float4(0.0, 0.0, 1.0, 1.0); out.gl_Position = _12.projection * float4(in.v_position * _12.dpi_scale, 0.0, 1.0); } else { Primitive p; - p.bounds = _72.primitives[int(gl_InstanceIndex)].bounds; - p.color = _72.primitives[int(gl_InstanceIndex)].color; - p.kind_flags = _72.primitives[int(gl_InstanceIndex)].kind_flags; - p.rotation = _72.primitives[int(gl_InstanceIndex)].rotation; - p._pad = _72.primitives[int(gl_InstanceIndex)]._pad; - p.params = _72.primitives[int(gl_InstanceIndex)].params; - p.params2 = _72.primitives[int(gl_InstanceIndex)].params2; + p.bounds = _74.primitives[int(gl_InstanceIndex)].bounds; + p.color = _74.primitives[int(gl_InstanceIndex)].color; + p.kind_flags = 
_74.primitives[int(gl_InstanceIndex)].kind_flags; + p.rotation = _74.primitives[int(gl_InstanceIndex)].rotation; + p._pad = _74.primitives[int(gl_InstanceIndex)]._pad; + p.params = _74.primitives[int(gl_InstanceIndex)].params; + p.params2 = _74.primitives[int(gl_InstanceIndex)].params2; + p.uv_rect = _74.primitives[int(gl_InstanceIndex)].uv_rect; float2 corner = in.v_position; float2 world_pos = mix(p.bounds.xy, p.bounds.zw, corner); float2 center = (p.bounds.xy + p.bounds.zw) * 0.5; @@ -87,8 +92,8 @@ vertex main0_out main0(main0_in in [[stage_in]], constant Uniforms& _12 [[buffer out.f_params2 = p.params2; out.f_kind_flags = p.kind_flags; out.f_rotation = p.rotation; + out.f_uv_rect = p.uv_rect; out.gl_Position = _12.projection * float4(world_pos * _12.dpi_scale, 0.0, 1.0); } return out; } - diff --git a/draw/shaders/generated/base_2d.vert.spv b/draw/shaders/generated/base_2d.vert.spv index c318fc2..ca08cba 100644 Binary files a/draw/shaders/generated/base_2d.vert.spv and b/draw/shaders/generated/base_2d.vert.spv differ diff --git a/draw/shaders/source/base_2d.frag b/draw/shaders/source/base_2d.frag index e6af939..cf301d5 100644 --- a/draw/shaders/source/base_2d.frag +++ b/draw/shaders/source/base_2d.frag @@ -7,6 +7,7 @@ layout(location = 2) in vec4 f_params; layout(location = 3) in vec4 f_params2; layout(location = 4) flat in uint f_kind_flags; layout(location = 5) flat in float f_rotation; +layout(location = 6) flat in vec4 f_uv_rect; // --- Output --- layout(location = 0) out vec4 out_color; @@ -130,6 +131,23 @@ void main() { d = sdRoundedBox(p_local, b, r); if ((flags & 1u) != 0u) d = sdf_stroke(d, stroke_px); + + // Texture sampling for textured SDF primitives + vec4 shape_color = f_color; + if ((flags & 2u) != 0u) { + // Compute UV from local position and half_size + vec2 p_for_uv = f_local_or_uv; + if (f_rotation != 0.0) { + p_for_uv = apply_rotation(p_for_uv, f_rotation); + } + vec2 local_uv = p_for_uv / b * 0.5 + 0.5; + vec2 uv = mix(f_uv_rect.xy, 
f_uv_rect.zw, local_uv); + shape_color *= texture(tex, uv); + } + + float alpha = sdf_alpha(d, soft); + out_color = vec4(shape_color.rgb, shape_color.a * alpha); + return; } else if (kind == 2u) { // Circle — rotationally symmetric, no rotation needed diff --git a/draw/shaders/source/base_2d.vert b/draw/shaders/source/base_2d.vert index e72aa3b..a43b51f 100644 --- a/draw/shaders/source/base_2d.vert +++ b/draw/shaders/source/base_2d.vert @@ -12,6 +12,7 @@ layout(location = 2) out vec4 f_params; layout(location = 3) out vec4 f_params2; layout(location = 4) flat out uint f_kind_flags; layout(location = 5) flat out float f_rotation; +layout(location = 6) flat out vec4 f_uv_rect; // ---------- Uniforms (single block — avoids spirv-cross reordering on Metal) ---------- layout(set = 1, binding = 0) uniform Uniforms { @@ -29,6 +30,7 @@ struct Primitive { float _pad; // 28-31: alignment padding vec4 params; // 32-47: shape params part 1 vec4 params2; // 48-63: shape params part 2 + vec4 uv_rect; // 64-79: u_min, v_min, u_max, v_max }; layout(std430, set = 0, binding = 0) readonly buffer Primitives { @@ -45,6 +47,7 @@ void main() { f_params2 = vec4(0.0); f_kind_flags = 0u; f_rotation = 0.0; + f_uv_rect = vec4(0.0, 0.0, 1.0, 1.0); gl_Position = projection * vec4(v_position * dpi_scale, 0.0, 1.0); } else { @@ -61,6 +64,7 @@ void main() { f_params2 = p.params2; f_kind_flags = p.kind_flags; f_rotation = p.rotation; + f_uv_rect = p.uv_rect; gl_Position = projection * vec4(world_pos * dpi_scale, 0.0, 1.0); } diff --git a/draw/shapes.odin b/draw/shapes.odin index 5a8b929..cca0140 100644 --- a/draw/shapes.odin +++ b/draw/shapes.odin @@ -68,6 +68,19 @@ emit_rectangle :: proc(x, y, width, height: f32, color: Color, vertices: []Verte vertices[offset + 5] = solid_vertex({x, y + height}, color) } +@(private = "file") +prepare_sdf_primitive_textured :: proc( + layer: ^Layer, + prim: Primitive, + texture_id: Texture_Id, + sampler: Sampler_Preset, +) { + offset := 
u32(len(GLOB.tmp_primitives)) + append(&GLOB.tmp_primitives, prim) + scissor := &GLOB.scissors[layer.scissor_start + layer.scissor_len - 1] + append_or_extend_sub_batch(scissor, layer, .SDF, offset, 1, texture_id, sampler) +} + // ----- Drawing functions ---- pixel :: proc(layer: ^Layer, pos: [2]f32, color: Color) { @@ -358,17 +371,20 @@ triangle_strip :: proc( // ----- SDF drawing functions ---- -// Compute new center position after rotating a center-parametrized shape -// around a pivot point. The pivot is at (center + origin) in world space. +// Compute the visual center of a center-parametrized shape after applying +// Convention B origin semantics: `center` is where the origin-point lands in +// world space; the visual center is offset by -origin and then rotated around +// the landing point. +// visual_center = center + R(θ) · (-origin) +// When θ=0: visual_center = center - origin (pure positioning shift). +// When origin={0,0}: visual_center = center (no change). @(private = "file") compute_pivot_center :: proc(center: [2]f32, origin: [2]f32, rotation_deg: f32) -> [2]f32 { if origin == {0, 0} do return center theta := math.to_radians(rotation_deg) cos_angle, sin_angle := math.cos(theta), math.sin(theta) - // pivot = center + origin; new_center = pivot + R(θ) * (center - pivot) return( center + - origin + {cos_angle * (-origin.x) - sin_angle * (-origin.y), sin_angle * (-origin.x) + cos_angle * (-origin.y)} \ ) } @@ -384,6 +400,13 @@ rotated_aabb_half_extents :: proc(half_width, half_height, rotation_radians: f32 // Draw a filled rectangle via SDF (analytical anti-aliasing at all orientations). // `roundness` is a 0–1 fraction controlling uniform corner rounding — 0 is sharp, 1 is fully rounded. // For per-corner pixel-precise rounding, use `rectangle_corners` instead. +// +// Origin semantics: +// `origin` is a local offset from the rect's top-left corner that selects both the positioning +// anchor and the rotation pivot. 
`rect.x, rect.y` specifies where that anchor point lands in +// world space. When `origin = {0, 0}` (default), `rect.x, rect.y` is the top-left corner. +// When `origin = center_of_rectangle(rect)`, `rect.x, rect.y` is the visual center. +// Rotation always occurs around the anchor point. rectangle :: proc( layer: ^Layer, rect: Rectangle, @@ -400,6 +423,7 @@ rectangle :: proc( // Draw a stroked rectangle via SDF (analytical anti-aliasing at all orientations). // `roundness` is a 0–1 fraction controlling uniform corner rounding — 0 is sharp, 1 is fully rounded. // For per-corner pixel-precise rounding, use `rectangle_corners_lines` instead. +// Origin semantics: see `rectangle`. rectangle_lines :: proc( layer: ^Layer, rect: Rectangle, @@ -415,6 +439,7 @@ rectangle_lines :: proc( } // Draw a rectangle with per-corner rounding radii via SDF. +// Origin semantics: see `rectangle`. rectangle_corners :: proc( layer: ^Layer, rect: Rectangle, @@ -436,12 +461,12 @@ rectangle_corners :: proc( half_width := rect.width * 0.5 half_height := rect.height * 0.5 rotation_radians: f32 = 0 - center_x := rect.x + half_width - center_y := rect.y + half_height + center_x := rect.x + half_width - origin.x + center_y := rect.y + half_height - origin.y if needs_transform(origin, rotation) { rotation_radians = math.to_radians(rotation) - transform := build_pivot_rotation({rect.x, rect.y}, origin, rotation) + transform := build_pivot_rotation({rect.x + origin.x, rect.y + origin.y}, origin, rotation) new_center := apply_transform(transform, {half_width, half_height}) center_x = new_center.x center_y = new_center.y @@ -480,6 +505,7 @@ rectangle_corners :: proc( } // Draw a stroked rectangle with per-corner rounding radii via SDF. +// Origin semantics: see `rectangle`. 
rectangle_corners_lines :: proc(
 	layer: ^Layer,
 	rect: Rectangle,
@@ -502,12 +528,12 @@ rectangle_corners_lines :: proc(
 	half_width := rect.width * 0.5
 	half_height := rect.height * 0.5
 	rotation_radians: f32 = 0
-	center_x := rect.x + half_width
-	center_y := rect.y + half_height
+	center_x := rect.x + half_width - origin.x
+	center_y := rect.y + half_height - origin.y
 
 	if needs_transform(origin, rotation) {
 		rotation_radians = math.to_radians(rotation)
-		transform := build_pivot_rotation({rect.x, rect.y}, origin, rotation)
+		transform := build_pivot_rotation({rect.x + origin.x, rect.y + origin.y}, origin, rotation)
 		new_center := apply_transform(transform, {half_width, half_height})
 		center_x = new_center.x
 		center_y = new_center.y
@@ -545,7 +571,114 @@ rectangle_corners_lines :: proc(
 	prepare_sdf_primitive(layer, prim)
 }
 
+// Draw a rectangle with a texture fill via SDF. Supports rounded corners via `roundness`,
+// rotation, and analytical anti-aliasing on the shape silhouette.
+// Origin semantics: see `rectangle`.
+//
+// `roundness` is clamped to 0–1 and scaled by half of the shorter side to obtain a single
+// corner radius, which is applied to all four corners. Every other argument is forwarded
+// unchanged to `rectangle_texture_corners`.
+rectangle_texture :: proc(
+	layer: ^Layer,
+	rect: Rectangle,
+	id: Texture_Id,
+	tint: Color = WHITE,
+	uv_rect: Rectangle = {0, 0, 1, 1},
+	sampler: Sampler_Preset = .Linear_Clamp,
+	roundness: f32 = 0,
+	origin: [2]f32 = {0, 0},
+	rotation: f32 = 0,
+	soft_px: f32 = 1.0,
+) {
+	// Uniform radius: fraction of half the shorter side.
+	cr := min(rect.width, rect.height) * clamp(roundness, 0, 1) * 0.5
+	rectangle_texture_corners(
+		layer,
+		rect,
+		{cr, cr, cr, cr},
+		id,
+		tint,
+		uv_rect,
+		sampler,
+		origin,
+		rotation,
+		soft_px,
+	)
+}
+
+// Draw a rectangle with a texture fill and per-corner rounding radii via SDF.
+// Origin semantics: see `rectangle`.
+//
+// `radii` is read as {top_left, top_right, bottom_right, bottom_left}; each entry is clamped
+// to [0, half of the shorter side]. `tint` is stored as the primitive color and `uv_rect` is
+// copied field-by-field into the primitive. `id` and `sampler` are handed to
+// `prepare_sdf_primitive_textured` together with the finished primitive.
+rectangle_texture_corners :: proc(
+	layer: ^Layer,
+	rect: Rectangle,
+	radii: [4]f32,
+	id: Texture_Id,
+	tint: Color = WHITE,
+	uv_rect: Rectangle = {0, 0, 1, 1},
+	sampler: Sampler_Preset = .Linear_Clamp,
+	origin: [2]f32 = {0, 0},
+	rotation: f32 = 0,
+	soft_px: f32 = 1.0,
+) {
+	// A corner radius can never exceed half of the shorter side.
+	max_radius := min(rect.width, rect.height) * 0.5
+	top_left := clamp(radii[0], 0, max_radius)
+	top_right := clamp(radii[1], 0, max_radius)
+	bottom_right := clamp(radii[2], 0, max_radius)
+	bottom_left := clamp(radii[3], 0, max_radius)
+
+	// Extra margin added around the bounds below so the soft edge is not clipped.
+	// NOTE(review): soft_px looks like physical pixels converted to logical units here — confirm.
+	padding := soft_px / GLOB.dpi_scaling
+	dpi_scale := GLOB.dpi_scaling
+
+	half_width := rect.width * 0.5
+	half_height := rect.height * 0.5
+	rotation_radians: f32 = 0
+	// Untransformed case: the anchor (`origin`) lands on rect.x/rect.y, so the center shifts by -origin.
+	center_x := rect.x + half_width - origin.x
+	center_y := rect.y + half_height - origin.y
+
+	if needs_transform(origin, rotation) {
+		rotation_radians = math.to_radians(rotation)
+		// NOTE(review): the pivot passed here is rect + origin while the untransformed center
+		// above subtracts origin; confirm against build_pivot_rotation's definition.
+		transform := build_pivot_rotation({rect.x + origin.x, rect.y + origin.y}, origin, rotation)
+		new_center := apply_transform(transform, {half_width, half_height})
+		center_x = new_center.x
+		center_y = new_center.y
+	}
+
+	// When rotated, the bounds must enclose the rotated rectangle, not the unrotated one.
+	bounds_half_width, bounds_half_height := half_width, half_height
+	if rotation_radians != 0 {
+		expanded := rotated_aabb_half_extents(half_width, half_height, rotation_radians)
+		bounds_half_width = expanded.x
+		bounds_half_height = expanded.y
+	}
+
+	prim := Primitive {
+		// {min_x, min_y, max_x, max_y} around the (possibly moved) center, grown by `padding`.
+		bounds = {
+			center_x - bounds_half_width - padding,
+			center_y - bounds_half_height - padding,
+			center_x + bounds_half_width + padding,
+			center_y + bounds_half_height + padding,
+		},
+		color = tint,
+		kind_flags = pack_kind_flags(.RRect, {.Textured}),
+		rotation = rotation_radians,
+		uv_rect = {uv_rect.x, uv_rect.y, uv_rect.width, uv_rect.height},
+	}
+	prim.params.rrect = RRect_Params {
+		half_size = {half_width * dpi_scale, half_height * dpi_scale},
+		// Note the reordering relative to `radii`.
+		// NOTE(review): presumably the corner order the SDF shader expects — confirm.
+		radii = {
+			top_right * dpi_scale,
+			bottom_right * dpi_scale,
+			top_left * dpi_scale,
+			bottom_left * dpi_scale,
+		},
+		soft_px = soft_px,
+		stroke_px = 0, // filled shape, no outline
+	}
+	prepare_sdf_primitive_textured(layer, prim, id, sampler)
+}
+
 // Draw a filled circle via SDF.
+//
+// Origin semantics (Convention B):
+//   `origin` is a local offset from the shape's center that selects both the positioning anchor
+//   and the rotation pivot. The `center` parameter specifies where that anchor point lands in
+//   world space. When `origin = {0, 0}` (default), `center` is the visual center.
+//   When `origin = {r, 0}`, the point `r` pixels to the right of the shape center lands at
+//   `center`, shifting the shape left by `r`.
 circle :: proc(
 	layer: ^Layer,
 	center: [2]f32,
@@ -582,6 +715,7 @@ circle :: proc(
 }
 
 // Draw a stroked circle via SDF.
+// Origin semantics: see `circle`.
 circle_lines :: proc(
 	layer: ^Layer,
 	center: [2]f32,
@@ -619,6 +753,7 @@ circle_lines :: proc(
 }
 
 // Draw a filled ellipse via SDF.
+// Origin semantics: see `circle`.
 ellipse :: proc(
 	layer: ^Layer,
 	center: [2]f32,
@@ -665,6 +800,7 @@ ellipse :: proc(
 }
 
 // Draw a stroked ellipse via SDF.
+// Origin semantics: see `circle`.
 ellipse_lines :: proc(
 	layer: ^Layer,
 	center: [2]f32,
@@ -715,6 +851,7 @@ ellipse_lines :: proc(
 }
 
 // Draw a filled ring arc via SDF.
+// Origin semantics: see `circle`.
 ring :: proc(
 	layer: ^Layer,
 	center: [2]f32,
@@ -757,6 +894,7 @@ ring :: proc(
 }
 
 // Draw stroked ring arc outlines via SDF.
+// Origin semantics: see `circle`.
 ring_lines :: proc(
 	layer: ^Layer,
 	center: [2]f32,
diff --git a/draw/text.odin b/draw/text.odin
index 7400b33..0a741b3 100644
--- a/draw/text.odin
+++ b/draw/text.odin
@@ -246,7 +246,7 @@ bottom_right_of_text :: proc(text_string: string, font_id: Font_Id, font_size: u
 // After calling this, subsequent text draws with an `id` will re-create their cache entries.
clear_text_cache :: proc() {
 	for _, sdl_text in GLOB.text_cache.cache {
-		sdl_ttf.DestroyText(sdl_text)
+		append(&GLOB.pending_text_releases, sdl_text)
 	}
 	clear(&GLOB.text_cache.cache)
 }
@@ -259,7 +259,7 @@ clear_text_cache_entry :: proc(id: u32) {
 	key := Cache_Key{id, .Custom}
 	sdl_text, ok := GLOB.text_cache.cache[key]
 	if ok {
-		sdl_ttf.DestroyText(sdl_text)
+		append(&GLOB.pending_text_releases, sdl_text)
 		delete_key(&GLOB.text_cache.cache, key)
 	}
 }
diff --git a/draw/textures.odin b/draw/textures.odin
new file mode 100644
index 0000000..64f636d
--- /dev/null
+++ b/draw/textures.odin
@@ -0,0 +1,433 @@
+package draw
+
+import "core:log"
+import "core:mem"
+import sdl "vendor:sdl3"
+
+// ---------------------------------------------------------------------------
+// Texture types
+// ---------------------------------------------------------------------------
+
+// Handle to a registered texture. `register_texture` returns the index of the
+// slot it filled in GLOB.texture_slots, converted to this type.
+Texture_Id :: distinct u32
+INVALID_TEXTURE :: Texture_Id(0) // Slot 0 is reserved/unused
+
+// Declared update pattern of a texture; stored in Texture_Desc.kind.
+Texture_Kind :: enum u8 {
+	Static, // Uploaded once, never changes (QR codes, decoded PNGs, icons)
+	Dynamic, // Updatable via update_texture_region
+	Stream, // Frequent full re-uploads (video, procedural)
+}
+
+// Filter + address-mode combinations served by the sampler pool (`get_sampler`).
+Sampler_Preset :: enum u8 {
+	Nearest_Clamp,
+	Linear_Clamp,
+	Nearest_Repeat,
+	Linear_Repeat,
+}
+
+// Number of sampler presets. Derived from the enum so the sampler pool array
+// cannot fall out of step when a preset is added or removed.
+SAMPLER_PRESET_COUNT :: len(Sampler_Preset)
+
+// How a texture is mapped into a destination rectangle (see `fit_params`).
+Fit_Mode :: enum u8 {
+	Stretch, // Fill rect, may distort aspect ratio (default)
+	Fit, // Preserve aspect, letterbox (may leave margins)
+	Fill, // Preserve aspect, center-crop (may crop edges)
+	Tile, // Repeat at native texture size
+	Center, // 1:1 pixel size, centered, no scaling
+}
+
+// Creation parameters for `register_texture`, which copies them into an
+// sdl.GPUTextureCreateInfo. width, height, depth_or_layers and mip_levels are
+// asserted to be > 0 there, and `usage` is asserted to be non-empty.
+Texture_Desc :: struct {
+	width:           u32,
+	height:          u32,
+	depth_or_layers: u32, // passed as layer_count_or_depth
+	type:            sdl.GPUTextureType,
+	format:          sdl.GPUTextureFormat,
+	usage:           sdl.GPUTextureUsageFlags,
+	mip_levels:      u32, // passed as num_levels
+	kind:            Texture_Kind,
+}
+
+// Internal slot — not exported.
+@(private)
+Texture_Slot :: struct {
+	gpu_texture: ^sdl.GPUTexture, // nil once the slot has been released
+	desc:        Texture_Desc, // creation parameters captured at registration
+	generation:  u32, // bumped whenever the slot is reused or released
+}
+
+// State stored in GLOB
+// This file references:
+//   GLOB.device                   : ^sdl.GPUDevice
+//   GLOB.texture_slots            : [dynamic]Texture_Slot
+//   GLOB.texture_free_list        : [dynamic]u32
+//   GLOB.pending_texture_releases : [dynamic]Texture_Id
+//   GLOB.samplers                 : [SAMPLER_PRESET_COUNT]^sdl.GPUSampler
+
+// Internal: range-checked slot lookup shared by the accessors and
+// update_texture_region. Returns nil for INVALID_TEXTURE and for ids that do not
+// index into GLOB.texture_slots, mirroring the guard in texture_gpu_handle.
+@(private = "file")
+texture_slot_lookup :: proc(id: Texture_Id) -> ^Texture_Slot {
+	if id == INVALID_TEXTURE do return nil
+	idx := u32(id)
+	if idx >= u32(len(GLOB.texture_slots)) do return nil
+	return &GLOB.texture_slots[idx]
+}
+
+// ---------------------------------------------------------------------------
+// Clay integration type
+// ---------------------------------------------------------------------------
+
+// Per-image payload: which texture to draw, how to fit it, and its tint.
+Clay_Image_Data :: struct {
+	texture_id: Texture_Id,
+	fit:        Fit_Mode,
+	tint:       Color,
+}
+
+// Convenience constructor for Clay_Image_Data with `.Stretch` / WHITE defaults.
+clay_image_data :: proc(id: Texture_Id, fit: Fit_Mode = .Stretch, tint: Color = WHITE) -> Clay_Image_Data {
+	return {texture_id = id, fit = fit, tint = tint}
+}
+
+// ---------------------------------------------------------------------------
+// Registration
+// ---------------------------------------------------------------------------
+
+// Register a texture. Draw owns the GPU resource and releases it on unregister.
+// `data` is tightly-packed row-major bytes matching desc.format.
+// The caller may free `data` immediately after this proc returns.
+//
+// When `data` is empty the texture is created but nothing is uploaded.
+// Returns (INVALID_TEXTURE, false) if the device is missing or any GPU call
+// fails; a texture created before the failure is released again.
+@(require_results)
+register_texture :: proc(desc: Texture_Desc, data: []u8) -> (id: Texture_Id, ok: bool) {
+	device := GLOB.device
+	if device == nil {
+		log.error("register_texture called before draw.init()")
+		return INVALID_TEXTURE, false
+	}
+
+	assert(desc.width > 0, "Texture_Desc.width must be > 0")
+	assert(desc.height > 0, "Texture_Desc.height must be > 0")
+	assert(desc.depth_or_layers > 0, "Texture_Desc.depth_or_layers must be > 0")
+	assert(desc.mip_levels > 0, "Texture_Desc.mip_levels must be > 0")
+	assert(desc.usage != {}, "Texture_Desc.usage must not be empty (e.g. {.SAMPLER})")
+
+	// Create the GPU texture
+	gpu_texture := sdl.CreateGPUTexture(
+		device,
+		sdl.GPUTextureCreateInfo {
+			type = desc.type,
+			format = desc.format,
+			usage = desc.usage,
+			width = desc.width,
+			height = desc.height,
+			layer_count_or_depth = desc.depth_or_layers,
+			num_levels = desc.mip_levels,
+			sample_count = ._1,
+		},
+	)
+	if gpu_texture == nil {
+		log.errorf("Failed to create GPU texture (%dx%d): %s", desc.width, desc.height, sdl.GetError())
+		return INVALID_TEXTURE, false
+	}
+
+	// Upload pixel data via a transfer buffer
+	if len(data) > 0 {
+		data_size := u32(len(data))
+		transfer := sdl.CreateGPUTransferBuffer(
+			device,
+			sdl.GPUTransferBufferCreateInfo{usage = .UPLOAD, size = data_size},
+		)
+		if transfer == nil {
+			log.errorf("Failed to create texture transfer buffer: %s", sdl.GetError())
+			sdl.ReleaseGPUTexture(device, gpu_texture)
+			return INVALID_TEXTURE, false
+		}
+		// Scope-bound: runs when this `if` block exits, i.e. after the upload was submitted.
+		defer sdl.ReleaseGPUTransferBuffer(device, transfer)
+
+		mapped := sdl.MapGPUTransferBuffer(device, transfer, false)
+		if mapped == nil {
+			log.errorf("Failed to map texture transfer buffer: %s", sdl.GetError())
+			sdl.ReleaseGPUTexture(device, gpu_texture)
+			return INVALID_TEXTURE, false
+		}
+		mem.copy(mapped, raw_data(data), int(data_size))
+		sdl.UnmapGPUTransferBuffer(device, transfer)
+
+		cmd_buffer := sdl.AcquireGPUCommandBuffer(device)
+		if cmd_buffer == nil {
+			log.errorf("Failed to acquire command buffer for texture upload: %s", sdl.GetError())
+			sdl.ReleaseGPUTexture(device, gpu_texture)
+			return INVALID_TEXTURE, false
+		}
+		copy_pass := sdl.BeginGPUCopyPass(cmd_buffer)
+		sdl.UploadToGPUTexture(
+			copy_pass,
+			sdl.GPUTextureTransferInfo{transfer_buffer = transfer},
+			sdl.GPUTextureRegion{texture = gpu_texture, w = desc.width, h = desc.height, d = desc.depth_or_layers},
+			false,
+		)
+		sdl.EndGPUCopyPass(copy_pass)
+		if !sdl.SubmitGPUCommandBuffer(cmd_buffer) {
+			log.errorf("Failed to submit texture upload: %s", sdl.GetError())
+			sdl.ReleaseGPUTexture(device, gpu_texture)
+			return INVALID_TEXTURE, false
+		}
+	}
+
+	// Allocate a slot (reuse from free list or append)
+	slot_index: u32
+	if len(GLOB.texture_free_list) > 0 {
+		slot_index = pop(&GLOB.texture_free_list)
+		GLOB.texture_slots[slot_index] = Texture_Slot {
+			gpu_texture = gpu_texture,
+			desc        = desc,
+			generation  = GLOB.texture_slots[slot_index].generation + 1,
+		}
+	} else {
+		// Index 0 is INVALID_TEXTURE and must never be handed out. This is a no-op
+		// when the slot array already holds the reserved entry.
+		// NOTE(review): draw.init() may already reserve slot 0 — confirm.
+		if len(GLOB.texture_slots) == 0 {
+			append(&GLOB.texture_slots, Texture_Slot{})
+		}
+		slot_index = u32(len(GLOB.texture_slots))
+		append(&GLOB.texture_slots, Texture_Slot{gpu_texture = gpu_texture, desc = desc, generation = 1})
+	}
+
+	return Texture_Id(slot_index), true
+}
+
+// Queue a texture for release at the end of the current frame.
+// The GPU resource is not freed immediately — see "Deferred release" in the README.
+unregister_texture :: proc(id: Texture_Id) {
+	if id == INVALID_TEXTURE do return
+	append(&GLOB.pending_texture_releases, id)
+}
+
+// Re-upload a sub-region of a Dynamic texture.
+// `region` is converted to unsigned texel coordinates; `data` is copied into a
+// temporary transfer buffer and uploaded with its own command buffer.
+// Does nothing for an invalid, out-of-range or released id, for empty `data`,
+// or for a region with a negative origin or non-positive size. GPU failures
+// are logged and otherwise ignored.
+// NOTE(review): Texture_Kind is not checked here — confirm whether Static
+// textures are meant to be rejected.
+update_texture_region :: proc(id: Texture_Id, region: Rectangle, data: []u8) {
+	slot := texture_slot_lookup(id)
+	if slot == nil || slot.gpu_texture == nil do return
+	// The u32 casts below cannot represent negative values, so reject them up front.
+	if region.x < 0 || region.y < 0 || region.width <= 0 || region.height <= 0 do return
+
+	device := GLOB.device
+	data_size := u32(len(data))
+	if data_size == 0 do return
+
+	transfer := sdl.CreateGPUTransferBuffer(
+		device,
+		sdl.GPUTransferBufferCreateInfo{usage = .UPLOAD, size = data_size},
+	)
+	if transfer == nil {
+		log.errorf("Failed to create transfer buffer for texture region update: %s", sdl.GetError())
+		return
+	}
+	defer sdl.ReleaseGPUTransferBuffer(device, transfer)
+
+	mapped := sdl.MapGPUTransferBuffer(device, transfer, false)
+	if mapped == nil {
+		log.errorf("Failed to map transfer buffer for texture region update: %s", sdl.GetError())
+		return
+	}
+	mem.copy(mapped, raw_data(data), int(data_size))
+	sdl.UnmapGPUTransferBuffer(device, transfer)
+
+	cmd_buffer := sdl.AcquireGPUCommandBuffer(device)
+	if cmd_buffer == nil {
+		log.errorf("Failed to acquire command buffer for texture region update: %s", sdl.GetError())
+		return
+	}
+	copy_pass := sdl.BeginGPUCopyPass(cmd_buffer)
+	sdl.UploadToGPUTexture(
+		copy_pass,
+		sdl.GPUTextureTransferInfo{transfer_buffer = transfer},
+		sdl.GPUTextureRegion {
+			texture = slot.gpu_texture,
+			x = u32(region.x),
+			y = u32(region.y),
+			w = u32(region.width),
+			h = u32(region.height),
+			d = 1,
+		},
+		false,
+	)
+	sdl.EndGPUCopyPass(copy_pass)
+	if !sdl.SubmitGPUCommandBuffer(cmd_buffer) {
+		log.errorf("Failed to submit texture region update: %s", sdl.GetError())
+	}
+}
+
+// ---------------------------------------------------------------------------
+// Accessors
+// ---------------------------------------------------------------------------
+
+// Width and height recorded at registration; {0, 0} for an invalid or out-of-range id.
+texture_size :: proc(id: Texture_Id) -> [2]u32 {
+	slot := texture_slot_lookup(id)
+	if slot == nil do return {0, 0}
+	return {slot.desc.width, slot.desc.height}
+}
+
+// Format recorded at registration; .INVALID for an invalid or out-of-range id.
+texture_format :: proc(id: Texture_Id) -> sdl.GPUTextureFormat {
+	slot := texture_slot_lookup(id)
+	if slot == nil do return .INVALID
+	return slot.desc.format
+}
+
+// Kind recorded at registration; .Static for an invalid or out-of-range id.
+texture_kind :: proc(id: Texture_Id) -> Texture_Kind {
+	slot := texture_slot_lookup(id)
+	if slot == nil do return .Static
+	return slot.desc.kind
+}
+
+// Internal: get the raw GPU texture pointer for binding during draw.
+@(private)
+texture_gpu_handle :: proc(id: Texture_Id) -> ^sdl.GPUTexture {
+	// Returns nil for INVALID_TEXTURE and for ids past the end of the slot array.
+	if id == INVALID_TEXTURE do return nil
+	idx := u32(id)
+	if idx >= u32(len(GLOB.texture_slots)) do return nil
+	return GLOB.texture_slots[idx].gpu_texture
+}
+
+// ---------------------------------------------------------------------------
+// Deferred release (called from draw.end / clear_global)
+// ---------------------------------------------------------------------------
+
+// Release every texture queued by unregister_texture and recycle its slot.
+// Ids that are out of range, or whose slot has already been released, are
+// skipped so that a slot index is pushed onto the free list at most once.
+@(private)
+process_pending_texture_releases :: proc() {
+	device := GLOB.device
+	for id in GLOB.pending_texture_releases {
+		idx := u32(id)
+		if idx >= u32(len(GLOB.texture_slots)) do continue
+		slot := &GLOB.texture_slots[idx]
+		// A nil handle means the slot is already free (for example the same id was
+		// unregistered twice). Recycling it again would duplicate the index on the
+		// free list and let two later registrations share one slot.
+		if slot.gpu_texture == nil do continue
+		sdl.ReleaseGPUTexture(device, slot.gpu_texture)
+		slot.gpu_texture = nil
+		slot.generation += 1
+		append(&GLOB.texture_free_list, idx)
+	}
+	clear(&GLOB.pending_texture_releases)
+}
+
+// ---------------------------------------------------------------------------
+// Sampler pool
+// ---------------------------------------------------------------------------
+
+// Return the GPU sampler for `preset`, creating it on first use and caching it
+// in GLOB.samplers. All three address modes use the preset's mode and
+// mipmap_mode is .LINEAR for every preset. If creation fails the error is
+// logged and GLOB.pipeline_2d_base.sampler is returned instead; nothing is
+// cached, so the next call tries again.
+@(private)
+get_sampler :: proc(preset: Sampler_Preset) -> ^sdl.GPUSampler {
+	idx := int(preset)
+	if GLOB.samplers[idx] != nil do return GLOB.samplers[idx]
+
+	// Lazily create
+	min_filter, mag_filter: sdl.GPUFilter
+	address_mode: sdl.GPUSamplerAddressMode
+
+	switch preset {
+	case .Nearest_Clamp:
+		min_filter = .NEAREST; mag_filter = .NEAREST; address_mode = .CLAMP_TO_EDGE
+	case .Linear_Clamp:
+		min_filter = .LINEAR; mag_filter = .LINEAR; address_mode = .CLAMP_TO_EDGE
+	case .Nearest_Repeat:
+		min_filter = .NEAREST; mag_filter = .NEAREST; address_mode = .REPEAT
+	case .Linear_Repeat:
+		min_filter = .LINEAR; mag_filter = .LINEAR; address_mode = .REPEAT
+	}
+
+	sampler := sdl.CreateGPUSampler(
+		GLOB.device,
+		sdl.GPUSamplerCreateInfo {
+			min_filter = min_filter,
+			mag_filter = mag_filter,
+			mipmap_mode = .LINEAR,
+			address_mode_u = address_mode,
+			address_mode_v = address_mode,
+			address_mode_w = address_mode,
+		},
+	)
+	if sampler == nil {
+		log.errorf("Failed to create sampler preset %v: %s", preset, sdl.GetError())
+		return GLOB.pipeline_2d_base.sampler // fallback to existing default sampler
+	}
+
+	GLOB.samplers[idx] = sampler
+	return sampler
+}
+
+// Internal: destroy all sampler pool entries. Called from draw.destroy().
+@(private)
+destroy_sampler_pool :: proc() {
+	device := GLOB.device
+	for &s in GLOB.samplers {
+		if s != nil {
+			sdl.ReleaseGPUSampler(device, s)
+			s = nil // allows get_sampler to recreate the entry later
+		}
+	}
+}
+
+// Internal: destroy all registered textures. Called from draw.destroy().
+// Releases every live GPU texture, then frees the slot array, the free list
+// and the pending-release queue.
+@(private)
+destroy_all_textures :: proc() {
+	device := GLOB.device
+	for &slot in GLOB.texture_slots {
+		if slot.gpu_texture != nil {
+			sdl.ReleaseGPUTexture(device, slot.gpu_texture)
+			slot.gpu_texture = nil
+		}
+	}
+	delete(GLOB.texture_slots)
+	delete(GLOB.texture_free_list)
+	delete(GLOB.pending_texture_releases)
+}
+
+// ---------------------------------------------------------------------------
+// Fit mode helper
+// ---------------------------------------------------------------------------
+
+// Compute UV rect, recommended sampler, and inner rect for a given fit mode.
+// `rect` is the target drawing area; `texture_id` identifies the texture whose
+// pixel dimensions are looked up via texture_size().
+// For Fit mode, `inner_rect` is smaller than `rect` (centered). For all other modes, `inner_rect == rect`.
+// The Fill and Center branches fill `uv_rect` with (u_min, v_min, u_max, v_max) in the
+// x/y/width/height fields, i.e. the last two fields hold a far corner rather than an extent.
+// NOTE(review): confirm this matches how the shader consumes Primitive.uv_rect.
+fit_params :: proc(
+	fit: Fit_Mode,
+	rect: Rectangle,
+	texture_id: Texture_Id,
+) -> (
+	uv_rect: Rectangle,
+	sampler: Sampler_Preset,
+	inner_rect: Rectangle,
+) {
+	// Baseline result: whole texture, linear clamp, draw into `rect` as given.
+	// This is the answer for Stretch and for any zero-sized texture or rect;
+	// each remaining mode overrides only the parts it changes.
+	uv_rect = Rectangle{0, 0, 1, 1}
+	sampler = .Linear_Clamp
+	inner_rect = rect
+
+	dims := texture_size(texture_id)
+	tex_w, tex_h := f32(dims.x), f32(dims.y)
+	dst_w, dst_h := rect.width, rect.height
+
+	// Guard the aspect-ratio divisions below.
+	if tex_w == 0 || tex_h == 0 || dst_w == 0 || dst_h == 0 do return
+
+	tex_aspect := tex_w / tex_h
+	dst_aspect := dst_w / dst_h
+	texture_is_wider := tex_aspect > dst_aspect
+
+	switch fit {
+	case .Stretch:
+	// Nothing to adjust.
+
+	case .Fill:
+		// Keep aspect and cover the rect by cropping the overflowing axis symmetrically.
+		if texture_is_wider {
+			margin := (1 - dst_aspect / tex_aspect) * 0.5
+			uv_rect = Rectangle{margin, 0, 1 - margin, 1}
+		} else {
+			margin := (1 - tex_aspect / dst_aspect) * 0.5
+			uv_rect = Rectangle{0, margin, 1, 1 - margin}
+		}
+
+	case .Fit:
+		// Keep aspect and stay inside the rect: shrink one axis and center the result.
+		if texture_is_wider {
+			fitted_height := dst_w / tex_aspect
+			inset := (dst_h - fitted_height) * 0.5
+			inner_rect = Rectangle{rect.x, rect.y + inset, dst_w, fitted_height}
+		} else {
+			fitted_width := dst_h * tex_aspect
+			inset := (dst_w - fitted_width) * 0.5
+			inner_rect = Rectangle{rect.x + inset, rect.y, fitted_width, dst_h}
+		}
+
+	case .Tile:
+		// UVs run past 1 so the repeating sampler tiles at native texture size.
+		uv_rect = Rectangle{0, 0, dst_w / tex_w, dst_h / tex_h}
+		sampler = .Linear_Repeat
+
+	case .Center:
+		// A window the size of the rect (in texels), centered on the texture midpoint.
+		half_u := dst_w / (2 * tex_w)
+		half_v := dst_h / (2 * tex_h)
+		uv_rect = Rectangle{0.5 - half_u, 0.5 - half_v, 0.5 + half_u, 0.5 + half_v}
+		sampler = .Nearest_Clamp
+	}
+
+	return
+}