diff --git a/.zed/tasks.json b/.zed/tasks.json index 8fc5867..13a618b 100644 --- a/.zed/tasks.json +++ b/.zed/tasks.json @@ -38,13 +38,23 @@ "cwd": "$ZED_WORKTREE_ROOT", }, // --------------------------------------------------------------------------------------------------------------------- - // ----- LMDB Examples ------------------------ + // ----- Examples ------------------------ // --------------------------------------------------------------------------------------------------------------------- { "label": "Run lmdb example", "command": "odin run vendor/lmdb/examples -debug -out=out/debug/lmdb-examples", "cwd": "$ZED_WORKTREE_ROOT", }, + { + "label": "Run draw hellope-clay example", + "command": "odin run draw/examples -debug -out=out/debug/draw-examples -- hellope-clay", + "cwd": "$ZED_WORKTREE_ROOT", + }, + { + "label": "Run draw hellope-shapes example", + "command": "odin run draw/examples -debug -out=out/debug/draw-examples -- hellope-shapes", + "cwd": "$ZED_WORKTREE_ROOT", + }, // --------------------------------------------------------------------------------------------------------------------- // ----- Other ------------------------ // --------------------------------------------------------------------------------------------------------------------- diff --git a/README.md b/README.md index 7fe8894..1949722 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,19 @@ # LevLib Narya + BFPOWER unified Odin library collection. + +## Meta Tools + +The `meta/` package contains build tools that can be run from the project root: + +``` +odin run meta -- +``` + +Running with no arguments prints available commands. 
+ +### Commands + +| Command | Description | +| ------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `gen-shaders` | Compile all GLSL shaders in `draw/shaders/source/` to SPIR-V and Metal Shading Language, writing results to `draw/shaders/generated/`. Requires `glslangValidator` and `spirv-cross` on PATH. | diff --git a/draw/README.md b/draw/README.md new file mode 100644 index 0000000..e6a3d8a --- /dev/null +++ b/draw/README.md @@ -0,0 +1,557 @@ +# draw + +2D rendering library built on SDL3 GPU, providing a unified shape-drawing and text-rendering API with +Clay UI integration. + +## Current state + +The renderer uses a single unified `Pipeline_2D_Base` (`TRIANGLELIST` pipeline) with two submission +modes dispatched by a push constant: + +- **Mode 0 (Tessellated):** Vertex buffer contains real geometry. Used for text (indexed draws into + SDL_ttf atlas textures), axis-aligned sharp-corner rectangles (already optimal as 2 triangles), + per-vertex color gradients (`rectangle_gradient`, `circle_gradient`), angular-clipped circle + sectors (`circle_sector`), and arbitrary user geometry (`triangle`, `triangle_fan`, + `triangle_strip`). The fragment shader computes `out = color * texture(tex, uv)`. + +- **Mode 1 (SDF):** A static 6-vertex unit-quad buffer is drawn instanced, with per-primitive + `Primitive` structs uploaded each frame to a GPU storage buffer. The vertex shader reads + `primitives[gl_InstanceIndex]`, computes world-space position from unit quad corners + primitive + bounds. The fragment shader dispatches on `Shape_Kind` to evaluate the correct signed distance + function analytically. + +Seven SDF shape kinds are implemented: + +1. **RRect** — rounded rectangle with per-corner radii (iq's `sdRoundedBox`) +2. **Circle** — filled or stroked circle +3. 
**Ellipse** — exact signed-distance ellipse (iq's iterative `sdEllipse`) +4. **Segment** — capsule-style line segment with rounded caps +5. **Ring_Arc** — annular ring with angular clipping for arcs +6. **NGon** — regular polygon with arbitrary side count and rotation +7. **Polyline** — decomposed into independent `Segment` primitives per adjacent point pair + +All SDF shapes support fill and stroke modes via `Shape_Flags`, and produce mathematically exact +curves with analytical anti-aliasing via `smoothstep` — no tessellation, no piecewise-linear +approximation. A rounded rectangle is 1 primitive (64 bytes) instead of ~250 vertices (~5000 bytes). + +MSAA is opt-in (default `._1`, no MSAA) via `Init_Options.msaa_samples`. SDF rendering does not +benefit from MSAA because fragment coverage is computed analytically. MSAA remains useful for text +glyph edges and tessellated user geometry if desired. + +## 2D rendering pipeline plan + +This section documents the planned architecture for levlib's 2D rendering system. The design is driven +by three goals: **draw quality** (mathematically exact curves with perfect anti-aliasing), **efficiency** +(minimal vertex bandwidth, high GPU occupancy, low draw-call count), and **extensibility** (new +primitives and effects can be added to the library without architectural changes). + +### Overview: three pipelines + +The 2D renderer will use three GPU pipelines, split by **register pressure compatibility** and +**render-state requirements**: + +1. **Main pipeline** — shapes (SDF and tessellated) and text. Low register footprint (~18–22 + registers per thread). Runs at high GPU occupancy. Handles 90%+ of all fragments in a typical + frame. + +2. **Effects pipeline** — drop shadows, inner shadows, outer glow, and similar ALU-bound blur + effects. Medium register footprint (~48–60 registers). 
Each effects primitive includes the base + shape's SDF so that it can draw both the effect and the shape in a single fragment pass, avoiding + redundant overdraw. + +3. **Backdrop-effects pipeline** — frosted glass, refraction, and any effect that samples the current + render target as input. High register footprint (~70–80 registers) and structurally requires a + `CopyGPUTextureToTexture` from the render target before drawing. Separated both for register + pressure and because the texture-copy requirement forces a render-pass-level state change. + +A typical UI frame with no effects uses 1 pipeline bind and 0 switches. A frame with drop shadows +uses 2 pipelines and 1 switch. A frame with shadows and frosted glass uses all 3 pipelines and 2 +switches plus 1 texture copy. At ~5μs per pipeline bind on modern APIs, worst-case switching overhead +is under 0.15% of an 8.3ms (120 FPS) frame budget. + +### Why three pipelines, not one or seven + +The natural question is whether we should use a single unified pipeline (fewer state changes, simpler +code) or many per-primitive-type pipelines (no branching overhead, lean per-shader register usage). + +The dominant cost factor is **GPU register pressure**, not pipeline switching overhead or fragment +shader branching. A GPU shader core has a fixed register pool shared among all concurrent threads. The +compiler allocates registers pessimistically based on the worst-case path through the shader. If the +shader contains both a 20-register RRect SDF and a 72-register frosted-glass blur, _every_ fragment +— even trivial RRects — is allocated 72 registers. This directly reduces **occupancy** (the number of +warps that can run simultaneously), which reduces the GPU's ability to hide memory latency. 
+ +Concrete example on a modern NVIDIA SM with 65,536 registers: + +| Register allocation | Max concurrent threads | Occupancy | +| ------------------------- | ---------------------- | --------- | +| 20 regs (RRect only) | 3,276 | ~100% | +| 48 regs (+ drop shadow) | 1,365 | ~42% | +| 72 regs (+ frosted glass) | 910 | ~28% | + +For a 4K frame (3840×2160) at 1.5× overdraw (~12.4M fragments), running all fragments at 28% +occupancy instead of 100% roughly triples fragment shading time. At 4K this is severe: if the main +pipeline's fragment work at full occupancy takes ~2ms, a single unified shader containing the glass +branch would push it to ~6ms — consuming 72% of the 8.3ms budget available at 120 FPS and leaving +almost nothing for CPU work, uploads, and presentation. This is a per-frame multiplier, not a +per-primitive cost — it applies even when the heavy branch is never taken. + +The three-pipeline split groups primitives by register footprint so that: + +- Main pipeline (~20 regs): 90%+ of fragments run at near-full occupancy. +- Effects pipeline (~55 regs): shadow/glow fragments run at moderate occupancy; unavoidable given the + blur math complexity. +- Backdrop-effects pipeline (~75 regs): glass fragments run at low occupancy; also unavoidable, and + structurally separated anyway by the texture-copy requirement. + +This avoids the register-pressure tax of a single unified shader while keeping pipeline count minimal +(3 vs. Zed GPUI's 7). The effects that drag occupancy down are isolated to the fragments that +actually need them. + +**Why not per-primitive-type pipelines (GPUI's approach)?** Zed's GPUI uses 7 separate shader pairs: +quad, shadow, underline, monochrome sprite, polychrome sprite, path, surface. This eliminates all +branching and gives each shader minimal register usage. 
Three concrete costs make this approach wrong +for our use case: + +**Draw call count scales with kind variety, not just scissor count.** With a unified pipeline, +one instanced draw call per scissor covers all primitive kinds from a single storage buffer. With +per-kind pipelines, each scissor requires one draw call and one pipeline bind per kind used. For a +typical UI frame with 15 scissors and 3–4 primitive kinds per scissor, per-kind splitting produces +~45–60 draw calls and pipeline binds; our unified approach produces ~15–20 draw calls and 1–5 +pipeline binds. At ~5μs each for CPU-side command encoding on modern APIs, per-kind splitting adds +375–500μs of CPU overhead per frame — **4.5–6% of an 8.3ms (120 FPS) budget** — with no +compensating GPU-side benefit, because the register-pressure savings within the simple-SDF tier are +negligible (all members cluster at 12–22 registers). + +**Z-order preservation forces the API to expose layers.** With a single pipeline drawing all kinds +from one storage buffer, submission order equals draw order — Clay's painterly render commands flow +through without reordering. With separate pipelines per kind, primitives can only batch with +same-kind neighbors, which means interleaved kinds (e.g., `[rrect, circle, text, rrect, text]`) must +either issue one draw call per primitive (defeating batching entirely) or force the user to pre-sort +by kind and reason about explicit layers. GPUI chose the latter, baking layer semantics into their +API where each layer draws shadows before quads before glyphs. Our design avoids this constraint: +submission order is draw order, no layer juggling required. + +**PSO compilation costs multiply.** Each pipeline takes 1–50ms to compile on Metal/Vulkan/D3D12 at +first use. 7 pipelines is ~175ms cold startup; 3 pipelines is ~75ms. Adding state axes (MSAA +variants, blend modes, color formats) multiplies combinatorially — a 2.3× larger variant matrix per +additional axis with 7 pipelines vs 3. 
+ +**Branching cost comparison: unified vs per-kind in the effects pipeline.** The effects pipeline is +the strongest candidate for per-kind splitting because effect branches are heavier than shape +branches (~80 instructions for drop shadow vs ~20 for an SDF). Even here, per-kind splitting loses. +Consider a worst-case scissor with 15 drop-shadowed cards and 2 inner-shadowed elements interleaved +in submission order: + +- _Unified effects pipeline (our plan):_ 1 pipeline bind, 1 instanced draw call. Category-3 + divergence occurs at drop-shadow/inner-shadow boundaries where ~4 warps straddle per boundary × 2 + boundaries = ~8 divergent warps out of ~19,924 total (0.04%). Each divergent warp pays ~80 extra + instructions. Total divergence cost: 8 × 32 × 80 / 12G inst/sec ≈ **1.7μs**. + +- _Per-kind effects pipelines (GPUI-style):_ 2 pipeline binds + 2 draw calls. But submission order + is `[drop, drop, inner, drop, drop, inner, drop, ...]` — the two inner-shadow primitives split the + drop-shadow run into three segments. To preserve Z-order, this requires 5 draw calls and 4 pipeline + switches, not 2. Cost: 5 × 5μs + 4 × 5μs = **45μs**. + + The per-kind approach costs **26× more** than the unified approach's divergence penalty (45μs vs + 1.7μs), while eliminating only 0.04% warp divergence that was already negligible. Even in the most + extreme stacked-effects scenario (10 cards each with both drop shadow and inner shadow, producing + ~60 boundary warps at ~80 extra instructions each), unified divergence costs ~13μs — still 3.5× + cheaper than the pipeline-switching alternative. + +The split we _do_ perform (main / effects / backdrop-effects) is motivated by register-pressure tier +boundaries where occupancy differences are catastrophic at 4K (see numbers above). Within a tier, +unified is strictly better by every measure: fewer draw calls, simpler Z-order, lower CPU overhead, +and negligible GPU-side branching cost. 
+ +**References:** + +- Zed GPUI blog post on their per-primitive pipeline architecture: + https://zed.dev/blog/videogame +- Zed GPUI Metal shader source (7 shader pairs): + https://github.com/zed-industries/zed/blob/cb6fc11/crates/gpui/src/platform/mac/shaders.metal +- NVIDIA Nsight Graphics 2024.3 documentation on active-threads-per-warp and divergence analysis: + https://developer.nvidia.com/blog/optimize-gpu-workloads-for-graphics-applications-with-nvidia-nsight-graphics/ + +### Why fragment shader branching is safe in this design + +There is longstanding folklore that "branches in shaders are bad." This was true on pre-2010 hardware +where shader cores had no branch instructions at all — compilers emitted code for both sides of every +branch and used conditional select to pick the result. On modern GPUs (everything from ~2012 onward), +this is no longer the case. Native dynamic branching is fully supported on all current hardware. +However, branching _can_ still be costly in specific circumstances. Understanding which circumstances +apply to our design — and which do not — is critical to justifying the unified-pipeline approach. + +#### How GPU branching works + +GPUs execute fragment shaders in **warps** (NVIDIA/Intel, 32 threads) or **wavefronts** (AMD, 32 or +64 threads). All threads in a warp execute the same instruction simultaneously (SIMT model). When a +branch condition evaluates the same way for every thread in a warp, the GPU simply jumps to the taken +path and skips the other — **zero cost**, identical to a CPU branch. This is called a **uniform +branch** or **warp-coherent branch**. + +When threads within the same warp disagree on which path to take, the warp must execute both paths +sequentially, masking off threads that don't belong to the active path. This is called **warp +divergence** and it causes the warp to pay the cost of both sides of the branch. In the worst case +(50/50 split), throughput halves for that warp. 
+ +There are four categories of branch condition in a fragment shader, ranked by cost: + +| Category | Condition source | GPU behavior | Cost | +| -------------------------------- | ----------------------------------------------------------------- | ---------------------------------------------------------------------------------------------- | --------------------- | +| **Compile-time constant** | `#ifdef`, `const bool` | Dead code eliminated by compiler | Zero | +| **Uniform / push constant** | Same value for entire draw call | Warp-coherent; GPU skips dead path | Effectively zero | +| **Per-primitive `flat` varying** | Same value across all fragments of a primitive | Warp-coherent for all warps fully inside one primitive; divergent only at primitive boundaries | Near zero (see below) | +| **Per-fragment varying** | Different value per pixel (e.g., texture lookup, screen position) | Potentially divergent within every warp | Can be expensive | + +#### Which category our branches fall into + +Our design has two branch points: + +1. **`mode` (push constant): tessellated vs. SDF.** This is category 2 — uniform per draw call. + Every thread in every warp of a draw call sees the same `mode` value. **Zero divergence, zero + cost.** + +2. **`shape_kind` (flat varying from storage buffer): which SDF to evaluate.** This is category 3. + The `flat` interpolation qualifier ensures that all fragments rasterized from one primitive's quad + receive the same `shape_kind` value. Divergence can only occur at the **boundary between two + adjacent primitives of different kinds**, where the rasterizer might pack fragments from both + primitives into the same warp. + +For category 3, the divergence analysis depends on primitive size: + +- **Large primitives** (buttons, panels, containers — 50+ pixels on a side): a 200×100 rect + produces ~20,000 fragments = ~625 warps. At most ~4 boundary warps might straddle a neighbor of a + different kind. Divergence rate: **0.6%** of warps. 
+ +- **Small primitives** (icons, dots — 16×16): 256 fragments = ~8 warps. At most 2 boundary warps + diverge. Divergence rate: **25%** of warps for that primitive, but the primitive itself covers a + tiny fraction of the frame's total fragments. + +- **Worst realistic case**: a dense grid of alternating shape kinds (e.g., circle-rect-circle-rect + icons). Even here, the interior warps of each primitive are coherent. Only the edges diverge. Total + frame-level divergence is typically **1–3%** of all warps. + +At 1–3% divergence, the throughput impact is negligible. At 4K with 12.4M total fragments +(~387,000 warps), divergent boundary warps number in the low thousands. Each divergent warp pays at +most ~25 extra instructions (the cost of the longest untaken SDF branch). At ~12G instructions/sec +on a mid-range GPU, that totals ~4μs — under 0.05% of an 8.3ms (120 FPS) frame budget. This is +confirmed by production renderers that use exactly this pattern: + +- **vger / vger-rs** (Audulus): single pipeline, 11 primitive kinds dispatched by a `switch` on a + flat varying `prim_type`. Ships at 120 FPS on iPads. The author (Taylor Holliday) replaced nanovg + specifically because CPU-side tessellation was the bottleneck, not fragment branching: + https://github.com/audulus/vger-rs + +- **Randy Gaul's 2D renderer**: single pipeline with `shape_type` encoded as a vertex attribute. + Reports that warp divergence "really hasn't been an issue for any game I've seen so far" because + "games tend to draw a lot of the same shape type": + https://randygaul.github.io/graphics/2025/03/04/2D-Rendering-SDF-and-Atlases.html + +#### What kind of branching IS expensive + +For completeness, here are the cases where shader branching genuinely hurts — none of which apply to +our design: + +1. **Per-fragment data-dependent branches with high divergence.** Example: + `if (texture(noise, uv).r > 0.5)` where the noise texture produces a random pattern. Every warp + has ~50% divergence. 
Every warp pays for both paths. This is the scenario the "branches are bad" + folklore warns about. We have no per-fragment data-dependent branches in the main + pipeline. + +2. **Branches where both paths are very long.** If both sides of a branch are 500+ instructions, + divergent warps pay double a large cost. Our SDF functions are 10–25 instructions each. Even + fully divergent, the penalty is ~25 extra instructions — less than a single texture sample's + latency. + +3. **Branches that prevent compiler optimizations.** Some compilers cannot schedule instructions + across branch boundaries, reducing VLIW utilization on older architectures. Modern GPUs (NVIDIA + Volta+, AMD RDNA+, Apple M-series) use scalar+vector execution models where this is not a + concern. + +4. **Register pressure from the union of all branches.** This is the real cost, and it is why we + split heavy effects (shadows, glass) into separate pipelines. Within the main pipeline, all SDF + branches have similar register footprints (12–22 registers), so combining them causes negligible + occupancy loss. 
+ +**References:** + +- ARM solidpixel blog on branches in mobile shaders — comprehensive taxonomy of branch execution + models across GPU generations, confirms uniform and warp-coherent branches are free on modern + hardware: + https://solidpixel.github.io/2021/12/09/branches_in_shaders.html +- Peter Stefek's "A Note on Branching Within a Shader" — practical measurements showing that + warp-coherent branches have zero overhead on Pascal/Volta/Ampere, with clear explanation of the + SIMT divergence mechanism: + https://www.peterstefek.me/shader-branch.html +- NVIDIA Volta architecture whitepaper — documents independent thread scheduling which allows + divergent threads to reconverge more efficiently than older architectures: + https://images.nvidia.com/content/volta-architecture/pdf/volta-architecture-whitepaper.pdf +- Randy Gaul on warp divergence in practice with per-primitive shape_type branching: + https://randygaul.github.io/graphics/2025/03/04/2D-Rendering-SDF-and-Atlases.html + +### Main pipeline: SDF + tessellated (unified) + +The main pipeline serves two submission modes through a single `TRIANGLELIST` pipeline and a single +vertex input layout, distinguished by a push constant: + +- **Tessellated mode** (`mode = 0`): direct vertex buffer with explicit geometry. Unchanged from + today. Used for text (SDL_ttf atlas sampling), polylines, triangle fans/strips, gradient-filled + shapes, and any user-provided raw vertex geometry. +- **SDF mode** (`mode = 1`): shared unit-quad vertex buffer + GPU storage buffer of `Primitive` + structs, drawn instanced. Used for all shapes with closed-form signed distance functions. + +Both modes converge on the same fragment shader, which dispatches on a `shape_kind` discriminant +carried either in the vertex data (tessellated, always `Solid = 0`) or in the storage-buffer +primitive struct (SDF modes). + +#### Why SDF for shapes + +CPU-side adaptive tessellation for curved shapes (the current approach) has three problems: + +1. 
**Vertex bandwidth.** A rounded rectangle with four corner arcs produces ~250 vertices × 20 bytes + = 5 KB. An SDF rounded rectangle is one `Primitive` struct (64 bytes) plus 4 shared unit-quad + vertices. That is roughly an 80× reduction per shape. + +2. **Quality.** Tessellated curves are piecewise-linear approximations. At high DPI or under + animation/zoom, faceting is visible at any practical segment count. SDF evaluation produces + mathematically exact boundaries with perfect anti-aliasing via `smoothstep` in the fragment + shader. + +3. **Feature cost.** Adding soft edges, outlines, stroke effects, or rounded-cap line segments + requires extensive per-shape tessellation code. With SDF, these are trivial fragment shader + operations: `abs(d) - thickness` for stroke, `smoothstep(-soft, soft, d)` for soft edges. + +**References:** + +- Inigo Quilez's 2D SDF primitive catalog (primary source for all SDF functions used): + https://iquilezles.org/articles/distfunctions2d/ +- Valve's 2007 SIGGRAPH paper on SDF for vector textures and glyphs (foundational reference): + https://steamcdn-a.akamaihd.net/apps/valve/2007/SIGGRAPH2007_AlphaTestedMagnification.pdf +- Randy Gaul's practical writeup on SDF 2D rendering with shape-type branching, attribute layout, + warp divergence tradeoffs, and polyline rendering: + https://randygaul.github.io/graphics/2025/03/04/2D-Rendering-SDF-and-Atlases.html +- Audulus vger-rs — production 2D renderer using a single unified pipeline with SDF type + discriminant, same architecture as this plan. Replaced nanovg, achieving 120 FPS where nanovg fell + to 30 FPS due to CPU-side tessellation: + https://github.com/audulus/vger-rs + +#### Storage-buffer instancing for SDF primitives + +SDF primitives are submitted via a GPU storage buffer indexed by `gl_InstanceIndex` in the vertex +shader, rather than encoding per-primitive data redundantly in vertex attributes. This follows the +pattern used by both Zed GPUI and vger-rs. 
+ +Each SDF shape is described by a single `Primitive` struct (64 bytes) in the storage buffer. The +vertex shader reads `primitives[gl_InstanceIndex]`, computes the quad corner position from the unit +vertex and the primitive's bounds, and passes shape parameters to the fragment shader via `flat` +interpolated varyings. + +Compared to encoding per-primitive data in vertex attributes (the "fat vertex" approach), +storage-buffer instancing eliminates the 4–6× data duplication across quad corners. A rounded +rectangle costs 64 bytes instead of 4 vertices × 40+ bytes = 160+ bytes. + +The tessellated path retains the existing direct vertex buffer layout (20 bytes/vertex, no storage +buffer access). The vertex shader branch on `mode` (push constant) is warp-uniform — every invocation +in a draw call has the same mode — so it is effectively free on all modern GPUs. + +#### Shape kinds + +Primitives in the main pipeline's storage buffer carry a `Shape_Kind` discriminant: + +| Kind | SDF function | Notes | +| ---------- | -------------------------------------- | --------------------------------------------------------- | +| `RRect` | `sdRoundedBox` (iq) | Per-corner radii. Covers all Clay rectangles and borders. | +| `Circle` | `sdCircle` | Filled and stroked. | +| `Ellipse` | `sdEllipse` | Exact (iq's closed-form). | +| `Segment` | `sdSegment` capsule | Rounded caps, correct sub-pixel thin lines. | +| `Ring_Arc` | `abs(sdCircle) - thickness` + arc mask | Rings, arcs, circle sectors unified. | +| `NGon` | `sdRegularPolygon` | Regular n-gon for n ≥ 5. | + +The `Solid` kind (value 0) is reserved for the tessellated path, where `shape_kind` is implicitly +zero because the fragment shader receives it from zero-initialized vertex attributes. + +Stroke/outline variants of each shape are handled by the `Shape_Flags` bit set rather than separate +shape kinds. The fragment shader transforms `d = abs(d) - stroke_width` when the `Stroke` flag is +set. 
+ +**What stays tessellated:** + +- Text (SDL_ttf atlas, pending future MSDF evaluation) +- `rectangle_gradient`, `circle_gradient` (per-vertex color interpolation) +- `triangle_fan`, `triangle_strip` (arbitrary user-provided point lists) +- `line_strip` / polylines (SDF polyline rendering is possible but complex; deferred) +- Any raw vertex geometry submitted via `prepare_shape` + +The rule: if the shape has a closed-form SDF, it goes SDF. If it's described only by a vertex list or +needs per-vertex color interpolation, it stays tessellated. + +### Effects pipeline + +The effects pipeline handles blur-based visual effects: drop shadows, inner shadows, outer glow, and +similar. It uses the same storage-buffer instancing pattern as the main pipeline's SDF path, with a +dedicated pipeline state object that has its own compiled fragment shader. + +#### Combined shape + effect rendering + +When a shape has an effect (e.g., a rounded rectangle with a drop shadow), the shape is drawn +**once**, entirely in the effects pipeline. The effects fragment shader evaluates both the effect +(blur math) and the base shape's SDF, compositing them in a single pass. The shape is not duplicated +across pipelines. + +This avoids redundant overdraw. Consider a 200×100 rounded rect with a drop shadow offset by (5, 5) +and blur sigma 10: + +- **Separate-primitive approach** (shape in main pipeline + shadow in effects pipeline): the shadow + quad covers ~230×130 = 29,900 pixels, the shape quad covers 200×100 = 20,000 pixels. The ~18,500 + shadow fragments underneath the shape run the expensive blur shader only to be overwritten by the + shape. Total fragment invocations: ~49,900. + +- **Combined approach** (one primitive in effects pipeline): one quad covers ~230×130 = 29,900 + pixels. The fragment shader evaluates the blur, then evaluates the shape SDF, composites the shape + on top. Total fragment invocations: ~29,900. 
The 20,000 shape-region fragments run the blur+shape + shader, but the shape SDF evaluation adds only ~15 ops to an ~80 op blur shader. + +The combined approach uses **~40% fewer fragment invocations** per effected shape (29,900 vs 49,900) +in the common opaque case. The shape-region fragments pay a small additional cost for shape SDF +evaluation in the effects shader (~15 ops), but this is far cheaper than running 18,500 fragments +through the full blur shader (~80 ops each) and then discarding their output. For a UI with 10 +shadowed elements, the combined approach saves roughly 200,000 fragment invocations per frame. + +An `Effect_Flag.Draw_Base_Shape` flag controls whether the sharp shape layer composites on top +(default true for drop shadow, always true for inner shadow). Standalone effects (e.g., a glow with +no shape on top) clear this flag. + +Shapes without effects are submitted to the main pipeline as normal. Only shapes that have effects +are routed to the effects pipeline. + +#### Drop shadow implementation + +Drop shadows use the analytical blurred-rounded-rectangle technique. Raph Levien's 2020 blog post +describes an erf-based approximation that computes a Gaussian-blurred rounded rectangle in closed +form along one axis and with a 4-sample numerical integration along the other. Total fragment cost is +~80 FLOPs, one sqrt, no texture samples. This is the same technique used by Zed GPUI (via Evan +Wallace's variant) and vger-rs. 
+ +**References:** + +- Raph Levien's blurred rounded rectangles post (erf approximation, squircle contour refinement): + https://raphlinus.github.io/graphics/2020/04/21/blurred-rounded-rects.html +- Evan Wallace's original WebGL implementation (used by Figma): + https://madebyevan.com/shaders/fast-rounded-rectangle-shadows/ +- Vello's implementation of blurred rounded rectangle as a gradient type: + https://github.com/linebender/vello/pull/665 + +### Backdrop-effects pipeline + +The backdrop-effects pipeline handles effects that sample the current render target as input: frosted +glass, refraction, mirror surfaces. It is structurally separated from the effects pipeline for two +reasons: + +1. **Render-state requirement.** Before any backdrop-sampling fragment can run, the current render + target must be copied to a separate texture via `CopyGPUTextureToTexture`. This is a + command-buffer-level operation that cannot happen mid-render-pass. The copy naturally creates a + pipeline boundary. + +2. **Register pressure.** Backdrop-sampling shaders read from a texture with Gaussian kernel weights + (multiple texture fetches per fragment), pushing register usage to ~70–80. Including this in the + effects pipeline would reduce occupancy for all shadow/glow fragments from ~42% to ~28%, costing + measurable throughput on the common case. + +The backdrop-effects pipeline binds a secondary sampler pointing at the captured backdrop texture. When +no backdrop effects are present in a frame, this pipeline is never bound and the texture copy never +happens — zero cost. + +### Vertex layout + +The vertex struct is unchanged from the current 20-byte layout: + +``` +Vertex :: struct { + position: [2]f32, // 0: screen-space position + uv: [2]f32, // 8: atlas UV (text) or unused (shapes) + color: Color, // 16: u8x4, GPU-normalized to float +} +``` + +This layout is shared between the tessellated path and the SDF unit-quad vertices. 
For tessellated +draws, `position` carries actual world-space geometry. For SDF draws, `position` carries unit-quad +corners (0,0 to 1,1) and the vertex shader computes world-space position from the storage-buffer +primitive's bounds. + +The `Primitive` struct for SDF shapes lives in the storage buffer, not in vertex attributes: + +``` +Primitive :: struct { + kind: Shape_Kind, // 0: enum u8 + flags: Shape_Flags, // 1: bit_set[Shape_Flag; u8] + _pad: u16, // 2: reserved + bounds: [4]f32, // 4: min_x, min_y, max_x, max_y + color: Color, // 20: u8x4 + _pad2: [3]u8, // 24: alignment + params: Shape_Params, // 28: raw union, 32 bytes +} +// Total: 60 bytes (padded to 64 for GPU alignment) +``` + +`Shape_Params` is a `#raw_union` with named variants per shape kind (`rrect`, `circle`, `segment`, +etc.), ensuring type safety on the CPU side and zero-cost reinterpretation on the GPU side. + +### Draw submission order + +Within each scissor region, draws are issued in submission order to preserve the painter's algorithm: + +1. Bind **effects pipeline** → draw all queued effects primitives for this scissor (instanced, one + draw call). Each effects primitive includes its base shape and composites internally. +2. Bind **main pipeline, tessellated mode** → draw all queued tessellated vertices (non-indexed for + shapes, indexed for text). Pipeline state unchanged from today. +3. Bind **main pipeline, SDF mode** → draw all queued SDF primitives (instanced, one draw call). +4. If backdrop effects are present: copy render target, bind **backdrop-effects pipeline** → draw + backdrop primitives. + +The exact ordering within a scissor may be refined based on actual Z-ordering requirements. The key +invariant is that each primitive is drawn exactly once, in the pipeline that owns it. 
+ +### Text rendering + +Text rendering currently uses SDL_ttf's GPU text engine, which rasterizes glyphs per `(font, size)` +pair into bitmap atlases and emits indexed triangle data via `GetGPUTextDrawData`. This path is +**unchanged** by the SDF migration — text continues to flow through the main pipeline's tessellated +mode with `shape_kind = Solid`, sampling the SDL_ttf atlas texture. + +A future phase may evaluate MSDF (multi-channel signed distance field) text rendering, which would +allow resolution-independent glyph rendering from a single small atlas per font. This would involve: + +- Offline atlas generation via Chlumský's msdf-atlas-gen tool. +- Runtime glyph metrics via `vendor:stb/truetype` (already in the Odin distribution). +- A new `Shape_Kind.MSDF_Glyph` variant in the main pipeline's fragment shader. +- Potential removal of the SDL_ttf dependency. + +This is explicitly deferred. The SDF shape migration is independent of and does not block text +changes. + +**References:** + +- Viktor Chlumský's MSDF master's thesis and msdfgen tool: + https://github.com/Chlumsky/msdfgen +- MSDF atlas generator for font atlas packing: + https://github.com/Chlumsky/msdf-atlas-gen +- Valve's original SDF text rendering paper (SIGGRAPH 2007): + https://steamcdn-a.akamaihd.net/apps/valve/2007/SIGGRAPH2007_AlphaTestedMagnification.pdf + +## 3D rendering + +3D pipeline architecture is under consideration and will be documented separately. The current +expectation is that 3D rendering will use dedicated pipelines (separate from the 2D pipelines) +sharing GPU resources (textures, samplers, command buffer lifecycle) with the 2D renderer. + +## Building shaders + +GLSL shader sources live in `shaders/source/`. Compiled outputs (SPIR-V and Metal Shading Language) +are generated into `shaders/generated/` via the meta tool: + +``` +odin run meta -- gen-shaders +``` + +Requires `glslangValidator` and `spirv-cross` on PATH. 
diff --git a/draw/draw.odin b/draw/draw.odin
new file mode 100644
index 0000000..c368cde
--- /dev/null
+++ b/draw/draw.odin
@@ -0,0 +1,759 @@
+package draw
+
+import clay "../vendor/clay"
+import "base:runtime"
+import "core:c"
+import "core:log"
+
+import "core:strings"
+import sdl "vendor:sdl3"
+import sdl_ttf "vendor:sdl3/ttf"
+
+when ODIN_OS == .Darwin {
+	SHADER_TYPE :: sdl.GPUShaderFormat{.MSL}
+	ENTRY_POINT :: "main0"
+} else {
+	SHADER_TYPE :: sdl.GPUShaderFormat{.SPIRV}
+	ENTRY_POINT :: "main"
+}
+
+BUFFER_INIT_SIZE :: 256
+INITIAL_LAYER_SIZE :: 5
+INITIAL_SCISSOR_SIZE :: 10
+
+// ---------------------------------------------------------------------------------------------------------------------
+// ----- Color -------------------------
+// ---------------------------------------------------------------------------------------------------------------------
+
+Color :: distinct [4]u8
+
+BLACK :: Color{0, 0, 0, 255}
+WHITE :: Color{255, 255, 255, 255}
+RED :: Color{255, 0, 0, 255}
+GREEN :: Color{0, 255, 0, 255}
+BLUE :: Color{0, 0, 255, 255}
+BLANK :: Color{0, 0, 0, 0}
+
+// Convert clay.Color ([4]c.float in 0–255 range) to Color, clamping each channel to 0–255 before narrowing to u8.
+color_from_clay :: proc(cc: clay.Color) -> Color {
+	return Color{u8(clamp(cc[0], 0, 255)), u8(clamp(cc[1], 0, 255)), u8(clamp(cc[2], 0, 255)), u8(clamp(cc[3], 0, 255))}
+}
+
+// Convert Color to [4]f32 in 0.0–1.0 range. Useful for SDL interop (e.g. clear color).
+color_to_f32 :: proc(c: Color) -> [4]f32 { + INV :: 1.0 / 255.0 + return {f32(c[0]) * INV, f32(c[1]) * INV, f32(c[2]) * INV, f32(c[3]) * INV} +} + +// --------------------------------------------------------------------------------------------------------------------- +// ----- Core types -------------------- +// --------------------------------------------------------------------------------------------------------------------- + +Rectangle :: struct { + x: f32, + y: f32, + w: f32, + h: f32, +} + +Sub_Batch_Kind :: enum u8 { + Shapes, // non-indexed, white texture, mode 0 + Text, // indexed, atlas texture, mode 0 + SDF, // instanced unit quad, white texture, mode 1 +} + +Sub_Batch :: struct { + kind: Sub_Batch_Kind, + offset: u32, // Shapes: vertex offset; Text: text_batch index; SDF: primitive index + count: u32, // Shapes: vertex count; Text: always 1; SDF: primitive count +} + +Layer :: struct { + bounds: Rectangle, + sub_batch_start: u32, + sub_batch_len: u32, + scissor_start: u32, + scissor_len: u32, +} + +Scissor :: struct { + bounds: sdl.Rect, + sub_batch_start: u32, + sub_batch_len: u32, +} + +// --------------------------------------------------------------------------------------------------------------------- +// ----- Global state ------------------ +// --------------------------------------------------------------------------------------------------------------------- + +GLOB: Global + +Global :: struct { + odin_context: runtime.Context, + pipeline_2d_base: Pipeline_2D_Base, + text_cache: Text_Cache, + layers: [dynamic]Layer, + scissors: [dynamic]Scissor, + tmp_shape_verts: [dynamic]Vertex, + tmp_text_verts: [dynamic]Vertex, + tmp_text_indices: [dynamic]c.int, + tmp_text_batches: [dynamic]TextBatch, + tmp_primitives: [dynamic]Primitive, + tmp_sub_batches: [dynamic]Sub_Batch, + clay_mem: [^]u8, + msaa_texture: ^sdl.GPUTexture, + curr_layer_index: uint, + max_layers: int, + max_scissors: int, + max_shape_verts: int, + max_text_verts: int, + 
max_text_indices: int, + max_text_batches: int, + max_primitives: int, + max_sub_batches: int, + dpi_scaling: f32, + msaa_w: u32, + msaa_h: u32, + sample_count: sdl.GPUSampleCount, + clay_z_index: i16, + cleared: bool, +} + +Init_Options :: struct { + // MSAA sample count. Default is ._1 (no MSAA). SDF rendering does not benefit from MSAA + // because SDF fragments compute coverage analytically via `smoothstep`. MSAA helps for + // text glyph edges and tessellated user geometry. Set to ._4 or ._8 for text-heavy UIs, + // or use `MSAA_MAX` to request the highest sample count the GPU supports for the swapchain + // format. + msaa_samples: sdl.GPUSampleCount, +} + +// Sentinel value: when passed as msaa_samples, `init` will use the maximum MSAA sample count +// supported by the GPU for the swapchain format. +MSAA_MAX :: sdl.GPUSampleCount(0xFF) + +// Initialize the renderer. Returns false if GPU pipeline or text engine creation fails. +@(require_results) +init :: proc( + device: ^sdl.GPUDevice, + window: ^sdl.Window, + options: Init_Options = {}, + allocator := context.allocator, + odin_context := context, +) -> ( + ok: bool, +) { + min_memory_size: c.size_t = cast(c.size_t)clay.MinMemorySize() + resolved_sample_count := options.msaa_samples + if resolved_sample_count == MSAA_MAX { + resolved_sample_count = max_sample_count(device, window) + } + + pipeline, pipeline_ok := create_pipeline_2d_base(device, window, resolved_sample_count) + if !pipeline_ok { + return false + } + + text_cache, text_ok := init_text_cache(device, allocator) + if !text_ok { + destroy_pipeline_2d_base(device, &pipeline) + return false + } + + GLOB = Global { + layers = make([dynamic]Layer, 0, INITIAL_LAYER_SIZE, allocator = allocator), + scissors = make([dynamic]Scissor, 0, INITIAL_SCISSOR_SIZE, allocator = allocator), + tmp_shape_verts = make([dynamic]Vertex, 0, BUFFER_INIT_SIZE, allocator = allocator), + tmp_text_verts = make([dynamic]Vertex, 0, BUFFER_INIT_SIZE, allocator = allocator), + 
tmp_text_indices = make([dynamic]c.int, 0, BUFFER_INIT_SIZE, allocator = allocator), + tmp_text_batches = make([dynamic]TextBatch, 0, BUFFER_INIT_SIZE, allocator = allocator), + tmp_primitives = make([dynamic]Primitive, 0, BUFFER_INIT_SIZE, allocator = allocator), + tmp_sub_batches = make([dynamic]Sub_Batch, 0, BUFFER_INIT_SIZE, allocator = allocator), + odin_context = odin_context, + dpi_scaling = sdl.GetWindowDisplayScale(window), + clay_mem = make([^]u8, min_memory_size, allocator = allocator), + sample_count = resolved_sample_count, + pipeline_2d_base = pipeline, + text_cache = text_cache, + } + log.debug("Window DPI scaling:", GLOB.dpi_scaling) + arena := clay.CreateArenaWithCapacityAndMemory(min_memory_size, GLOB.clay_mem) + window_width, window_height: c.int + sdl.GetWindowSize(window, &window_width, &window_height) + + clay.Initialize(arena, {f32(window_width), f32(window_height)}, {handler = clay_error_handler}) + clay.SetMeasureTextFunction(measure_text, nil) + return true +} + +// TODO every x frames nuke max values in case of edge cases where max gets set very high +// Called at the end of every frame +resize_global :: proc() { + if len(GLOB.layers) > GLOB.max_layers do GLOB.max_layers = len(GLOB.layers) + shrink(&GLOB.layers, GLOB.max_layers) + if len(GLOB.scissors) > GLOB.max_scissors do GLOB.max_scissors = len(GLOB.scissors) + shrink(&GLOB.scissors, GLOB.max_scissors) + if len(GLOB.tmp_shape_verts) > GLOB.max_shape_verts do GLOB.max_shape_verts = len(GLOB.tmp_shape_verts) + shrink(&GLOB.tmp_shape_verts, GLOB.max_shape_verts) + if len(GLOB.tmp_text_verts) > GLOB.max_text_verts do GLOB.max_text_verts = len(GLOB.tmp_text_verts) + shrink(&GLOB.tmp_text_verts, GLOB.max_text_verts) + if len(GLOB.tmp_text_indices) > GLOB.max_text_indices do GLOB.max_text_indices = len(GLOB.tmp_text_indices) + shrink(&GLOB.tmp_text_indices, GLOB.max_text_indices) + if len(GLOB.tmp_text_batches) > GLOB.max_text_batches do GLOB.max_text_batches = len(GLOB.tmp_text_batches) + 
shrink(&GLOB.tmp_text_batches, GLOB.max_text_batches) + if len(GLOB.tmp_primitives) > GLOB.max_primitives do GLOB.max_primitives = len(GLOB.tmp_primitives) + shrink(&GLOB.tmp_primitives, GLOB.max_primitives) + if len(GLOB.tmp_sub_batches) > GLOB.max_sub_batches do GLOB.max_sub_batches = len(GLOB.tmp_sub_batches) + shrink(&GLOB.tmp_sub_batches, GLOB.max_sub_batches) +} + +destroy :: proc(device: ^sdl.GPUDevice, allocator := context.allocator) { + delete(GLOB.layers) + delete(GLOB.scissors) + delete(GLOB.tmp_shape_verts) + delete(GLOB.tmp_text_verts) + delete(GLOB.tmp_text_indices) + delete(GLOB.tmp_text_batches) + delete(GLOB.tmp_primitives) + delete(GLOB.tmp_sub_batches) + free(GLOB.clay_mem, allocator) + if GLOB.msaa_texture != nil { + sdl.ReleaseGPUTexture(device, GLOB.msaa_texture) + } + destroy_pipeline_2d_base(device, &GLOB.pipeline_2d_base) + destroy_text_cache() +} + +// Internal +clear_global :: proc() { + GLOB.curr_layer_index = 0 + GLOB.clay_z_index = 0 + GLOB.cleared = false + clear(&GLOB.layers) + clear(&GLOB.scissors) + clear(&GLOB.tmp_shape_verts) + clear(&GLOB.tmp_text_verts) + clear(&GLOB.tmp_text_indices) + clear(&GLOB.tmp_text_batches) + clear(&GLOB.tmp_primitives) + clear(&GLOB.tmp_sub_batches) +} + +// --------------------------------------------------------------------------------------------------------------------- +// ----- Text measurement (Clay) ------- +// --------------------------------------------------------------------------------------------------------------------- + +@(private = "file") +measure_text :: proc "c" ( + text: clay.StringSlice, + config: ^clay.TextElementConfig, + user_data: rawptr, +) -> clay.Dimensions { + context = GLOB.odin_context + text := string(text.chars[:text.length]) + c_text := strings.clone_to_cstring(text, context.temp_allocator) + w, h: c.int + if !sdl_ttf.GetStringSize(get_font(config.fontId, config.fontSize), c_text, 0, &w, &h) { + log.panicf("Failed to measure text: %s", sdl.GetError()) + } + + return 
clay.Dimensions{width = f32(w) / GLOB.dpi_scaling, height = f32(h) / GLOB.dpi_scaling} +} + +// --------------------------------------------------------------------------------------------------------------------- +// ----- Frame lifecycle --------------- +// --------------------------------------------------------------------------------------------------------------------- + +// Sets up renderer to begin upload to the GPU. Returns starting `Layer` to begin processing primitives for. +begin :: proc(bounds: Rectangle) -> ^Layer { + // Cleanup + clear_global() + + // Begin new layer + start a new scissor + scissor := Scissor { + bounds = sdl.Rect { + x = i32(bounds.x * GLOB.dpi_scaling), + y = i32(bounds.y * GLOB.dpi_scaling), + w = i32(bounds.w * GLOB.dpi_scaling), + h = i32(bounds.h * GLOB.dpi_scaling), + }, + } + append(&GLOB.scissors, scissor) + + layer := Layer { + bounds = bounds, + scissor_len = 1, + } + append(&GLOB.layers, layer) + return &GLOB.layers[GLOB.curr_layer_index] +} + +// Creates a new layer +new_layer :: proc(prev_layer: ^Layer, bounds: Rectangle) -> ^Layer { + layer := Layer { + bounds = bounds, + sub_batch_start = prev_layer.sub_batch_start + prev_layer.sub_batch_len, + scissor_start = prev_layer.scissor_start + prev_layer.scissor_len, + scissor_len = 1, + } + append(&GLOB.layers, layer) + GLOB.curr_layer_index += 1 + log.debug("Added new layer; curr index", GLOB.curr_layer_index) + + scissor := Scissor { + sub_batch_start = u32(len(GLOB.tmp_sub_batches)), + bounds = sdl.Rect { + x = i32(bounds.x * GLOB.dpi_scaling), + y = i32(bounds.y * GLOB.dpi_scaling), + w = i32(bounds.w * GLOB.dpi_scaling), + h = i32(bounds.h * GLOB.dpi_scaling), + }, + } + append(&GLOB.scissors, scissor) + return &GLOB.layers[GLOB.curr_layer_index] +} + +// --------------------------------------------------------------------------------------------------------------------- +// ----- Built-in primitive processing -- +// 
---------------------------------------------------------------------------------------------------------------------
+
+// Queue tessellated geometry on `layer`. The vertices are appended to the shared shape vertex list and
+// recorded as a `.Shapes` sub-batch on the last scissor in the layer's range. Empty input is ignored.
+prepare_shape :: proc(layer: ^Layer, vertices: []Vertex) {
+	if len(vertices) == 0 {
+		return
+	}
+
+	// Capture the insertion point before appending so the sub-batch offset points at the new vertices.
+	first_vertex := u32(len(GLOB.tmp_shape_verts))
+	append(&GLOB.tmp_shape_verts, ..vertices)
+
+	active_scissor_index := layer.scissor_start + layer.scissor_len - 1
+	append_or_extend_sub_batch(
+		&GLOB.scissors[active_scissor_index],
+		layer,
+		.Shapes,
+		first_vertex,
+		u32(len(vertices)),
+	)
+}
+
+// Queue a single SDF primitive on `layer`. The primitive is appended to the shared primitive list and
+// recorded as a one-element `.SDF` sub-batch on the last scissor in the layer's range.
+prepare_sdf_primitive :: proc(layer: ^Layer, prim: Primitive) {
+	// Index the primitive will occupy once appended; used as the sub-batch offset.
+	primitive_index := u32(len(GLOB.tmp_primitives))
+	append(&GLOB.tmp_primitives, prim)
+
+	active_scissor_index := layer.scissor_start + layer.scissor_len - 1
+	append_or_extend_sub_batch(&GLOB.scissors[active_scissor_index], layer, .SDF, primitive_index, 1)
+}
+
+// Submit a text element to the given layer for rendering.
+// Copies SDL_ttf vertices directly (with baked position) and copies indices for indexed drawing.
+prepare_text :: proc(layer: ^Layer, txt: Text) {
+	// SDL_ttf returns a linked list of draw sequences (walked via `.next`), each with its own atlas texture.
+	first_sequence := sdl_ttf.GetGPUTextDrawData(txt.ref)
+	if first_sequence == nil do return // nil is normal for empty text
+
+	active_scissor := &GLOB.scissors[layer.scissor_start + layer.scissor_len - 1]
+
+	// Text origin scaled by the DPI factor; it is the same for every vertex, so compute it once.
+	origin_x := txt.position[0] * GLOB.dpi_scaling
+	origin_y := txt.position[1] * GLOB.dpi_scaling
+
+	for sequence := first_sequence; sequence != nil; sequence = sequence.next {
+		// Remember where this sequence's data begins inside the shared CPU-side arrays.
+		first_vertex := u32(len(GLOB.tmp_text_verts))
+		first_index := u32(len(GLOB.tmp_text_indices))
+
+		// Bake the text origin into every vertex (note the sign flip on SDL_ttf's y coordinate).
+		for vi in 0 ..< sequence.num_vertices {
+			glyph_pos := sequence.xy[vi]
+			glyph_uv := sequence.uv[vi]
+			vertex := Vertex {
+				position = {glyph_pos.x + origin_x, -glyph_pos.y + origin_y},
+				uv       = {glyph_uv.x, glyph_uv.y},
+				color    = txt.color,
+			}
+			append(&GLOB.tmp_text_verts, vertex)
+		}
+
+		// Indices are copied verbatim; the batch's `vertex_start` records where their vertices begin.
+		append(&GLOB.tmp_text_indices, ..sequence.indices[:sequence.num_indices])
+
+		batch_index := u32(len(GLOB.tmp_text_batches))
+		batch := TextBatch {
+			atlas_texture = sequence.atlas_texture,
+			vertex_start  = first_vertex,
+			vertex_count  = u32(sequence.num_vertices),
+			index_start   = first_index,
+			index_count   = u32(sequence.num_indices),
+		}
+		append(&GLOB.tmp_text_batches, batch)
+
+		// Each atlas chunk is a separate sub-batch (different atlas textures can't coalesce)
+		append_or_extend_sub_batch(active_scissor, layer, .Text, batch_index, 1)
+	}
+}
+
+// Append a new sub-batch or extend the last one if same kind and contiguous.
+@(private)
+append_or_extend_sub_batch :: proc(
+	scissor: ^Scissor,
+	layer: ^Layer,
+	kind: Sub_Batch_Kind,
+	offset: u32,
+	count: u32,
+) {
+	// First try to grow the scissor's most recent sub-batch instead of recording a new one.
+	if scissor.sub_batch_len > 0 {
+		tail_index := scissor.sub_batch_start + scissor.sub_batch_len - 1
+		tail := &GLOB.tmp_sub_batches[tail_index]
+
+		same_kind := tail.kind == kind
+		mergeable := kind != .Text // text sub-batches are never merged
+		contiguous := tail.offset + tail.count == offset
+		if same_kind && mergeable && contiguous {
+			tail.count += count
+			return
+		}
+	}
+
+	// Otherwise record a fresh sub-batch; the scissor and the layer each keep their own running count.
+	new_batch := Sub_Batch{kind = kind, offset = offset, count = count}
+	append(&GLOB.tmp_sub_batches, new_batch)
+	layer.sub_batch_len += 1
+	scissor.sub_batch_len += 1
+}
+
+// ---------------------------------------------------------------------------------------------------------------------
+// ----- Clay ------------------------
+// ---------------------------------------------------------------------------------------------------------------------
+
+// Error callback handed to `clay.Initialize`; forwards Clay's error type and text to the logger.
+@(private = "file")
+clay_error_handler :: proc "c" (errorData: clay.ErrorData) {
+	// Restore the Odin context stored in GLOB so the logger is usable from this "c" procedure.
+	context = GLOB.odin_context
+	log.error("Clay error:", errorData.errorType, errorData.errorText)
+}
+
+// A list of Clay render commands paired with a caller-supplied rectangle.
+ClayBatch :: struct {
+	bounds: Rectangle, // the bundled examples pass the base layer's bounds here
+	cmds:   clay.ClayArray(clay.RenderCommand), // the bundled examples fill this from clay.EndLayout()
+}
+
+// Process Clay render commands into shape and text primitives.
+prepare_clay_batch :: proc( + base_layer: ^Layer, + batch: ^ClayBatch, + mouse_wheel_delta: [2]f32, + frame_time: f32 = 0, +) { + mouse_pos: [2]f32 + mouse_flags := sdl.GetMouseState(&mouse_pos.x, &mouse_pos.y) + + // Update clay internals + clay.SetPointerState( + clay.Vector2{mouse_pos.x - base_layer.bounds.x, mouse_pos.y - base_layer.bounds.y}, + .LEFT in mouse_flags, + ) + clay.UpdateScrollContainers(true, mouse_wheel_delta, frame_time) + + layer := base_layer + + // Parse render commands + for i in 0 ..< int(batch.cmds.length) { + render_command := clay.RenderCommandArray_Get(&batch.cmds, cast(i32)i) + + // Translate bounding box of the primitive by the layer position + bounds := Rectangle { + x = render_command.boundingBox.x + layer.bounds.x, + y = render_command.boundingBox.y + layer.bounds.y, + w = render_command.boundingBox.width, + h = render_command.boundingBox.height, + } + + if render_command.zIndex > GLOB.clay_z_index { + log.debug("Higher zIndex found, creating new layer & setting z_index to", render_command.zIndex) + layer = new_layer(layer, bounds) + // Update bounds to new layer offset + bounds.x = render_command.boundingBox.x + layer.bounds.x + bounds.y = render_command.boundingBox.y + layer.bounds.y + GLOB.clay_z_index = render_command.zIndex + } + + switch (render_command.commandType) { + case clay.RenderCommandType.None: + case clay.RenderCommandType.Text: + render_data := render_command.renderData.text + txt := string(render_data.stringContents.chars[:render_data.stringContents.length]) + c_text := strings.clone_to_cstring(txt, context.temp_allocator) + sdl_text := GLOB.text_cache.cache[render_command.id] + + if sdl_text == nil { + // Cache a SDL text object + sdl_text = sdl_ttf.CreateText( + GLOB.text_cache.engine, + get_font(render_data.fontId, render_data.fontSize), + c_text, + 0, + ) + if sdl_text == nil { + log.panicf("Failed to create SDL text for clay render command: %s", sdl.GetError()) + } + GLOB.text_cache.cache[render_command.id] 
= sdl_text + } else { + if !sdl_ttf.SetTextString(sdl_text, c_text, 0) { + log.panicf("Failed to update SDL text string: %s", sdl.GetError()) + } + } + + prepare_text(layer, Text{sdl_text, {bounds.x, bounds.y}, color_from_clay(render_data.textColor)}) + case clay.RenderCommandType.Image: + case clay.RenderCommandType.ScissorStart: + if bounds.w == 0 || bounds.h == 0 { + continue + } + + curr_scissor := &GLOB.scissors[layer.scissor_start + layer.scissor_len - 1] + + if curr_scissor.sub_batch_len != 0 { + // Scissor has some content, need to make a new scissor + new := Scissor { + sub_batch_start = curr_scissor.sub_batch_start + curr_scissor.sub_batch_len, + bounds = sdl.Rect { + c.int(bounds.x * GLOB.dpi_scaling), + c.int(bounds.y * GLOB.dpi_scaling), + c.int(bounds.w * GLOB.dpi_scaling), + c.int(bounds.h * GLOB.dpi_scaling), + }, + } + append(&GLOB.scissors, new) + layer.scissor_len += 1 + } else { + curr_scissor.bounds = sdl.Rect { + c.int(bounds.x * GLOB.dpi_scaling), + c.int(bounds.y * GLOB.dpi_scaling), + c.int(bounds.w * GLOB.dpi_scaling), + c.int(bounds.h * GLOB.dpi_scaling), + } + } + case clay.RenderCommandType.ScissorEnd: + case clay.RenderCommandType.Rectangle: + render_data := render_command.renderData.rectangle + cr := render_data.cornerRadius + color := color_from_clay(render_data.backgroundColor) + radii := [4]f32{cr.topLeft, cr.topRight, cr.bottomRight, cr.bottomLeft} + + if radii == {0, 0, 0, 0} { + rectangle(layer, bounds, color) + } else { + rectangle_corners(layer, bounds, radii, color) + } + case clay.RenderCommandType.Border: + render_data := render_command.renderData.border + cr := render_data.cornerRadius + color := color_from_clay(render_data.color) + thick := f32(render_data.width.top) + radii := [4]f32{cr.topLeft, cr.topRight, cr.bottomRight, cr.bottomLeft} + + if radii == {0, 0, 0, 0} { + rectangle_lines(layer, bounds, color, thick) + } else { + rectangle_corners_lines(layer, bounds, radii, color, thick) + } + case 
clay.RenderCommandType.Custom: + } + } +} + +// Render primitives. clear_color is the background fill before any layers are drawn. +end :: proc( + device: ^sdl.GPUDevice, + window: ^sdl.Window, + clear_color: Color = BLACK, +) { + cmd_buffer := sdl.AcquireGPUCommandBuffer(device) + if cmd_buffer == nil { + log.panicf("Failed to acquire GPU command buffer: %s", sdl.GetError()) + } + + // Upload primitives to GPU + copy_pass := sdl.BeginGPUCopyPass(cmd_buffer) + upload(device, copy_pass) + sdl.EndGPUCopyPass(copy_pass) + + // Resize dynamic arrays + // TODO: This should only be called occasionally, not every frame. + resize_global() + + swapchain_texture: ^sdl.GPUTexture + w, h: u32 + if !sdl.WaitAndAcquireGPUSwapchainTexture(cmd_buffer, window, &swapchain_texture, &w, &h) { + log.panicf("Failed to acquire swapchain texture: %s", sdl.GetError()) + } + + if swapchain_texture == nil { + // Window is minimized or not visible — submit and skip this frame + if !sdl.SubmitGPUCommandBuffer(cmd_buffer) { + log.panicf("Failed to submit GPU command buffer (minimized window): %s", sdl.GetError()) + } + return + } + + use_msaa := GLOB.sample_count != ._1 + render_texture := swapchain_texture + + if use_msaa { + ensure_msaa_texture(device, sdl.GetGPUSwapchainTextureFormat(device, window), w, h) + render_texture = GLOB.msaa_texture + } + + cc := color_to_f32(clear_color) + + // Draw layers. One render pass per layer; sub-batches draw in submission order within each scissor. + for &layer, index in GLOB.layers { + log.debug("Drawing layer", index) + draw_layer(device, window, cmd_buffer, render_texture, w, h, cc, &layer) + } + + // Resolve MSAA render texture to the swapchain. 
+ if use_msaa { + resolve_pass := sdl.BeginGPURenderPass( + cmd_buffer, + &sdl.GPUColorTargetInfo { + texture = render_texture, + load_op = .LOAD, + store_op = .RESOLVE, + resolve_texture = swapchain_texture, + }, + 1, + nil, + ) + sdl.EndGPURenderPass(resolve_pass) + } + + if !sdl.SubmitGPUCommandBuffer(cmd_buffer) { + log.panicf("Failed to submit GPU command buffer: %s", sdl.GetError()) + } +} + +// --------------------------------------------------------------------------------------------------------------------- +// ----- MSAA -------------------------- +// --------------------------------------------------------------------------------------------------------------------- + +// Query the highest MSAA sample count supported by the GPU for the swapchain format. +max_sample_count :: proc(device: ^sdl.GPUDevice, window: ^sdl.Window) -> sdl.GPUSampleCount { + format := sdl.GetGPUSwapchainTextureFormat(device, window) + counts := [?]sdl.GPUSampleCount{._8, ._4, ._2} + for sc in counts { + if sdl.GPUTextureSupportsSampleCount(device, format, sc) do return sc + } + return ._1 +} + +@(private = "file") +ensure_msaa_texture :: proc(device: ^sdl.GPUDevice, format: sdl.GPUTextureFormat, w, h: u32) { + if GLOB.msaa_texture != nil && GLOB.msaa_w == w && GLOB.msaa_h == h { + return + } + if GLOB.msaa_texture != nil { + sdl.ReleaseGPUTexture(device, GLOB.msaa_texture) + } + GLOB.msaa_texture = sdl.CreateGPUTexture( + device, + sdl.GPUTextureCreateInfo { + type = .D2, + format = format, + usage = {.COLOR_TARGET}, + width = w, + height = h, + layer_count_or_depth = 1, + num_levels = 1, + sample_count = GLOB.sample_count, + }, + ) + if GLOB.msaa_texture == nil { + log.panicf("Failed to create MSAA texture (%dx%d): %s", w, h, sdl.GetError()) + } + GLOB.msaa_w = w + GLOB.msaa_h = h +} + +// --------------------------------------------------------------------------------------------------------------------- +// ----- Utility ----------------------- +// 
--------------------------------------------------------------------------------------------------------------------- + +ortho_rh :: proc(left: f32, right: f32, bottom: f32, top: f32, near: f32, far: f32) -> matrix[4, 4]f32 { + return matrix[4, 4]f32{ + 2.0 / (right - left), 0.0, 0.0, -(right + left) / (right - left), + 0.0, 2.0 / (top - bottom), 0.0, -(top + bottom) / (top - bottom), + 0.0, 0.0, -2.0 / (far - near), -(far + near) / (far - near), + 0.0, 0.0, 0.0, 1.0, + } +} + +Draw_Mode :: enum u32 { + Tessellated = 0, + SDF = 1, +} + +Vertex_Uniforms :: struct { + projection: matrix[4, 4]f32, + scale: f32, + mode: Draw_Mode, +} + +// Push projection, dpi scale, and rendering mode as a single uniform block (slot 0). +push_globals :: proc(cmd_buffer: ^sdl.GPUCommandBuffer, w: f32, h: f32, mode: Draw_Mode = .Tessellated) { + globals := Vertex_Uniforms { + projection = ortho_rh(left = 0.0, top = 0.0, right = f32(w), bottom = f32(h), near = -1.0, far = 1.0), + scale = GLOB.dpi_scaling, + mode = mode, + } + + sdl.PushGPUVertexUniformData(cmd_buffer, 0, &globals, size_of(Vertex_Uniforms)) +} + +// --------------------------------------------------------------------------------------------------------------------- +// ----- Buffer ------------------------ +// --------------------------------------------------------------------------------------------------------------------- + +Buffer :: struct { + gpu: ^sdl.GPUBuffer, + transfer: ^sdl.GPUTransferBuffer, + size: u32, +} + +@(require_results) +create_buffer :: proc( + device: ^sdl.GPUDevice, + size: u32, + gpu_usage: sdl.GPUBufferUsageFlags, +) -> ( + buffer: Buffer, + ok: bool, +) { + gpu := sdl.CreateGPUBuffer(device, sdl.GPUBufferCreateInfo{usage = gpu_usage, size = size}) + if gpu == nil { + log.errorf("Failed to create GPU buffer (size=%d): %s", size, sdl.GetError()) + return buffer, false + } + transfer := sdl.CreateGPUTransferBuffer( + device, + sdl.GPUTransferBufferCreateInfo{usage = .UPLOAD, size = size}, + ) + 
if transfer == nil { + sdl.ReleaseGPUBuffer(device, gpu) + log.errorf("Failed to create GPU transfer buffer (size=%d): %s", size, sdl.GetError()) + return buffer, false + } + return Buffer{gpu, transfer, size}, true +} + +grow_buffer_if_needed :: proc( + device: ^sdl.GPUDevice, + buffer: ^Buffer, + new_size: u32, + gpu_usage: sdl.GPUBufferUsageFlags, +) { + if new_size > buffer.size { + log.debug("Resizing buffer from", buffer.size, "to", new_size) + destroy_buffer(device, buffer) + buffer.gpu = sdl.CreateGPUBuffer(device, sdl.GPUBufferCreateInfo{usage = gpu_usage, size = new_size}) + if buffer.gpu == nil { + log.panicf("Failed to grow GPU buffer (new_size=%d): %s", new_size, sdl.GetError()) + } + buffer.transfer = sdl.CreateGPUTransferBuffer( + device, + sdl.GPUTransferBufferCreateInfo{usage = .UPLOAD, size = new_size}, + ) + if buffer.transfer == nil { + log.panicf("Failed to grow GPU transfer buffer (new_size=%d): %s", new_size, sdl.GetError()) + } + buffer.size = new_size + } +} + +destroy_buffer :: proc(device: ^sdl.GPUDevice, buffer: ^Buffer) { + sdl.ReleaseGPUBuffer(device, buffer.gpu) + sdl.ReleaseGPUTransferBuffer(device, buffer.transfer) +} diff --git a/draw/examples/fonts/JetBrainsMono-Bold.ttf b/draw/examples/fonts/JetBrainsMono-Bold.ttf new file mode 100644 index 0000000..8c93043 Binary files /dev/null and b/draw/examples/fonts/JetBrainsMono-Bold.ttf differ diff --git a/draw/examples/fonts/JetBrainsMono-Regular.ttf b/draw/examples/fonts/JetBrainsMono-Regular.ttf new file mode 100644 index 0000000..dff66cc Binary files /dev/null and b/draw/examples/fonts/JetBrainsMono-Regular.ttf differ diff --git a/draw/examples/hellope.odin b/draw/examples/hellope.odin new file mode 100644 index 0000000..55dd446 --- /dev/null +++ b/draw/examples/hellope.odin @@ -0,0 +1,147 @@ +package examples + +import "../../draw" +import "../../vendor/clay" +import "core:c" +import "core:os" +import sdl "vendor:sdl3" +import sdl_ttf "vendor:sdl3/ttf" + +JETBRAINS_MONO_REGULAR_RAW 
:: #load("fonts/JetBrainsMono-Regular.ttf")
+JETBRAINS_MONO_REGULAR: draw.Font_Id = max(draw.Font_Id) // Max so we crash if registration is forgotten
+
+// Shapes demo: opens a 500x500 window and redraws one of each built-in shape every frame until the
+// window receives a QUIT event. Exits the process with status 1 if SDL or the renderer fails to start.
+hellope_shapes :: proc() {
+	if !sdl.Init({.VIDEO}) do os.exit(1)
+	window := sdl.CreateWindow("Hellope!", 500, 500, {.HIGH_PIXEL_DENSITY})
+	// Request the same shader format the draw package is built for (MSL on Darwin, SPIR-V elsewhere);
+	// asking for MSL unconditionally only works on Darwin.
+	gpu := sdl.CreateGPUDevice(draw.SHADER_TYPE, true, nil)
+	if !sdl.ClaimWindowForGPUDevice(gpu, window) do os.exit(1)
+	if !draw.init(gpu, window) do os.exit(1)
+
+	for {
+		defer free_all(context.temp_allocator)
+		ev: sdl.Event
+		for sdl.PollEvent(&ev) {
+			if ev.type == .QUIT do return
+		}
+		base_layer := draw.begin({w = 500, h = 500})
+
+		// Background
+		draw.rectangle(base_layer, {0, 0, 500, 500}, {40, 40, 40, 255})
+
+		// Shapes demo
+		draw.rectangle(base_layer, {20, 20, 200, 120}, {80, 120, 200, 255})
+		draw.rectangle_lines(base_layer, {20, 20, 200, 120}, draw.WHITE, thick = 2)
+		draw.rectangle_rounded(base_layer, {240, 20, 240, 120}, 0.3, {200, 80, 80, 255})
+		draw.rectangle_gradient(
+			base_layer,
+			{20, 160, 460, 60},
+			{255, 0, 0, 255},
+			{0, 255, 0, 255},
+			{0, 0, 255, 255},
+			{255, 255, 0, 255},
+		)
+
+		draw.circle(base_layer, {120, 320}, 60, {100, 200, 100, 255})
+		draw.circle_lines(base_layer, {120, 320}, 60, draw.WHITE, thick = 2)
+		draw.circle_gradient(base_layer, {300, 320}, 60, {255, 200, 50, 255}, {200, 50, 50, 255})
+		draw.ring(base_layer, {430, 320}, 30, 55, 0, 270, {100, 100, 220, 255})
+
+		draw.triangle(base_layer, {60, 420}, {180, 480}, {20, 480}, {220, 180, 60, 255})
+		draw.line(base_layer, {220, 420}, {460, 480}, {255, 255, 100, 255}, thick = 3)
+		draw.poly(base_layer, {350, 450}, 6, 40, {180, 100, 220, 255}, rotation = 30)
+		draw.poly_lines(base_layer, {350, 450}, 6, 40, draw.WHITE, rotation = 30, thick = 2)
+
+		draw.end(gpu, window)
+	}
+}
+
+// Text demo: registers the embedded font and draws "Hellope!" centered in a 500x500 window every
+// frame until the window receives a QUIT event.
+hellope_text :: proc() {
+	if !sdl.Init({.VIDEO}) do os.exit(1)
+	window := sdl.CreateWindow("Hellope!", 500, 500, {.HIGH_PIXEL_DENSITY})
+	// Same shader format the draw package is built for (MSL on Darwin, SPIR-V elsewhere).
+	gpu := sdl.CreateGPUDevice(draw.SHADER_TYPE, true, nil)
+	if !sdl.ClaimWindowForGPUDevice(gpu, window) do os.exit(1)
+	if !draw.init(gpu, window) do os.exit(1)
+	JETBRAINS_MONO_REGULAR = draw.register_font(JETBRAINS_MONO_REGULAR_RAW)
+
+	FONT_SIZE :: u16(24)
+	TEXT_ID :: u32(1)
+
+	font := draw.get_font(JETBRAINS_MONO_REGULAR, FONT_SIZE)
+	dpi := sdl.GetWindowDisplayScale(window)
+
+	for {
+		defer free_all(context.temp_allocator)
+		ev: sdl.Event
+		for sdl.PollEvent(&ev) {
+			if ev.type == .QUIT do return
+		}
+		base_layer := draw.begin({w = 500, h = 500})
+
+		// Grey background
+		draw.rectangle(base_layer, {0, 0, 500, 500}, {127, 127, 127, 255})
+
+		// Measure and center text. A failed measurement would leave tw/th at zero and silently
+		// mis-place the text, so treat it as fatal like the other setup failures.
+		tw, th: c.int
+		if !sdl_ttf.GetStringSize(font, "Hellope!", 0, &tw, &th) do os.exit(1)
+		text_w := f32(tw) / dpi
+		text_h := f32(th) / dpi
+		pos_x := (500.0 - text_w) / 2.0
+		pos_y := (500.0 - text_h) / 2.0
+
+		txt := draw.text(
+			TEXT_ID,
+			"Hellope!",
+			{pos_x, pos_y},
+			color = draw.WHITE,
+			font_id = JETBRAINS_MONO_REGULAR,
+			font_size = FONT_SIZE,
+		)
+		draw.prepare_text(base_layer, txt)
+
+		draw.end(gpu, window)
+	}
+}
+
+// Clay demo: lays out a single centered "Hellope!" label with Clay each frame and renders the
+// resulting command list until the window receives a QUIT event.
+hellope_clay :: proc() {
+	if !sdl.Init({.VIDEO}) do os.exit(1)
+	window := sdl.CreateWindow("Hellope!", 500, 500, {.HIGH_PIXEL_DENSITY})
+	// Same shader format the draw package is built for (MSL on Darwin, SPIR-V elsewhere).
+	gpu := sdl.CreateGPUDevice(draw.SHADER_TYPE, true, nil)
+	if !sdl.ClaimWindowForGPUDevice(gpu, window) do os.exit(1)
+	if !draw.init(gpu, window) do os.exit(1)
+	JETBRAINS_MONO_REGULAR = draw.register_font(JETBRAINS_MONO_REGULAR_RAW)
+
+	text_config := clay.TextElementConfig {
+		fontId = JETBRAINS_MONO_REGULAR,
+		fontSize = 24,
+		textColor = {255, 255, 255, 255},
+	}
+
+	for {
+		defer free_all(context.temp_allocator)
+		ev: sdl.Event
+		for sdl.PollEvent(&ev) {
+			if ev.type == .QUIT do return
+		}
+		base_layer := draw.begin({w = 500, h = 500})
+		clay.SetLayoutDimensions({width = base_layer.bounds.w, height = base_layer.bounds.h})
+		clay.BeginLayout()
+		if clay.UI()(
+			{
+				id = clay.ID("outer"),
+				layout = {
+					sizing = {clay.SizingGrow({}), clay.SizingGrow({})},
+					childAlignment = {x = .Center, y = .Center},
+				},
+				
backgroundColor = {127, 127, 127, 255}, + }, + ) { + clay.Text("Hellope!", &text_config) + } + clay_batch := draw.ClayBatch { + bounds = base_layer.bounds, + cmds = clay.EndLayout(), + } + draw.prepare_clay_batch(base_layer, &clay_batch, {0, 0}) + draw.end(gpu, window) + } +} diff --git a/draw/examples/main.odin b/draw/examples/main.odin new file mode 100644 index 0000000..75ebd48 --- /dev/null +++ b/draw/examples/main.odin @@ -0,0 +1,73 @@ +package examples + +import "core:fmt" +import "core:mem" +import "core:os" + +main :: proc() { + //----- Tracking allocator ---------------------------------- + { + tracking_temp_allocator := false + // Temp + track_temp: mem.Tracking_Allocator + if tracking_temp_allocator { + mem.tracking_allocator_init(&track_temp, context.temp_allocator) + context.temp_allocator = mem.tracking_allocator(&track_temp) + } + // Default + track: mem.Tracking_Allocator + mem.tracking_allocator_init(&track, context.allocator) + context.allocator = mem.tracking_allocator(&track) + // Log a warning about any memory that was not freed by the end of the program. + // This could be fine for some global state or it could be a memory leak. 
+ defer { + // Temp allocator + if tracking_temp_allocator { + if len(track_temp.allocation_map) > 0 { + fmt.eprintf("=== %v allocations not freed - temp allocator: ===\n", len(track_temp.allocation_map)) + for _, entry in track_temp.allocation_map { + fmt.eprintf("- %v bytes @ %v\n", entry.size, entry.location) + } + } + if len(track_temp.bad_free_array) > 0 { + fmt.eprintf("=== %v incorrect frees - temp allocator: ===\n", len(track_temp.bad_free_array)) + for entry in track_temp.bad_free_array { + fmt.eprintf("- %p @ %v\n", entry.memory, entry.location) + } + } + mem.tracking_allocator_destroy(&track_temp) + } + // Default allocator + if len(track.allocation_map) > 0 { + fmt.eprintf("=== %v allocations not freed - main allocator: ===\n", len(track.allocation_map)) + for _, entry in track.allocation_map { + fmt.eprintf("- %v bytes @ %v\n", entry.size, entry.location) + } + } + if len(track.bad_free_array) > 0 { + fmt.eprintf("=== %v incorrect frees - main allocator: ===\n", len(track.bad_free_array)) + for entry in track.bad_free_array { + fmt.eprintf("- %p @ %v\n", entry.memory, entry.location) + } + } + mem.tracking_allocator_destroy(&track) + } + } + + args := os.args + if len(args) < 2 { + fmt.eprintln("Usage: examples ") + fmt.eprintln("Available examples: hellope-shapes, hellope-text, hellope-clay") + os.exit(1) + } + + switch args[1] { + case "hellope-clay": hellope_clay() + case "hellope-shapes": hellope_shapes() + case "hellope-text": hellope_text() + case: + fmt.eprintf("Unknown example: %v\n", args[1]) + fmt.eprintln("Available examples: hellope-shapes, hellope-text, hellope-clay") + os.exit(1) + } +} diff --git a/draw/pipeline_2d_base.odin b/draw/pipeline_2d_base.odin new file mode 100644 index 0000000..8315c89 --- /dev/null +++ b/draw/pipeline_2d_base.odin @@ -0,0 +1,688 @@ +package draw + +import "core:c" +import "core:log" +import "core:mem" +import sdl "vendor:sdl3" + +Vertex :: struct { + position: [2]f32, + uv: [2]f32, + color: Color, +} + 
+TextBatch :: struct { // One indexed text draw; read by the .Text case of draw_layer
+	atlas_texture: ^sdl.GPUTexture, // bound as the fragment sampler texture for this draw
+	vertex_start: u32, // added to the text base vertex (text vertices follow shape vertices in the vertex buffer)
+	vertex_count: u32,
+	index_start: u32, // first index passed to DrawGPUIndexedPrimitives
+	index_count: u32, // number of indices passed to DrawGPUIndexedPrimitives
+}
+
+// ----------------------------------------------------------------------------------------------------------------
+// ----- SDF primitive types -----------
+// ----------------------------------------------------------------------------------------------------------------
+
+Shape_Kind :: enum u8 { // Numeric values are the `kind == N` cases dispatched in the base_2d fragment shader
+	Solid = 0, // no SDF: fragment shader outputs color * texture sample
+	RRect = 1,
+	Circle = 2,
+	Ellipse = 3,
+	Segment = 4,
+	Ring_Arc = 5,
+	NGon = 6,
+}
+
+Shape_Flag :: enum u8 {
+	Stroke, // bit 0; the fragment shader tests (flags & 1u) and then applies abs(d) - stroke_px * 0.5
+}
+
+Shape_Flags :: bit_set[Shape_Flag;u8]
+
+RRect_Params :: struct { // Shader reads: params.xy, params.zw + params2.xy, params2.z, params2.w
+	half_size: [2]f32,
+	radii: [4]f32, // per-corner radii, selected in the shader by the sign of the local x/y
+	soft_px: f32, // shader uses max(soft_px, 1.0) as the smoothstep half-width
+	stroke_px: f32, // only used when the Stroke flag is set
+}
+
+Circle_Params :: struct { // Shader reads: params.x, params.y, params.z
+	radius: f32,
+	soft_px: f32,
+	stroke_px: f32,
+	_: [5]f32, // padding up to 8 floats (32 bytes), matching Shape_Params.raw
+}
+
+Ellipse_Params :: struct { // Shader reads: params.xy, params.z, params.w
+	radii: [2]f32,
+	soft_px: f32,
+	stroke_px: f32,
+	_: [4]f32, // padding up to 8 floats
+}
+
+Segment_Params :: struct { // Shader reads: params.xy, params.zw, params2.x, params2.y
+	a: [2]f32, // segment endpoints, compared against the fragment's local position
+	b: [2]f32,
+	width: f32, // shader subtracts width * 0.5 from the distance to the segment
+	soft_px: f32,
+	_: [2]f32, // padding up to 8 floats
+}
+
+Ring_Arc_Params :: struct { // Shader reads: params.x, params.y, params.z, params.w, params2.x
+	inner_radius: f32,
+	outer_radius: f32,
+	start_rad: f32, // compared against atan2 of the local position; negative values get 2*pi added in the shader
+	end_rad: f32,
+	soft_px: f32,
+	_: [3]f32, // padding up to 8 floats
+}
+
+NGon_Params :: struct { // Shader reads: params.x, params.y, params.z, params.w, params2.x
+	radius: f32,
+	rotation: f32, // fed directly to cos()/sin() in the shader
+	sides: f32, // stored as float; the shader computes pi / sides
+	soft_px: f32,
+	stroke_px: f32,
+	_: [3]f32, // padding up to 8 floats
+}
+
+Shape_Params :: struct #raw_union { // All variants overlay the same 32 bytes
+	rrect: RRect_Params,
+	circle: Circle_Params,
+	ellipse: Ellipse_Params,
+	segment: Segment_Params,
+	ring_arc: Ring_Arc_Params,
+	ngon: NGon_Params,
+	raw: [8]f32, // untyped view: the shader receives these as two float4 values (params, params2)
+}
+
+#assert(size_of(Shape_Params) == 32)
+
+// GPU layout: 64 bytes, std430-compatible. The shader declares this as a storage buffer struct.
+Primitive :: struct {
+	bounds: [4]f32, // 0: min_x, min_y, max_x, max_y (world-space, pre-DPI)
+	color: Color, // 16: u8x4, unpacked in shader via unpackUnorm4x8
+	kind_flags: u32, // 20: (kind as u32) | (flags as u32 << 8)
+	_pad: [2]f32, // 24: alignment to vec4 boundary
+	params: Shape_Params, // 32: two vec4s of shape params
+}
+
+#assert(size_of(Primitive) == 64)
+
+pack_kind_flags :: #force_inline proc(kind: Shape_Kind, flags: Shape_Flags) -> u32 { // Builds Primitive.kind_flags: kind in bits 0-7, flag bits in bits 8-15
+	return u32(kind) | (u32(transmute(u8)flags) << 8)
+}
+
+Pipeline_2D_Base :: struct { // GPU resources owned by the unified 2D pipeline
+	sdl_pipeline: ^sdl.GPUGraphicsPipeline,
+	vertex_buffer: Buffer, // tessellated shape vertices followed by text vertices (see upload)
+	index_buffer: Buffer, // text indices, bound as 32-bit
+	unit_quad_buffer: ^sdl.GPUBuffer, // static 6-vertex quad, drawn instanced in SDF mode
+	primitive_buffer: Buffer, // per-frame Primitive array, bound as a vertex storage buffer
+	white_texture: ^sdl.GPUTexture, // 1x1 white texture bound for shape and SDF draws
+	sampler: ^sdl.GPUSampler, // shared by shapes and text
+}
+
+@(private)
+create_pipeline_2d_base :: proc( // Creates the pipeline, its buffers, the white texture and the sampler; on failure returns ok = false
+	device: ^sdl.GPUDevice,
+	window: ^sdl.Window, // only used to query the swapchain texture format
+	sample_count: sdl.GPUSampleCount,
+) -> (
+	pipeline: Pipeline_2D_Base,
+	ok: bool,
+) {
+	// On failure, clean up any partially-created resources
+	defer if !ok {
+		if pipeline.sampler != nil do sdl.ReleaseGPUSampler(device, pipeline.sampler)
+		if pipeline.white_texture != nil do sdl.ReleaseGPUTexture(device, pipeline.white_texture)
+		if pipeline.unit_quad_buffer != nil do sdl.ReleaseGPUBuffer(device, pipeline.unit_quad_buffer)
+		if pipeline.primitive_buffer.gpu != nil do destroy_buffer(device, &pipeline.primitive_buffer)
+		if pipeline.index_buffer.gpu != nil do destroy_buffer(device, &pipeline.index_buffer)
+		if pipeline.vertex_buffer.gpu != nil do destroy_buffer(device, &pipeline.vertex_buffer)
+		if pipeline.sdl_pipeline != nil do sdl.ReleaseGPUGraphicsPipeline(device, pipeline.sdl_pipeline)
+	}
+
+	when ODIN_OS == .Darwin { // shader bytes are embedded at compile time: MSL on Darwin, SPIR-V elsewhere
+		base_2d_vert_raw := #load("shaders/generated/base_2d.vert.metal")
+		base_2d_frag_raw := #load("shaders/generated/base_2d.frag.metal")
+	} else {
+		base_2d_vert_raw := #load("shaders/generated/base_2d.vert.spv")
+		base_2d_frag_raw := #load("shaders/generated/base_2d.frag.spv")
+	}
+
+	log.debug("Loaded", len(base_2d_vert_raw), "vert bytes")
+	log.debug("Loaded", len(base_2d_frag_raw), "frag bytes")
+
+	vert_info := sdl.GPUShaderCreateInfo {
+		code_size = len(base_2d_vert_raw),
+		code = raw_data(base_2d_vert_raw),
+		entrypoint = ENTRY_POINT,
+		format = SHADER_TYPE,
+		stage = .VERTEX,
+		num_uniform_buffers = 1,
+		num_storage_buffers = 1,
+	}
+
+	frag_info := sdl.GPUShaderCreateInfo {
+		code_size = len(base_2d_frag_raw),
+		code = raw_data(base_2d_frag_raw),
+		entrypoint = ENTRY_POINT,
+		format = SHADER_TYPE,
+		stage = .FRAGMENT,
+		num_samplers = 1,
+	}
+
+	vert_shader := sdl.CreateGPUShader(device, vert_info)
+	if vert_shader == nil {
+		log.errorf("Could not create draw vertex shader: %s", sdl.GetError())
+		return pipeline, false
+	}
+
+	frag_shader := sdl.CreateGPUShader(device, frag_info)
+	if frag_shader == nil {
+		sdl.ReleaseGPUShader(device, vert_shader) // the deferred cleanup does not know about shaders, so release here
+		log.errorf("Could not create draw fragment shader: %s", sdl.GetError())
+		return pipeline, false
+	}
+
+	vertex_attributes: [3]sdl.GPUVertexAttribute = {
+		// position (GLSL location 0)
+		sdl.GPUVertexAttribute{buffer_slot = 0, location = 0, format = .FLOAT2, offset = 0},
+		// uv (GLSL location 1)
+		sdl.GPUVertexAttribute{buffer_slot = 0, location = 1, format = .FLOAT2, offset = size_of([2]f32)},
+		// color (GLSL location 2, u8x4 normalized to float by GPU)
+		sdl.GPUVertexAttribute{buffer_slot = 0, location = 2, format = .UBYTE4_NORM, offset = size_of([2]f32) * 2},
+	}
+
+	pipeline_info := sdl.GPUGraphicsPipelineCreateInfo {
+		vertex_shader = vert_shader,
+		fragment_shader = frag_shader,
+		primitive_type = .TRIANGLELIST,
+		multisample_state = sdl.GPUMultisampleState{sample_count = sample_count},
+		target_info = sdl.GPUGraphicsPipelineTargetInfo {
+			color_target_descriptions = &sdl.GPUColorTargetDescription {
+				format = sdl.GetGPUSwapchainTextureFormat(device, window),
+				blend_state = sdl.GPUColorTargetBlendState {
+					enable_blend = true,
+					enable_color_write_mask = true,
+					src_color_blendfactor = .SRC_ALPHA,
+					dst_color_blendfactor = .ONE_MINUS_SRC_ALPHA,
+					color_blend_op = .ADD,
+					src_alpha_blendfactor = .SRC_ALPHA,
+					dst_alpha_blendfactor = .ONE_MINUS_SRC_ALPHA,
+					alpha_blend_op = .ADD,
+					color_write_mask = sdl.GPUColorComponentFlags{.R, .G, .B, .A},
+				},
+			},
+			num_color_targets = 1,
+		},
+		vertex_input_state = sdl.GPUVertexInputState {
+			vertex_buffer_descriptions = &sdl.GPUVertexBufferDescription {
+				slot = 0,
+				input_rate = .VERTEX,
+				pitch = size_of(Vertex),
+			},
+			num_vertex_buffers = 1,
+			vertex_attributes = raw_data(vertex_attributes[:]),
+			num_vertex_attributes = 3,
+		},
+	}
+
+	pipeline.sdl_pipeline = sdl.CreateGPUGraphicsPipeline(device, pipeline_info)
+	// Shaders are no longer needed regardless of pipeline creation success
+	sdl.ReleaseGPUShader(device, vert_shader)
+	sdl.ReleaseGPUShader(device, frag_shader)
+	if pipeline.sdl_pipeline == nil {
+		log.errorf("Failed to create draw graphics pipeline: %s", sdl.GetError())
+		return pipeline, false
+	}
+
+	// Create vertex buffer
+	vb_ok: bool
+	pipeline.vertex_buffer, vb_ok = create_buffer(
+		device,
+		size_of(Vertex) * BUFFER_INIT_SIZE,
+		sdl.GPUBufferUsageFlags{.VERTEX},
+	)
+	if !vb_ok do return pipeline, false
+
+	// Create index buffer (used by text)
+	ib_ok: bool
+	pipeline.index_buffer, ib_ok = create_buffer(
+		device,
+		size_of(c.int) * BUFFER_INIT_SIZE,
+		sdl.GPUBufferUsageFlags{.INDEX},
+	)
+	if !ib_ok do return pipeline, false
+
+	// Create primitive storage buffer (used by SDF instanced drawing)
+	pb_ok: bool
+	pipeline.primitive_buffer, pb_ok = create_buffer(
+		device,
+		size_of(Primitive) * BUFFER_INIT_SIZE,
+		sdl.GPUBufferUsageFlags{.GRAPHICS_STORAGE_READ},
+	)
+	if !pb_ok do return pipeline, false
+
+	// Create static 6-vertex unit quad buffer (two triangles, TRIANGLELIST)
+	pipeline.unit_quad_buffer = sdl.CreateGPUBuffer(
+		device,
+		sdl.GPUBufferCreateInfo{usage = {.VERTEX}, size = 6 * size_of(Vertex)},
+	)
+	if pipeline.unit_quad_buffer == nil {
+		log.errorf("Failed to create unit quad buffer: %s", sdl.GetError())
+		return pipeline, false
+	}
+
+	// Create 1x1 white pixel texture
+	pipeline.white_texture = sdl.CreateGPUTexture(
+		device,
+		sdl.GPUTextureCreateInfo {
+			type = .D2,
+			format = .R8G8B8A8_UNORM,
+			usage = {.SAMPLER},
+			width = 1,
+			height = 1,
+			layer_count_or_depth = 1,
+			num_levels = 1,
+			sample_count = ._1,
+		},
+	)
+	if pipeline.white_texture == nil {
+		log.errorf("Failed to create white pixel texture: %s", sdl.GetError())
+		return pipeline, false
+	}
+
+	// Upload white pixel and unit quad data in a single command buffer
+	white_pixel := [4]u8{255, 255, 255, 255}
+	white_transfer := sdl.CreateGPUTransferBuffer(
+		device,
+		sdl.GPUTransferBufferCreateInfo{usage = .UPLOAD, size = size_of(white_pixel)},
+	)
+	if white_transfer == nil {
+		log.errorf("Failed to create white pixel transfer buffer: %s", sdl.GetError())
+		return pipeline, false
+	}
+	defer sdl.ReleaseGPUTransferBuffer(device, white_transfer) // released on every exit path from here on
+
+	white_ptr := sdl.MapGPUTransferBuffer(device, white_transfer, false)
+	if white_ptr == nil {
+		log.errorf("Failed to map white pixel transfer buffer: %s", sdl.GetError())
+		return pipeline, false
+	}
+	mem.copy(white_ptr, &white_pixel, size_of(white_pixel))
+	sdl.UnmapGPUTransferBuffer(device, white_transfer)
+
+	quad_verts := [6]Vertex{
+		{position = {0, 0}}, {position = {1, 0}}, {position = {0, 1}},
+		{position = {0, 1}}, {position = {1, 0}}, {position = {1, 1}},
+	}
+	quad_transfer := sdl.CreateGPUTransferBuffer(
+		device,
+		sdl.GPUTransferBufferCreateInfo{usage = .UPLOAD, size = size_of(quad_verts)},
+	)
+	if quad_transfer == nil {
+		log.errorf("Failed to create unit quad transfer buffer: %s", sdl.GetError())
+		return pipeline, false
+	}
+	defer sdl.ReleaseGPUTransferBuffer(device, quad_transfer)
+
+	quad_ptr := sdl.MapGPUTransferBuffer(device, quad_transfer, false)
+	if quad_ptr == nil {
+		log.errorf("Failed to map unit quad transfer buffer: %s", sdl.GetError())
+		return pipeline, false
+	}
+	mem.copy(quad_ptr, &quad_verts, size_of(quad_verts))
+	sdl.UnmapGPUTransferBuffer(device, quad_transfer)
+
+	upload_cmd := sdl.AcquireGPUCommandBuffer(device)
+	if upload_cmd == nil {
+		log.errorf("Failed to acquire command buffer for init upload: %s", sdl.GetError())
+		return pipeline, false
+	}
+	upload_pass := sdl.BeginGPUCopyPass(upload_cmd)
+
+	sdl.UploadToGPUTexture(
+		upload_pass,
+		sdl.GPUTextureTransferInfo{transfer_buffer = white_transfer},
+		sdl.GPUTextureRegion{texture = pipeline.white_texture, w = 1, h = 1, d = 1},
+		false,
+	)
+
+	sdl.UploadToGPUBuffer(
+		upload_pass,
+		sdl.GPUTransferBufferLocation{transfer_buffer = quad_transfer},
+		sdl.GPUBufferRegion{
+			buffer = pipeline.unit_quad_buffer,
+			offset = 0,
+			size = size_of(quad_verts),
+		},
+		false,
+	)
+
+	sdl.EndGPUCopyPass(upload_pass)
+	if !sdl.SubmitGPUCommandBuffer(upload_cmd) {
+		log.errorf("Failed to submit init upload command buffer: %s", sdl.GetError())
+		return pipeline, false
+	}
+
+	log.debug("White pixel texture and unit quad buffer created and uploaded")
+
+	// Create sampler (shared by shapes and text)
+	pipeline.sampler = sdl.CreateGPUSampler(
+		device,
+		sdl.GPUSamplerCreateInfo {
+			min_filter = .LINEAR,
+			mag_filter = .LINEAR,
+			mipmap_mode = .LINEAR,
+			address_mode_u = .CLAMP_TO_EDGE,
+			address_mode_v = .CLAMP_TO_EDGE,
+			address_mode_w = .CLAMP_TO_EDGE,
+		},
+	)
+	if pipeline.sampler == nil {
+		log.errorf("Could not create GPU sampler: %s", sdl.GetError())
+		return pipeline, false
+	}
+
+	log.debug("Done creating unified draw pipeline")
+	return pipeline, true
+}
+
+@(private)
+upload :: proc(device: ^sdl.GPUDevice, pass: ^sdl.GPUCopyPass) { // Copies this frame's GLOB vertex, index and primitive arrays into the GPU buffers through their transfer buffers
+	// Upload vertices (shapes then text into one buffer)
+	shape_vert_count := u32(len(GLOB.tmp_shape_verts))
+	text_vert_count := u32(len(GLOB.tmp_text_verts))
+	total_vert_count := shape_vert_count + text_vert_count
+
+	if total_vert_count > 0 {
+		total_vert_size := total_vert_count * size_of(Vertex)
+		shape_vert_size := shape_vert_count * size_of(Vertex)
+		text_vert_size := text_vert_count * size_of(Vertex)
+
+		grow_buffer_if_needed(
+			device,
+			&GLOB.pipeline_2d_base.vertex_buffer,
+			total_vert_size,
+			sdl.GPUBufferUsageFlags{.VERTEX},
+		)
+
+		v_array := sdl.MapGPUTransferBuffer(device, GLOB.pipeline_2d_base.vertex_buffer.transfer, false)
+		if v_array == nil {
+			log.panicf("Failed to map vertex transfer buffer: %s", sdl.GetError())
+		}
+		if shape_vert_size > 0 {
+			mem.copy(v_array, raw_data(GLOB.tmp_shape_verts), int(shape_vert_size))
+		}
+		if text_vert_size > 0 {
+			mem.copy(
+				rawptr(uintptr(v_array) + uintptr(shape_vert_size)), // text vertices start right after the shape vertices
+				raw_data(GLOB.tmp_text_verts),
+				int(text_vert_size),
+			)
+		}
+		sdl.UnmapGPUTransferBuffer(device, GLOB.pipeline_2d_base.vertex_buffer.transfer)
+
+		sdl.UploadToGPUBuffer(
+			pass,
+			sdl.GPUTransferBufferLocation{transfer_buffer = GLOB.pipeline_2d_base.vertex_buffer.transfer},
+			sdl.GPUBufferRegion{
+				buffer = GLOB.pipeline_2d_base.vertex_buffer.gpu,
+				offset = 0,
+				size = total_vert_size,
+			},
+			false,
+		)
+	}
+
+	// Upload text indices
+	index_count := u32(len(GLOB.tmp_text_indices))
+	if index_count > 0 {
+		index_size := index_count * size_of(c.int)
+
+		grow_buffer_if_needed(
+			device,
+			&GLOB.pipeline_2d_base.index_buffer,
+			index_size,
+			sdl.GPUBufferUsageFlags{.INDEX},
+		)
+
+		i_array := sdl.MapGPUTransferBuffer(device, GLOB.pipeline_2d_base.index_buffer.transfer, false)
+		if i_array == nil {
+			log.panicf("Failed to map index transfer buffer: %s", sdl.GetError())
+		}
+		mem.copy(i_array, raw_data(GLOB.tmp_text_indices), int(index_size))
+		sdl.UnmapGPUTransferBuffer(device, GLOB.pipeline_2d_base.index_buffer.transfer)
+
+		sdl.UploadToGPUBuffer(
+			pass,
+			sdl.GPUTransferBufferLocation{transfer_buffer = GLOB.pipeline_2d_base.index_buffer.transfer},
+			sdl.GPUBufferRegion{
+				buffer = GLOB.pipeline_2d_base.index_buffer.gpu,
+				offset = 0,
+				size = index_size,
+			},
+			false,
+		)
+	}
+
+	// Upload SDF primitives
+	prim_count := u32(len(GLOB.tmp_primitives))
+	if prim_count > 0 {
+		prim_size := prim_count * size_of(Primitive)
+
+		grow_buffer_if_needed(
+			device,
+			&GLOB.pipeline_2d_base.primitive_buffer,
+			prim_size,
+			sdl.GPUBufferUsageFlags{.GRAPHICS_STORAGE_READ},
+		)
+
+		p_array := sdl.MapGPUTransferBuffer(
+			device, GLOB.pipeline_2d_base.primitive_buffer.transfer, false,
+		)
+		if p_array == nil {
+			log.panicf("Failed to map primitive transfer buffer: %s", sdl.GetError())
+		}
+		mem.copy(p_array, raw_data(GLOB.tmp_primitives), int(prim_size))
+		sdl.UnmapGPUTransferBuffer(device, GLOB.pipeline_2d_base.primitive_buffer.transfer)
+
+		sdl.UploadToGPUBuffer(
+			pass,
+			sdl.GPUTransferBufferLocation{
+				transfer_buffer = GLOB.pipeline_2d_base.primitive_buffer.transfer,
+			},
+			sdl.GPUBufferRegion{
+				buffer = GLOB.pipeline_2d_base.primitive_buffer.gpu,
+				offset = 0,
+				size = prim_size,
+			},
+			false,
+		)
+	}
+}
+
+@(private)
+draw_layer :: proc( // Records one render pass for `layer`, issuing its sub-batches scissor by scissor
+	device: ^sdl.GPUDevice, // NOTE(review): not referenced in this procedure body
+	window: ^sdl.Window, // NOTE(review): not referenced in this procedure body
+	cmd_buffer: ^sdl.GPUCommandBuffer,
+	render_texture: ^sdl.GPUTexture,
+	swapchain_w: u32,
+	swapchain_h: u32,
+	clear_color: [4]f32,
+	layer: ^Layer,
+) {
+	if layer.sub_batch_len == 0 { // empty layer: only make sure the target has been cleared once
+		if !GLOB.cleared {
+			pass := sdl.BeginGPURenderPass(
+				cmd_buffer,
+				&sdl.GPUColorTargetInfo {
+					texture = render_texture,
+					clear_color = sdl.FColor {
+						clear_color[0], clear_color[1], clear_color[2], clear_color[3],
+					},
+					load_op = .CLEAR,
+					store_op = .STORE,
+				},
+				1,
+				nil,
+			)
+			sdl.EndGPURenderPass(pass)
+			GLOB.cleared = true
+		}
+		return
+	}
+
+	render_pass := sdl.BeginGPURenderPass(
+		cmd_buffer,
+		&sdl.GPUColorTargetInfo {
+			texture = render_texture,
+			clear_color = sdl.FColor {
+				clear_color[0], clear_color[1], clear_color[2], clear_color[3],
+			},
+			load_op = GLOB.cleared ? .LOAD : .CLEAR, // clear only for the first pass that touches the target
+			store_op = .STORE,
+		},
+		1,
+		nil,
+	)
+	GLOB.cleared = true
+
+	sdl.BindGPUGraphicsPipeline(render_pass, GLOB.pipeline_2d_base.sdl_pipeline)
+
+	// Bind storage buffer (read by vertex shader in SDF mode)
+	sdl.BindGPUVertexStorageBuffers(
+		render_pass,
+		0,
+		([^]^sdl.GPUBuffer)(&GLOB.pipeline_2d_base.primitive_buffer.gpu),
+		1,
+	)
+
+	// Always bind index buffer — harmless if no indexed draws are issued
+	sdl.BindGPUIndexBuffer(
+		render_pass,
+		sdl.GPUBufferBinding{buffer = GLOB.pipeline_2d_base.index_buffer.gpu, offset = 0},
+		._32BIT,
+	)
+
+	// Shorthand aliases for frequently-used pipeline resources
+	main_vbuf := GLOB.pipeline_2d_base.vertex_buffer.gpu
+	unit_quad := GLOB.pipeline_2d_base.unit_quad_buffer
+	white := GLOB.pipeline_2d_base.white_texture
+	sampler := GLOB.pipeline_2d_base.sampler
+	w := f32(swapchain_w)
+	h := f32(swapchain_h)
+
+	// Initial GPU state: tessellated mode, main vertex buffer, no atlas bound yet
+	push_globals(cmd_buffer, w, h, .Tessellated)
+	sdl.BindGPUVertexBuffers(
+		render_pass, 0, &sdl.GPUBufferBinding{buffer = main_vbuf, offset = 0}, 1,
+	)
+
+	current_mode: Draw_Mode = .Tessellated // the current_* values cache bound state so redundant binds are skipped
+	current_vbuf := main_vbuf
+	current_atlas: ^sdl.GPUTexture
+
+	// Text vertices live after shape vertices in the GPU vertex buffer
+	text_vertex_gpu_base := u32(len(GLOB.tmp_shape_verts))
+
+	for &scissor in GLOB.scissors[layer.scissor_start:][:layer.scissor_len] {
+		sdl.SetGPUScissor(render_pass, scissor.bounds)
+
+		for &batch in GLOB.tmp_sub_batches[scissor.sub_batch_start:][:scissor.sub_batch_len] {
+			switch batch.kind {
+			case .Shapes: // tessellated geometry: batch.offset/count index vertices in the main vertex buffer
+				if current_mode != .Tessellated {
+					push_globals(cmd_buffer, w, h, .Tessellated)
+					current_mode = .Tessellated
+				}
+				if current_vbuf != main_vbuf {
+					sdl.BindGPUVertexBuffers(
+						render_pass, 0,
+						&sdl.GPUBufferBinding{buffer = main_vbuf, offset = 0}, 1,
+					)
+					current_vbuf = main_vbuf
+				}
+				if current_atlas != white {
+					sdl.BindGPUFragmentSamplers(
+						render_pass, 0,
+						&sdl.GPUTextureSamplerBinding{texture = white, sampler = sampler}, 1,
+					)
+					current_atlas = white
+				}
+				sdl.DrawGPUPrimitives(render_pass, batch.count, 1, batch.offset, 0)
+
+			case .Text: // indexed draw: batch.offset selects an entry of GLOB.tmp_text_batches
+				if current_mode != .Tessellated {
+					push_globals(cmd_buffer, w, h, .Tessellated)
+					current_mode = .Tessellated
+				}
+				if current_vbuf != main_vbuf {
+					sdl.BindGPUVertexBuffers(
+						render_pass, 0,
+						&sdl.GPUBufferBinding{buffer = main_vbuf, offset = 0}, 1,
+					)
+					current_vbuf = main_vbuf
+				}
+				chunk := &GLOB.tmp_text_batches[batch.offset]
+				if current_atlas != chunk.atlas_texture {
+					sdl.BindGPUFragmentSamplers(
+						render_pass, 0,
+						&sdl.GPUTextureSamplerBinding {
+							texture = chunk.atlas_texture,
+							sampler = sampler,
+						},
+						1,
+					)
+					current_atlas = chunk.atlas_texture
+				}
+				sdl.DrawGPUIndexedPrimitives(
+					render_pass,
+					chunk.index_count,
+					1,
+					chunk.index_start,
+					i32(text_vertex_gpu_base + chunk.vertex_start),
+					0,
+				)
+
+			case .SDF: // instanced unit quad: batch.count instances starting at instance batch.offset
+				if current_mode != .SDF {
+					push_globals(cmd_buffer, w, h, .SDF)
+					current_mode = .SDF
+				}
+				if current_vbuf != unit_quad {
+					sdl.BindGPUVertexBuffers(
+						render_pass, 0,
+						&sdl.GPUBufferBinding{buffer = unit_quad, offset = 0}, 1,
+					)
+					current_vbuf = unit_quad
+				}
+				if current_atlas != white {
+					sdl.BindGPUFragmentSamplers(
+						render_pass, 0,
+						&sdl.GPUTextureSamplerBinding{texture = white, sampler = sampler}, 1,
+					)
+					current_atlas = white
+				}
+				sdl.DrawGPUPrimitives(render_pass, 6, batch.count, 0, batch.offset)
+			}
+		}
+	}
+
+	sdl.EndGPURenderPass(render_pass)
+}
+
+destroy_pipeline_2d_base :: proc(device: ^sdl.GPUDevice, pipeline: ^Pipeline_2D_Base) { // Releases every GPU resource held by `pipeline`
+	destroy_buffer(device, &pipeline.vertex_buffer)
+	destroy_buffer(device, &pipeline.index_buffer)
+	destroy_buffer(device, &pipeline.primitive_buffer)
+	if pipeline.unit_quad_buffer != nil {
+		sdl.ReleaseGPUBuffer(device, pipeline.unit_quad_buffer)
+	}
+	sdl.ReleaseGPUTexture(device, pipeline.white_texture)
+	sdl.ReleaseGPUSampler(device, pipeline.sampler)
+	sdl.ReleaseGPUGraphicsPipeline(device, pipeline.sdl_pipeline)
+}
diff --git a/draw/shaders/generated/base_2d.frag.metal b/draw/shaders/generated/base_2d.frag.metal
new file mode 100644
index 0000000..933919c
--- /dev/null
+++ b/draw/shaders/generated/base_2d.frag.metal
@@ -0,0 +1,281 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+// Implementation of the GLSL mod() function, which is slightly different than Metal fmod()
+template<typename Tx, typename Ty>
+inline Tx mod(Tx x, Ty y)
+{
+    return x - y * floor(x / y);
+}
+
+struct main0_out
+{
+    float4 out_color [[color(0)]];
+};
+
+struct main0_in
+{
+    float4 f_color [[user(locn0)]];
+    float2 f_local_or_uv [[user(locn1)]];
+    float4 f_params [[user(locn2)]];
+    float4 f_params2 [[user(locn3)]];
+    uint f_kind_flags [[user(locn4)]];
+};
+
+static inline __attribute__((always_inline))
+float sdRoundedBox(thread const float2& p, thread const float2& b, thread float4& r)
+{
+    float2 _56;
+    if (p.x > 0.0)
+    {
+        _56 = r.xy;
+    }
+    else
+    {
+        _56 = r.zw;
+    }
+    r.x = _56.x;
+    r.y = _56.y;
+    float _73;
+    if (p.y > 0.0)
+    {
+        _73 = r.x;
+    }
+    else
+    {
+        _73 = r.y;
+    }
+    r.x = _73;
+    float2 q = (abs(p) - b) + float2(r.x);
+    return (fast::min(fast::max(q.x, q.y), 0.0) + length(fast::max(q, float2(0.0)))) - r.x;
+}
+
+static inline __attribute__((always_inline))
+float sdf_stroke(thread const float& d, thread const float& stroke_width)
+{
+    return abs(d) - (stroke_width * 0.5);
+}
+
+static inline __attribute__((always_inline))
+float sdCircle(thread const float2& p, thread const float& r)
+{
+    return length(p) - r;
+}
+
+static inline __attribute__((always_inline))
+float sdEllipse(thread float2& p, thread float2& ab)
+{
+    p = abs(p);
+    if (p.x > p.y)
+    {
+        p = p.yx;
+        ab = ab.yx;
+    }
+    float l = (ab.y * ab.y) - (ab.x * ab.x);
+    float m = (ab.x * p.x) / l;
+    float m2 = m * m;
+    float n = (ab.y * p.y) / l;
+    float n2 = n * n;
+    float c = ((m2 + n2) - 1.0) / 3.0;
+    float c3 = (c * c) * c;
+    float q = c3 + ((m2 * n2) * 2.0);
+    float d = c3 + (m2 * n2);
+    float g = m + (m * n2);
+    float co;
+    if (d < 0.0)
+    {
+        float h = acos(q / c3) / 3.0;
+        float s = cos(h);
+        float t = sin(h) * 1.73205077648162841796875;
+        float rx = sqrt(((-c) * ((s + t) + 2.0)) + m2);
+        float ry = sqrt(((-c) * ((s - t) + 2.0)) + m2);
+        co = (((ry + (sign(l) * rx)) + (abs(g) / (rx * ry))) - m) / 2.0;
+    }
+    else
+    {
+        float h_1 = ((2.0 * m) * n) * sqrt(d);
+        float s_1 = sign(q + h_1) * powr(abs(q + h_1), 0.3333333432674407958984375);
+        float u = sign(q - h_1) * powr(abs(q - h_1), 0.3333333432674407958984375);
+        float rx_1 = (((-s_1) - u) - (c * 4.0)) + (2.0 * m2);
+        float ry_1 = (s_1 - u) * 1.73205077648162841796875;
+        float rm = sqrt((rx_1 * rx_1) + (ry_1 * ry_1));
+        co = (((ry_1 / sqrt(rm - rx_1)) + ((2.0 * g) / rm)) - m) / 2.0;
+    }
+    float2 r = ab * float2(co, sqrt(1.0 - (co * co)));
+    return length(r - p) * sign(p.y - r.y);
+}
+
+static inline __attribute__((always_inline))
+float sdSegment(thread const float2& p, thread const float2& a, thread const float2& b)
+{
+    float2 pa = p - a;
+    float2 ba = b - a;
+    float h = fast::clamp(dot(pa, ba) / dot(ba, ba), 0.0, 1.0);
+    return length(pa - (ba * h));
+}
+
+static inline __attribute__((always_inline))
+float sdf_alpha(thread const float& d, thread const float& soft)
+{
+    return 1.0 - smoothstep(-soft, soft, d);
+}
+
+fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> tex [[texture(0)]], sampler texSmplr [[sampler(0)]])
+{
+    main0_out out = {};
+    uint kind = in.f_kind_flags & 255u;
+    uint flags = (in.f_kind_flags >> 8u) & 255u;
+    if (kind == 0u)
+    {
+        out.out_color = in.f_color * tex.sample(texSmplr, in.f_local_or_uv);
+        return out;
+    }
+    float d = 1000000015047466219876688855040.0;
+    float soft = 1.0;
+    if (kind == 1u)
+    {
+        float2 b = in.f_params.xy;
+        float4 r = float4(in.f_params.zw, in.f_params2.xy);
+        soft = fast::max(in.f_params2.z, 1.0);
+        float stroke_px = in.f_params2.w;
+        float2 param = in.f_local_or_uv;
+        float2 param_1 = b;
+        float4 param_2 = r;
+        float _453 = sdRoundedBox(param, param_1, param_2);
+        d = _453;
+        if ((flags & 1u) != 0u)
+        {
+            float param_3 = d;
+            float param_4 = stroke_px;
+            d = sdf_stroke(param_3, param_4);
+        }
+    }
+    else
+    {
+        if (kind == 2u)
+        {
+            float radius = in.f_params.x;
+            soft = fast::max(in.f_params.y, 1.0);
+            float stroke_px_1 = in.f_params.z;
+            float2 param_5 = in.f_local_or_uv;
+            float param_6 = radius;
+            d = sdCircle(param_5, param_6);
+            if ((flags & 1u) != 0u)
+            {
+                float param_7 = d;
+                float param_8 = stroke_px_1;
+                d = sdf_stroke(param_7, param_8);
+            }
+        }
+        else
+        {
+            if (kind == 3u)
+            {
+                float2 ab = in.f_params.xy;
+                soft = fast::max(in.f_params.z, 1.0);
+                float stroke_px_2 = in.f_params.w;
+                float2 param_9 = in.f_local_or_uv;
+                float2 param_10 = ab;
+                float _511 = sdEllipse(param_9, param_10);
+                d = _511;
+                if ((flags & 1u) != 0u)
+                {
+                    float param_11 = d;
+                    float param_12 = stroke_px_2;
+                    d = sdf_stroke(param_11, param_12);
+                }
+            }
+            else
+            {
+                if (kind == 4u)
+                {
+                    float2 a = in.f_params.xy;
+                    float2 b_1 = in.f_params.zw;
+                    float width = in.f_params2.x;
+                    soft = fast::max(in.f_params2.y, 1.0);
+                    float2 param_13 = in.f_local_or_uv;
+                    float2 param_14 = a;
+                    float2 param_15 = b_1;
+                    d = sdSegment(param_13, param_14, param_15) - (width * 0.5);
+                }
+                else
+                {
+                    if (kind == 5u)
+                    {
+                        float inner = in.f_params.x;
+                        float outer = in.f_params.y;
+                        float start_rad = in.f_params.z;
+                        float end_rad = in.f_params.w;
+                        soft = fast::max(in.f_params2.x, 1.0);
+                        float r_1 = length(in.f_local_or_uv);
+                        float d_ring = fast::max(inner - r_1, r_1 - outer);
+                        float angle = precise::atan2(in.f_local_or_uv.y, in.f_local_or_uv.x);
+                        if (angle < 0.0)
+                        {
+                            angle += 6.283185482025146484375;
+                        }
+                        float ang_start = start_rad;
+                        float ang_end = end_rad;
+                        if (ang_start < 0.0)
+                        {
+                            ang_start += 6.283185482025146484375;
+                        }
+                        if (ang_end < 0.0)
+                        {
+                            ang_end += 6.283185482025146484375;
+                        }
+                        float _615;
+                        if (ang_end > ang_start)
+                        {
+                            _615 = float((angle >= ang_start) && (angle <= ang_end));
+                        }
+                        else
+                        {
+                            _615 = float((angle >= ang_start) || (angle <= ang_end));
+                        }
+                        float in_arc = _615;
+                        if (abs(ang_end - ang_start) >= 6.282185077667236328125)
+                        {
+                            in_arc = 1.0;
+                        }
+                        d = (in_arc > 0.5) ? d_ring : 1000000015047466219876688855040.0;
+                    }
+                    else
+                    {
+                        if (kind == 6u)
+                        {
+                            float radius_1 = in.f_params.x;
+                            float rotation = in.f_params.y;
+                            float sides = in.f_params.z;
+                            soft = fast::max(in.f_params.w, 1.0);
+                            float stroke_px_3 = in.f_params2.x;
+                            float2 p = in.f_local_or_uv;
+                            float c = cos(rotation);
+                            float s = sin(rotation);
+                            p = float2x2(float2(c, -s), float2(s, c)) * p;
+                            float an = 3.1415927410125732421875 / sides;
+                            float bn = mod(precise::atan2(p.y, p.x), 2.0 * an) - an;
+                            d = (length(p) * cos(bn)) - radius_1;
+                            if ((flags & 1u) != 0u)
+                            {
+                                float param_16 = d;
+                                float param_17 = stroke_px_3;
+                                d = sdf_stroke(param_16, param_17);
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+    float param_18 = d;
+    float param_19 = soft;
+    float alpha = sdf_alpha(param_18, param_19);
+    out.out_color = float4(in.f_color.xyz, in.f_color.w * alpha);
+    return out;
+}
+
diff --git a/draw/shaders/generated/base_2d.frag.spv b/draw/shaders/generated/base_2d.frag.spv
new file mode 100644
index 0000000..a06c2ca
Binary files /dev/null and b/draw/shaders/generated/base_2d.frag.spv differ
diff --git a/draw/shaders/generated/base_2d.vert.metal b/draw/shaders/generated/base_2d.vert.metal
new file mode 100644
index 0000000..a9bb3fe
--- /dev/null
+++ b/draw/shaders/generated/base_2d.vert.metal
@@ -0,0 +1,88 @@
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct Uniforms
+{
+    float4x4 projection;
+    float dpi_scale;
+    uint mode;
+};
+
+struct Primitive
+{
+    float4 bounds;
+    uint color;
+    uint kind_flags;
+    float2 _pad;
+    float4 params;
+    float4 params2;
+};
+
+struct Primitive_1
+{
+    float4 bounds;
+    uint color;
+    uint kind_flags;
+    float2 _pad;
+    float4 params;
+    float4 params2;
+};
+
+struct Primitives
+{
+    Primitive_1 primitives[1];
+};
+
+struct main0_out
+{
+    float4 f_color [[user(locn0)]];
+    float2 f_local_or_uv [[user(locn1)]];
+    float4 f_params [[user(locn2)]];
+    float4 f_params2 [[user(locn3)]];
+    uint f_kind_flags [[user(locn4)]];
+    float4 gl_Position [[position]];
+};
+
+struct main0_in
+{
+    float2 v_position [[attribute(0)]];
+    float2 v_uv [[attribute(1)]];
+    float4 v_color [[attribute(2)]];
+};
+
+vertex main0_out main0(main0_in in [[stage_in]], constant Uniforms& _12 [[buffer(0)]], const device Primitives& _70 [[buffer(1)]], uint gl_InstanceIndex [[instance_id]])
+{
+    main0_out out = {};
+    if (_12.mode == 0u)
+    {
+        out.f_color = in.v_color;
+        out.f_local_or_uv = in.v_uv;
+        out.f_params = float4(0.0);
+        out.f_params2 = float4(0.0);
+        out.f_kind_flags = 0u;
+        out.gl_Position = _12.projection * float4(in.v_position * _12.dpi_scale, 0.0, 1.0);
+    }
+    else
+    {
+        Primitive p;
+        p.bounds = _70.primitives[int(gl_InstanceIndex)].bounds;
+        p.color = _70.primitives[int(gl_InstanceIndex)].color;
+        p.kind_flags = _70.primitives[int(gl_InstanceIndex)].kind_flags;
+        p._pad = _70.primitives[int(gl_InstanceIndex)]._pad;
+        p.params = _70.primitives[int(gl_InstanceIndex)].params;
+        p.params2 = _70.primitives[int(gl_InstanceIndex)].params2;
+        float2 corner = in.v_position;
+        float2 world_pos = mix(p.bounds.xy, p.bounds.zw, corner);
+        float2 center = (p.bounds.xy + p.bounds.zw) * 0.5;
+        out.f_color = unpack_unorm4x8_to_float(p.color);
+        out.f_local_or_uv = (world_pos - center) * _12.dpi_scale;
+        out.f_params = p.params;
+        out.f_params2 = p.params2;
+        out.f_kind_flags = p.kind_flags;
+        out.gl_Position = _12.projection * float4(world_pos * _12.dpi_scale, 0.0, 1.0);
+    }
+    return out;
+}
+
diff --git a/draw/shaders/generated/base_2d.vert.spv b/draw/shaders/generated/base_2d.vert.spv
new file mode 100644
index 0000000..d32d2b8
Binary files /dev/null and b/draw/shaders/generated/base_2d.vert.spv differ
diff --git a/draw/shaders/source/base_2d.frag b/draw/shaders/source/base_2d.frag
new file mode 100644
index 0000000..013dfd1
--- /dev/null
+++ 
b/draw/shaders/source/base_2d.frag @@ -0,0 +1,192 @@ +#version 450 core + +// --- Inputs from vertex shader --- +layout(location = 0) in vec4 f_color; +layout(location = 1) in vec2 f_local_or_uv; +layout(location = 2) in vec4 f_params; +layout(location = 3) in vec4 f_params2; +layout(location = 4) flat in uint f_kind_flags; + +// --- Output --- +layout(location = 0) out vec4 out_color; + +// --- Texture sampler (for tessellated/text path) --- +layout(set = 2, binding = 0) uniform sampler2D tex; + +// --------------------------------------------------------------------------- +// SDF helper functions (Inigo Quilez) +// All operate in physical pixel space — no dpi_scale needed here. +// --------------------------------------------------------------------------- + +const float PI = 3.14159265358979; + +float sdCircle(vec2 p, float r) { + return length(p) - r; +} + +float sdRoundedBox(vec2 p, vec2 b, vec4 r) { + r.xy = (p.x > 0.0) ? r.xy : r.zw; + r.x = (p.y > 0.0) ? r.x : r.y; + vec2 q = abs(p) - b + r.x; + return min(max(q.x, q.y), 0.0) + length(max(q, vec2(0.0))) - r.x; +} + +float sdSegment(vec2 p, vec2 a, vec2 b) { + vec2 pa = p - a, ba = b - a; + float h = clamp(dot(pa, ba) / dot(ba, ba), 0.0, 1.0); + return length(pa - ba * h); +} + +float sdEllipse(vec2 p, vec2 ab) { + p = abs(p); + if (p.x > p.y) { + p = p.yx; + ab = ab.yx; + } + float l = ab.y * ab.y - ab.x * ab.x; + float m = ab.x * p.x / l; + float m2 = m * m; + float n = ab.y * p.y / l; + float n2 = n * n; + float c = (m2 + n2 - 1.0) / 3.0; + float c3 = c * c * c; + float q = c3 + m2 * n2 * 2.0; + float d = c3 + m2 * n2; + float g = m + m * n2; + float co; + if (d < 0.0) { + float h = acos(q / c3) / 3.0; + float s = cos(h); + float t = sin(h) * sqrt(3.0); + float rx = sqrt(-c * (s + t + 2.0) + m2); + float ry = sqrt(-c * (s - t + 2.0) + m2); + co = (ry + sign(l) * rx + abs(g) / (rx * ry) - m) / 2.0; + } else { + float h = 2.0 * m * n * sqrt(d); + float s = sign(q + h) * pow(abs(q + h), 1.0 / 3.0); + float 
u = sign(q - h) * pow(abs(q - h), 1.0 / 3.0); + float rx = -s - u - c * 4.0 + 2.0 * m2; + float ry = (s - u) * sqrt(3.0); + float rm = sqrt(rx * rx + ry * ry); + co = (ry / sqrt(rm - rx) + 2.0 * g / rm - m) / 2.0; + } + vec2 r = ab * vec2(co, sqrt(1.0 - co * co)); + return length(r - p) * sign(p.y - r.y); +} + +float sdf_alpha(float d, float soft) { + return 1.0 - smoothstep(-soft, soft, d); +} + +float sdf_stroke(float d, float stroke_width) { + return abs(d) - stroke_width * 0.5; +} + +// --------------------------------------------------------------------------- +// main +// --------------------------------------------------------------------------- + +void main() { + uint kind = f_kind_flags & 0xFFu; + uint flags = (f_kind_flags >> 8u) & 0xFFu; + + // ----------------------------------------------------------------------- + // Kind 0: Tessellated path. Texture multiply for text atlas, + // white pixel for solid shapes. + // ----------------------------------------------------------------------- + if (kind == 0u) { + out_color = f_color * texture(tex, f_local_or_uv); + return; + } + + // ----------------------------------------------------------------------- + // SDF path. f_local_or_uv = shape-centered position in physical pixels. + // All dimensional params are already in physical pixels (CPU pre-scaled). 
+ // ----------------------------------------------------------------------- + float d = 1e30; + float soft = 1.0; + + if (kind == 1u) { + // RRect: rounded box + vec2 b = f_params.xy; // half_size (phys px) + vec4 r = vec4(f_params.zw, f_params2.xy); // corner radii: tr, br, tl, bl + soft = max(f_params2.z, 1.0); + float stroke_px = f_params2.w; + + d = sdRoundedBox(f_local_or_uv, b, r); + if ((flags & 1u) != 0u) d = sdf_stroke(d, stroke_px); + } + else if (kind == 2u) { + // Circle + float radius = f_params.x; + soft = max(f_params.y, 1.0); + float stroke_px = f_params.z; + + d = sdCircle(f_local_or_uv, radius); + if ((flags & 1u) != 0u) d = sdf_stroke(d, stroke_px); + } + else if (kind == 3u) { + // Ellipse + vec2 ab = f_params.xy; + soft = max(f_params.z, 1.0); + float stroke_px = f_params.w; + + d = sdEllipse(f_local_or_uv, ab); + if ((flags & 1u) != 0u) d = sdf_stroke(d, stroke_px); + } + else if (kind == 4u) { + // Segment (capsule line) + vec2 a = f_params.xy; // already in local physical pixels + vec2 b = f_params.zw; + float width = f_params2.x; + soft = max(f_params2.y, 1.0); + + d = sdSegment(f_local_or_uv, a, b) - width * 0.5; + } + else if (kind == 5u) { + // Ring / Arc + float inner = f_params.x; + float outer = f_params.y; + float start_rad = f_params.z; + float end_rad = f_params.w; + soft = max(f_params2.x, 1.0); + + float r = length(f_local_or_uv); + float d_ring = max(inner - r, r - outer); + + // Angular clip + float angle = atan(f_local_or_uv.y, f_local_or_uv.x); + if (angle < 0.0) angle += 2.0 * PI; + float ang_start = start_rad; + float ang_end = end_rad; + if (ang_start < 0.0) ang_start += 2.0 * PI; + if (ang_end < 0.0) ang_end += 2.0 * PI; + + float in_arc = (ang_end > ang_start) + ? ((angle >= ang_start && angle <= ang_end) ? 1.0 : 0.0) : ((angle >= ang_start || angle <= ang_end) ? 1.0 : 0.0); + if (abs(ang_end - ang_start) >= 2.0 * PI - 0.001) in_arc = 1.0; + + d = in_arc > 0.5 ? 
d_ring : 1e30; + } + else if (kind == 6u) { + // Regular N-gon + float radius = f_params.x; + float rotation = f_params.y; + float sides = f_params.z; + soft = max(f_params.w, 1.0); + float stroke_px = f_params2.x; + + vec2 p = f_local_or_uv; + float c = cos(rotation), s = sin(rotation); + p = mat2(c, -s, s, c) * p; + + float an = PI / sides; + float bn = mod(atan(p.y, p.x), 2.0 * an) - an; + d = length(p) * cos(bn) - radius; + + if ((flags & 1u) != 0u) d = sdf_stroke(d, stroke_px); + } + + float alpha = sdf_alpha(d, soft); + out_color = vec4(f_color.rgb, f_color.a * alpha); +} diff --git a/draw/shaders/source/base_2d.vert b/draw/shaders/source/base_2d.vert new file mode 100644 index 0000000..2e09ec6 --- /dev/null +++ b/draw/shaders/source/base_2d.vert @@ -0,0 +1,63 @@ +#version 450 core + +// ---------- Vertex attributes (used in both modes) ---------- +layout(location = 0) in vec2 v_position; +layout(location = 1) in vec2 v_uv; +layout(location = 2) in vec4 v_color; + +// ---------- Outputs to fragment shader ---------- +layout(location = 0) out vec4 f_color; +layout(location = 1) out vec2 f_local_or_uv; +layout(location = 2) out vec4 f_params; +layout(location = 3) out vec4 f_params2; +layout(location = 4) flat out uint f_kind_flags; + +// ---------- Uniforms (single block — avoids spirv-cross reordering on Metal) ---------- +layout(set = 1, binding = 0) uniform Uniforms { + mat4 projection; + float dpi_scale; + uint mode; // 0 = tessellated, 1 = SDF +}; + +// ---------- SDF primitive storage buffer ---------- +struct Primitive { + vec4 bounds; // 0-15: min_x, min_y, max_x, max_y + uint color; // 16-19: packed u8x4 (unpack with unpackUnorm4x8) + uint kind_flags; // 20-23: kind | (flags << 8) + vec2 _pad; // 24-31: padding + vec4 params; // 32-47: shape params part 1 + vec4 params2; // 48-63: shape params part 2 +}; + +layout(std430, set = 0, binding = 0) readonly buffer Primitives { + Primitive primitives[]; +}; + +// ---------- Entry point ---------- +void 
main() { + if (mode == 0u) { + // ---- Mode 0: Tessellated (legacy) ---- + f_color = v_color; + f_local_or_uv = v_uv; + f_params = vec4(0.0); + f_params2 = vec4(0.0); + f_kind_flags = 0u; + + gl_Position = projection * vec4(v_position * dpi_scale, 0.0, 1.0); + } else { + // ---- Mode 1: SDF instanced quads ---- + Primitive p = primitives[gl_InstanceIndex]; + + vec2 corner = v_position; // unit quad corners: (0,0)-(1,1) + vec2 world_pos = mix(p.bounds.xy, p.bounds.zw, corner); + vec2 center = 0.5 * (p.bounds.xy + p.bounds.zw); + + f_color = unpackUnorm4x8(p.color); + f_local_or_uv = (world_pos - center) * dpi_scale; // shape-centered physical pixels + f_params = p.params; + f_params2 = p.params2; + f_kind_flags = p.kind_flags; + + gl_Position = projection * vec4(world_pos * dpi_scale, 0.0, 1.0); + } +} diff --git a/draw/shapes.odin b/draw/shapes.odin new file mode 100644 index 0000000..96c81a8 --- /dev/null +++ b/draw/shapes.odin @@ -0,0 +1,669 @@ +package draw + +import "core:math" + +SMOOTH_CIRCLE_ERROR_RATE :: 0.1 + +// ----- Adaptive tessellation ---- + +auto_segments :: proc(radius: f32, arc_degrees: f32) -> int { + if radius <= 0 do return 4 + phys_radius := radius * GLOB.dpi_scaling + acos_arg := clamp(2 * math.pow(1 - SMOOTH_CIRCLE_ERROR_RATE / phys_radius, 2) - 1, -1, 1) + th := math.acos(acos_arg) + if th <= 0 do return 4 + full_circle_segs := int(math.ceil(2 * math.PI / th)) + segs := int(f32(full_circle_segs) * arc_degrees / 360.0) + min_segs := max(int(math.ceil(f64(arc_degrees / 90.0))), 4) + return max(segs, min_segs) +} + +// ----- Internal helpers ---- + +@(private = "file") +extrude_line :: proc( + start, end_pos: [2]f32, + thick: f32, + color: Color, + vertices: []Vertex, + offset: int, +) -> int { + direction := end_pos - start + dx := direction[0] + dy := direction[1] + length := math.sqrt(dx * dx + dy * dy) + if length < 0.0001 do return 0 + + scale := thick / (2 * length) + perpendicular := [2]f32{-dy * scale, dx * scale} + + p0 := start + 
perpendicular + p1 := start - perpendicular + p2 := end_pos - perpendicular + p3 := end_pos + perpendicular + + vertices[offset + 0] = sv(p0, color) + vertices[offset + 1] = sv(p1, color) + vertices[offset + 2] = sv(p2, color) + vertices[offset + 3] = sv(p0, color) + vertices[offset + 4] = sv(p2, color) + vertices[offset + 5] = sv(p3, color) + + return 6 +} + +// Create a vertex for solid-color shape drawing (no texture, UV defaults to zero). +@(private = "file") +sv :: proc(pos: [2]f32, color: Color) -> Vertex { + return Vertex{position = pos, color = color} +} + +@(private = "file") +emit_rect :: proc(x, y, w, h: f32, color: Color, vertices: []Vertex, offset: int) { + vertices[offset + 0] = sv({x, y}, color) + vertices[offset + 1] = sv({x + w, y}, color) + vertices[offset + 2] = sv({x + w, y + h}, color) + vertices[offset + 3] = sv({x, y}, color) + vertices[offset + 4] = sv({x + w, y + h}, color) + vertices[offset + 5] = sv({x, y + h}, color) +} + +// ----- Drawing functions ---- + +pixel :: proc(layer: ^Layer, pos: [2]f32, color: Color) { + vertices: [6]Vertex + emit_rect(pos[0], pos[1], 1, 1, color, vertices[:], 0) + prepare_shape(layer, vertices[:]) +} + +rectangle :: proc( + layer: ^Layer, + rect: Rectangle, + color: Color, + origin: [2]f32 = {0, 0}, + rotation: f32 = 0, + temp_allocator := context.temp_allocator, +) { + vertices := make([]Vertex, 6, temp_allocator) + + if rotation == 0 { + emit_rect(rect.x, rect.y, rect.w, rect.h, color, vertices, 0) + } else { + rad := math.to_radians(rotation) + cos_rotation := math.cos(rad) + sin_rotation := math.sin(rad) + + // Corners relative to origin + top_left := [2]f32{-origin[0], -origin[1]} + top_right := [2]f32{rect.w - origin[0], -origin[1]} + bottom_right := [2]f32{rect.w - origin[0], rect.h - origin[1]} + bottom_left := [2]f32{-origin[0], rect.h - origin[1]} + + // Translation to final position + translate := [2]f32{rect.x + origin[0], rect.y + origin[1]} + + // Rotate and translate each corner + tl := + 
[2]f32 { + cos_rotation * top_left[0] - sin_rotation * top_left[1], + sin_rotation * top_left[0] + cos_rotation * top_left[1], + } + + translate + tr := + [2]f32 { + cos_rotation * top_right[0] - sin_rotation * top_right[1], + sin_rotation * top_right[0] + cos_rotation * top_right[1], + } + + translate + br := + [2]f32 { + cos_rotation * bottom_right[0] - sin_rotation * bottom_right[1], + sin_rotation * bottom_right[0] + cos_rotation * bottom_right[1], + } + + translate + bl := + [2]f32 { + cos_rotation * bottom_left[0] - sin_rotation * bottom_left[1], + sin_rotation * bottom_left[0] + cos_rotation * bottom_left[1], + } + + translate + + vertices[0] = sv(tl, color) + vertices[1] = sv(tr, color) + vertices[2] = sv(br, color) + vertices[3] = sv(tl, color) + vertices[4] = sv(br, color) + vertices[5] = sv(bl, color) + } + + prepare_shape(layer, vertices) +} + +rectangle_lines :: proc( + layer: ^Layer, + rect: Rectangle, + color: Color, + thick: f32 = 1, + temp_allocator := context.temp_allocator, +) { + vertices := make([]Vertex, 24, temp_allocator) + + // Top edge + emit_rect(rect.x, rect.y, rect.w, thick, color, vertices, 0) + // Bottom edge + emit_rect(rect.x, rect.y + rect.h - thick, rect.w, thick, color, vertices, 6) + // Left edge + emit_rect(rect.x, rect.y + thick, thick, rect.h - thick * 2, color, vertices, 12) + // Right edge + emit_rect(rect.x + rect.w - thick, rect.y + thick, thick, rect.h - thick * 2, color, vertices, 18) + + prepare_shape(layer, vertices) +} + +rectangle_gradient :: proc( + layer: ^Layer, + rect: Rectangle, + top_left, top_right, bottom_left, bottom_right: Color, + temp_allocator := context.temp_allocator, +) { + vertices := make([]Vertex, 6, temp_allocator) + + tl := [2]f32{rect.x, rect.y} + tr := [2]f32{rect.x + rect.w, rect.y} + br := [2]f32{rect.x + rect.w, rect.y + rect.h} + bl := [2]f32{rect.x, rect.y + rect.h} + + vertices[0] = sv(tl, top_left) + vertices[1] = sv(tr, top_right) + vertices[2] = sv(br, bottom_right) + vertices[3] = 
sv(tl, top_left) + vertices[4] = sv(br, bottom_right) + vertices[5] = sv(bl, bottom_left) + + prepare_shape(layer, vertices) +} + +circle_sector :: proc( + layer: ^Layer, + center: [2]f32, + radius: f32, + start_angle, end_angle: f32, + color: Color, + segments: int = 0, + temp_allocator := context.temp_allocator, +) { + arc_length := abs(end_angle - start_angle) + segs := segments > 0 ? segments : auto_segments(radius, arc_length) + + vertex_count := segs * 3 + vertices := make([]Vertex, vertex_count, temp_allocator) + + start_rad := math.to_radians(start_angle) + end_rad := math.to_radians(end_angle) + step_angle := (end_rad - start_rad) / f32(segs) + + for i in 0 ..< segs { + current_angle := start_rad + step_angle * f32(i) + next_angle := start_rad + step_angle * f32(i + 1) + + edge_current := center + [2]f32{math.cos(current_angle) * radius, math.sin(current_angle) * radius} + edge_next := center + [2]f32{math.cos(next_angle) * radius, math.sin(next_angle) * radius} + + idx := i * 3 + vertices[idx + 0] = sv(center, color) + vertices[idx + 1] = sv(edge_next, color) + vertices[idx + 2] = sv(edge_current, color) + } + + prepare_shape(layer, vertices) +} + +circle_gradient :: proc( + layer: ^Layer, + center: [2]f32, + radius: f32, + inner, outer: Color, + segments: int = 0, + temp_allocator := context.temp_allocator, +) { + segs := segments > 0 ? 
segments : auto_segments(radius, 360) + + vertex_count := segs * 3 + vertices := make([]Vertex, vertex_count, temp_allocator) + + step_angle := math.TAU / f32(segs) + + for i in 0 ..< segs { + current_angle := step_angle * f32(i) + next_angle := step_angle * f32(i + 1) + + edge_current := center + [2]f32{math.cos(current_angle) * radius, math.sin(current_angle) * radius} + edge_next := center + [2]f32{math.cos(next_angle) * radius, math.sin(next_angle) * radius} + + idx := i * 3 + vertices[idx + 0] = sv(center, inner) + vertices[idx + 1] = sv(edge_next, outer) + vertices[idx + 2] = sv(edge_current, outer) + } + + prepare_shape(layer, vertices) +} + +triangle :: proc(layer: ^Layer, v1, v2, v3: [2]f32, color: Color) { + vertices := [3]Vertex{sv(v1, color), sv(v2, color), sv(v3, color)} + prepare_shape(layer, vertices[:]) +} + +triangle_lines :: proc( + layer: ^Layer, + v1, v2, v3: [2]f32, + color: Color, + thick: f32 = 1, + temp_allocator := context.temp_allocator, +) { + vertices := make([]Vertex, 18, temp_allocator) + write_offset := 0 + write_offset += extrude_line(v1, v2, thick, color, vertices, write_offset) + write_offset += extrude_line(v2, v3, thick, color, vertices, write_offset) + write_offset += extrude_line(v3, v1, thick, color, vertices, write_offset) + if write_offset > 0 { + prepare_shape(layer, vertices[:write_offset]) + } +} + +triangle_fan :: proc( + layer: ^Layer, + points: [][2]f32, + color: Color, + temp_allocator := context.temp_allocator, +) { + if len(points) < 3 do return + + triangle_count := len(points) - 2 + vertex_count := triangle_count * 3 + vertices := make([]Vertex, vertex_count, temp_allocator) + + for i in 1 ..< len(points) - 1 { + idx := (i - 1) * 3 + vertices[idx + 0] = sv(points[0], color) + vertices[idx + 1] = sv(points[i], color) + vertices[idx + 2] = sv(points[i + 1], color) + } + + prepare_shape(layer, vertices) +} + +triangle_strip :: proc( + layer: ^Layer, + points: [][2]f32, + color: Color, + temp_allocator := 
context.temp_allocator, +) { + if len(points) < 3 do return + + triangle_count := len(points) - 2 + vertex_count := triangle_count * 3 + vertices := make([]Vertex, vertex_count, temp_allocator) + + for i in 0 ..< triangle_count { + idx := i * 3 + if i % 2 == 0 { + vertices[idx + 0] = sv(points[i], color) + vertices[idx + 1] = sv(points[i + 1], color) + vertices[idx + 2] = sv(points[i + 2], color) + } else { + vertices[idx + 0] = sv(points[i + 1], color) + vertices[idx + 1] = sv(points[i], color) + vertices[idx + 2] = sv(points[i + 2], color) + } + } + + prepare_shape(layer, vertices) +} + +// ----- SDF drawing functions ---- + +// Draw a rectangle with per-corner rounding radii via SDF. +rectangle_corners :: proc( + layer: ^Layer, + rect: Rectangle, + radii: [4]f32, + color: Color, + soft_px: f32 = 1.0, +) { + max_radius := min(rect.w, rect.h) * 0.5 + tl := clamp(radii[0], 0, max_radius) + tr := clamp(radii[1], 0, max_radius) + br := clamp(radii[2], 0, max_radius) + bl := clamp(radii[3], 0, max_radius) + + pad := soft_px / GLOB.dpi_scaling + dpi := GLOB.dpi_scaling + + prim := Primitive { + bounds = {rect.x - pad, rect.y - pad, rect.x + rect.w + pad, rect.y + rect.h + pad}, + color = color, + kind_flags = pack_kind_flags(.RRect, {}), + } + prim.params.rrect = RRect_Params { + half_size = {rect.w * 0.5 * dpi, rect.h * 0.5 * dpi}, + radii = {tr * dpi, br * dpi, tl * dpi, bl * dpi}, + soft_px = soft_px, + stroke_px = 0, + } + prepare_sdf_primitive(layer, prim) +} + +// Draw a stroked rectangle with per-corner rounding radii via SDF. 
+//
+// Only a band of width `thick`, centered on the rounded outline, is filled. `rect`, `radii` and
+// `thick` are in logical units and are multiplied by `GLOB.dpi_scaling` before upload, because the
+// fragment shader evaluates the SDF in physical pixels. `soft_px` is the anti-aliasing feather in
+// physical pixels (the shader clamps it to at least 1).
+// `radii` is read as {top-left, top-right, bottom-right, bottom-left}; each entry is clamped to
+// [0, min(rect.w, rect.h) / 2].
+rectangle_corners_lines :: proc(
+	layer: ^Layer,
+	rect: Rectangle,
+	radii: [4]f32,
+	color: Color,
+	thick: f32 = 1,
+	soft_px: f32 = 1.0,
+) {
+	max_radius := min(rect.w, rect.h) * 0.5
+	tl := clamp(radii[0], 0, max_radius)
+	tr := clamp(radii[1], 0, max_radius)
+	br := clamp(radii[2], 0, max_radius)
+	bl := clamp(radii[3], 0, max_radius)
+
+	dpi := GLOB.dpi_scaling
+	// Quad padding in logical units. The stroke is centered on the outline, so it reaches
+	// `thick / 2` logical units outside `rect`; only the feather is in physical pixels and needs
+	// converting. Dividing the stroke half-width by dpi as well (as before) made the quad too
+	// small and clipped the outer edge of the stroke whenever dpi_scaling > 1.
+	pad := thick * 0.5 + soft_px / dpi
+
+	prim := Primitive {
+		bounds     = {rect.x - pad, rect.y - pad, rect.x + rect.w + pad, rect.y + rect.h + pad},
+		color      = color,
+		kind_flags = pack_kind_flags(.RRect, {.Stroke}),
+	}
+	prim.params.rrect = RRect_Params {
+		half_size = {rect.w * 0.5 * dpi, rect.h * 0.5 * dpi},
+		// Uploaded as {tr, br, tl, bl}, the order documented next to sdRoundedBox in the shader.
+		// NOTE(review): sdRoundedBox selects r.x for local +x/+y; with a y-down projection that
+		// is the bottom-right corner -- confirm top and bottom radii are not swapped on screen.
+		radii     = {tr * dpi, br * dpi, tl * dpi, bl * dpi},
+		soft_px   = soft_px,
+		stroke_px = thick * dpi,
+	}
+	prepare_sdf_primitive(layer, prim)
+}
+
+// Draw a rectangle with uniform corner rounding via SDF.
+//
+// `roundness` is clamped to [0, 1] and scales the corner radius from 0 up to half of the shorter
+// side of `rect`. When the resulting radius is below 1 the corners are treated as sharp and the
+// call is forwarded to the tessellated `rectangle` instead of the SDF path.
+rectangle_rounded :: proc(
+	layer: ^Layer,
+	rect: Rectangle,
+	roundness: f32,
+	color: Color,
+	soft_px: f32 = 1.0,
+) {
+	corner_radius := min(rect.w, rect.h) * clamp(roundness, 0, 1) * 0.5
+	if corner_radius < 1 {
+		rectangle(layer, rect, color)
+		return
+	}
+	rectangle_corners(
+		layer,
+		rect,
+		{corner_radius, corner_radius, corner_radius, corner_radius},
+		color,
+		soft_px,
+	)
+}
+
+// Draw a stroked rectangle with uniform corner rounding via SDF.
+//
+// Same radius rule as `rectangle_rounded`: `roundness` is clamped to [0, 1] and a resulting radius
+// below 1 falls back to the tessellated `rectangle_lines` (which ignores `soft_px`).
+rectangle_rounded_lines :: proc(
+	layer: ^Layer,
+	rect: Rectangle,
+	roundness: f32,
+	color: Color,
+	thick: f32 = 1,
+	soft_px: f32 = 1.0,
+) {
+	corner_radius := min(rect.w, rect.h) * clamp(roundness, 0, 1) * 0.5
+	if corner_radius < 1 {
+		rectangle_lines(layer, rect, color, thick)
+		return
+	}
+	rectangle_corners_lines(
+		layer,
+		rect,
+		{corner_radius, corner_radius, corner_radius, corner_radius},
+		color,
+		thick,
+		soft_px,
+	)
+}
+
+// Draw a filled circle via SDF.
+//
+// `center` and `radius` are in logical units; the radius is multiplied by `GLOB.dpi_scaling`
+// before upload because the fragment shader works in physical pixels. `soft_px` is the
+// anti-aliasing feather in physical pixels (the shader clamps it to at least 1).
+circle :: proc(layer: ^Layer, center: [2]f32, radius: f32, color: Color, soft_px: f32 = 1.0) {
+	dpi := GLOB.dpi_scaling
+	// Bounds are in logical units, so the physical-pixel feather is converted before padding.
+	pad := soft_px / dpi
+
+	prim := Primitive {
+		bounds     = {
+			center.x - radius - pad,
+			center.y - radius - pad,
+			center.x + radius + pad,
+			center.y + radius + pad,
+		},
+		color      = color,
+		kind_flags = pack_kind_flags(.Circle, {}),
+	}
+	prim.params.circle = Circle_Params {
+		radius  = radius * dpi,
+		soft_px = soft_px,
+	}
+	prepare_sdf_primitive(layer, prim)
+}
+
+// Draw a stroked circle via SDF.
+//
+// Fills a band of width `thick` centered on the circle of the given `radius`. `center`, `radius`
+// and `thick` are in logical units; `soft_px` is the feather in physical pixels.
+circle_lines :: proc(
+	layer: ^Layer,
+	center: [2]f32,
+	radius: f32,
+	color: Color,
+	thick: f32 = 1,
+	soft_px: f32 = 1.0,
+) {
+	dpi := GLOB.dpi_scaling
+	// The stroke reaches `thick / 2` logical units beyond the radius; only the feather is in
+	// physical pixels. Dividing the stroke half-width by dpi too (as before) clipped the outer
+	// edge of the stroke whenever dpi_scaling > 1.
+	pad := thick * 0.5 + soft_px / dpi
+
+	prim := Primitive {
+		bounds     = {
+			center.x - radius - pad,
+			center.y - radius - pad,
+			center.x + radius + pad,
+			center.y + radius + pad,
+		},
+		color      = color,
+		kind_flags = pack_kind_flags(.Circle, {.Stroke}),
+	}
+	prim.params.circle = Circle_Params {
+		radius    = radius * dpi,
+		soft_px   = soft_px,
+		stroke_px = thick * dpi,
+	}
+	prepare_sdf_primitive(layer, prim)
+}
+
+// Draw a filled ellipse via SDF.
+//
+// `radius_h` / `radius_v` are the horizontal and vertical semi-axes in logical units; `soft_px`
+// is the feather in physical pixels. Equal radii are drawn through `circle`.
+ellipse :: proc(
+	layer: ^Layer,
+	center: [2]f32,
+	radius_h, radius_v: f32,
+	color: Color,
+	soft_px: f32 = 1.0,
+) {
+	// The shader's sdEllipse divides by (ry^2 - rx^2), which is zero when both radii are equal
+	// and turns every distance into NaN. An ellipse with equal radii is exactly a circle, so
+	// route it to the circle SDF (same bounds, same feather).
+	if radius_h == radius_v {
+		circle(layer, center, radius_h, color, soft_px)
+		return
+	}
+
+	dpi := GLOB.dpi_scaling
+	pad := soft_px / dpi
+
+	prim := Primitive {
+		bounds     = {
+			center.x - radius_h - pad,
+			center.y - radius_v - pad,
+			center.x + radius_h + pad,
+			center.y + radius_v + pad,
+		},
+		color      = color,
+		kind_flags = pack_kind_flags(.Ellipse, {}),
+	}
+	prim.params.ellipse = Ellipse_Params {
+		radii   = {radius_h * dpi, radius_v * dpi},
+		soft_px = soft_px,
+	}
+	prepare_sdf_primitive(layer, prim)
+}
+
+// Draw a stroked ellipse via SDF.
+//
+// Fills a band of width `thick` centered on the ellipse outline. `radius_h`, `radius_v` and
+// `thick` are in logical units; `soft_px` is the feather in physical pixels. Equal radii are
+// drawn through `circle_lines`.
+ellipse_lines :: proc(
+	layer: ^Layer,
+	center: [2]f32,
+	radius_h, radius_v: f32,
+	color: Color,
+	thick: f32 = 1,
+	soft_px: f32 = 1.0,
+) {
+	// The shader's sdEllipse divides by (ry^2 - rx^2), which is zero when both radii are equal
+	// and turns every distance into NaN. An ellipse with equal radii is exactly a circle.
+	if radius_h == radius_v {
+		circle_lines(layer, center, radius_h, color, thick, soft_px)
+		return
+	}
+
+	dpi := GLOB.dpi_scaling
+	// Extra 10% padding: iq's sdEllipse has precision degradation near the tips of highly
+	// eccentric ellipses, so the quad needs additional breathing room beyond the stroke width.
+	// The radius margin and the stroke half-width are already logical units; only the feather is
+	// in physical pixels. Dividing the whole sum by dpi (as before) shrank the margin and clipped
+	// the stroke whenever dpi_scaling > 1.
+	pad := max(radius_h, radius_v) * 0.1 + thick * 0.5 + soft_px / dpi
+
+	prim := Primitive {
+		bounds     = {
+			center.x - radius_h - pad,
+			center.y - radius_v - pad,
+			center.x + radius_h + pad,
+			center.y + radius_v + pad,
+		},
+		color      = color,
+		kind_flags = pack_kind_flags(.Ellipse, {.Stroke}),
+	}
+	prim.params.ellipse = Ellipse_Params {
+		radii     = {radius_h * dpi, radius_v * dpi},
+		soft_px   = soft_px,
+		stroke_px = thick * dpi,
+	}
+	prepare_sdf_primitive(layer, prim)
+}
+
+// Draw a filled ring arc via SDF.
+//
+// Fills the annulus between `inner_radius` and `outer_radius` (logical units), restricted to the
+// angular range from `start_angle` to `end_angle`. Angles are given in degrees and converted to
+// radians for the shader. `soft_px` is the radial feather in physical pixels; the shader's
+// angular clip is a hard inside/outside test, so the two straight ends are not feathered.
+ring :: proc(
+	layer: ^Layer,
+	center: [2]f32,
+	inner_radius, outer_radius: f32,
+	start_angle, end_angle: f32,
+	color: Color,
+	soft_px: f32 = 1.0,
+) {
+	dpi := GLOB.dpi_scaling
+	// Bounds are in logical units, so the physical-pixel feather is converted before padding.
+	pad := soft_px / dpi
+
+	prim := Primitive {
+		bounds     = {
+			center.x - outer_radius - pad,
+			center.y - outer_radius - pad,
+			center.x + outer_radius + pad,
+			center.y + outer_radius + pad,
+		},
+		color      = color,
+		kind_flags = pack_kind_flags(.Ring_Arc, {}),
+	}
+	prim.params.ring_arc = Ring_Arc_Params {
+		inner_radius = inner_radius * dpi,
+		outer_radius = outer_radius * dpi,
+		start_rad    = math.to_radians(start_angle),
+		end_rad      = math.to_radians(end_angle),
+		soft_px      = soft_px,
+	}
+	prepare_sdf_primitive(layer, prim)
+}
+
+// Draw stroked ring arc outlines via SDF.
+//
+// The outline is composed from existing primitives: two thin `ring` arcs of width `thick`
+// centered on `inner_radius` and `outer_radius`, plus two radial `line` caps joining them at
+// `start_angle` and `end_angle` (degrees). A closed ring (span of 360 degrees or more) gets no
+// caps, since it has no open ends.
+ring_lines :: proc(
+	layer: ^Layer,
+	center: [2]f32,
+	inner_radius, outer_radius: f32,
+	start_angle, end_angle: f32,
+	color: Color,
+	thick: f32 = 1,
+	soft_px: f32 = 1.0,
+) {
+	half_thick := thick * 0.5
+
+	// Inner arc outline (inner edge clamped so the band never gets a negative radius)
+	ring(
+		layer,
+		center,
+		max(0, inner_radius - half_thick),
+		inner_radius + half_thick,
+		start_angle,
+		end_angle,
+		color,
+		soft_px,
+	)
+	// Outer arc outline
+	ring(
+		layer,
+		center,
+		max(0, outer_radius - half_thick),
+		outer_radius + half_thick,
+		start_angle,
+		end_angle,
+		color,
+		soft_px,
+	)
+
+	// A full ring has no ends to close. Drawing the caps anyway left a radial spoke across the
+	// ring at `start_angle`.
+	if abs(end_angle - start_angle) >= 360 do return
+
+	start_rad := math.to_radians(start_angle)
+	end_rad := math.to_radians(end_angle)
+	start_dir := [2]f32{math.cos(start_rad), math.sin(start_rad)}
+	end_dir := [2]f32{math.cos(end_rad), math.sin(end_rad)}
+
+	// Start cap
+	line(layer, center + start_dir * inner_radius, center + start_dir * outer_radius, color, thick, soft_px)
+	// End cap
+	line(layer, center + end_dir * inner_radius, center + end_dir * outer_radius, color, thick, soft_px)
+}
+
+// Draw a line segment via SDF.
+//
+// The segment is rendered as a capsule: `thick` (logical units) is the full width and both ends
+// are rounded with radius `thick / 2`. `soft_px` is the feather in physical pixels. A zero-length
+// segment is drawn as a circle of radius `thick / 2`.
+line :: proc(
+	layer: ^Layer,
+	start, end_pos: [2]f32,
+	color: Color,
+	thick: f32 = 1,
+	soft_px: f32 = 1.0,
+) {
+	// The shader's sdSegment divides by dot(b - a, b - a), which is zero for coincident
+	// endpoints and makes the distance NaN. A zero-length capsule is exactly a circle.
+	if start == end_pos {
+		circle(layer, start, thick * 0.5, color, soft_px)
+		return
+	}
+
+	dpi := GLOB.dpi_scaling
+	// Half-width is already in logical units; only the physical-pixel feather is converted.
+	cap := thick * 0.5 + soft_px / dpi
+	min_x := min(start.x, end_pos.x) - cap
+	max_x := max(start.x, end_pos.x) + cap
+	min_y := min(start.y, end_pos.y) - cap
+	max_y := max(start.y, end_pos.y) + cap
+
+	// Endpoints are uploaded relative to the quad center, in physical pixels, which is the
+	// coordinate frame the fragment shader evaluates the SDF in.
+	center := [2]f32{(min_x + max_x) * 0.5, (min_y + max_y) * 0.5}
+	local_a := (start - center) * dpi
+	local_b := (end_pos - center) * dpi
+
+	prim := Primitive {
+		bounds     = {min_x, min_y, max_x, max_y},
+		color      = color,
+		kind_flags = pack_kind_flags(.Segment, {}),
+	}
+	prim.params.segment = Segment_Params {
+		a       = local_a,
+		b       = local_b,
+		width   = thick * dpi,
+		soft_px = soft_px,
+	}
+	prepare_sdf_primitive(layer, prim)
+}
+
+// Draw a line strip via decomposed SDF segments.
+line_strip :: proc( + layer: ^Layer, + points: [][2]f32, + color: Color, + thick: f32 = 1, + soft_px: f32 = 1.0, +) { + if len(points) < 2 do return + for i in 0 ..< len(points) - 1 { + line(layer, points[i], points[i + 1], color, thick, soft_px) + } +} + +// Draw a filled regular polygon via SDF. +poly :: proc( + layer: ^Layer, + center: [2]f32, + sides: int, + radius: f32, + color: Color, + rotation: f32 = 0, + soft_px: f32 = 1.0, +) { + if sides < 3 do return + pad := soft_px / GLOB.dpi_scaling + dpi := GLOB.dpi_scaling + + prim := Primitive { + bounds = {center.x - radius - pad, center.y - radius - pad, + center.x + radius + pad, center.y + radius + pad}, + color = color, + kind_flags = pack_kind_flags(.NGon, {}), + } + prim.params.ngon = NGon_Params { + radius = radius * math.cos(math.PI / f32(sides)) * dpi, + rotation = math.to_radians(rotation), + sides = f32(sides), + soft_px = soft_px, + } + prepare_sdf_primitive(layer, prim) +} + +// Draw a stroked regular polygon via SDF. +poly_lines :: proc( + layer: ^Layer, + center: [2]f32, + sides: int, + radius: f32, + color: Color, + rotation: f32 = 0, + thick: f32 = 1, + soft_px: f32 = 1.0, +) { + if sides < 3 do return + pad := (thick * 0.5 + soft_px) / GLOB.dpi_scaling + dpi := GLOB.dpi_scaling + + prim := Primitive { + bounds = {center.x - radius - pad, center.y - radius - pad, + center.x + radius + pad, center.y + radius + pad}, + color = color, + kind_flags = pack_kind_flags(.NGon, {.Stroke}), + } + prim.params.ngon = NGon_Params { + radius = radius * math.cos(math.PI / f32(sides)) * dpi, + rotation = math.to_radians(rotation), + sides = f32(sides), + soft_px = soft_px, + stroke_px = thick * dpi, + } + prepare_sdf_primitive(layer, prim) +} diff --git a/draw/text.odin b/draw/text.odin new file mode 100644 index 0000000..4c04bda --- /dev/null +++ b/draw/text.odin @@ -0,0 +1,140 @@ +package draw + +import "core:log" +import sdl "vendor:sdl3" +import sdl_ttf "vendor:sdl3/ttf" + +Font_Id :: u16 + +Font_Key :: 
struct {
+	id:   Font_Id,
+	size: u16,
+}
+
+// Text-rendering state kept in GLOB.text_cache.
+Text_Cache :: struct {
+	engine:     ^sdl_ttf.TextEngine, // GPU text engine used to create text objects
+	font_bytes: [dynamic][]u8, // registered font data, indexed by Font_Id
+	sdl_fonts:  map[Font_Key]^sdl_ttf.Font, // fonts opened so far, one per (id, size)
+	cache:      map[u32]^sdl_ttf.Text, // text objects keyed by the caller-supplied id
+}
+
+// Internal for fetching SDL TTF font pointer for rendering.
+//
+// Fonts are opened lazily: the first request for a given (id, size) opens the font from
+// the registered bytes and stores it in GLOB.text_cache.sdl_fonts; later requests return
+// the stored pointer. Panics if the font cannot be opened or configured.
+get_font :: proc(id: Font_Id, size: u16) -> ^sdl_ttf.Font {
+	assert(int(id) < len(GLOB.text_cache.font_bytes), "Invalid font ID.")
+	key := Font_Key{id, size}
+	font := GLOB.text_cache.sdl_fonts[key]
+
+	if font == nil {
+		log.debug("Font with id:", id, "and size:", size, "not found. Adding..")
+
+		font_bytes := GLOB.text_cache.font_bytes[id]
+		if font_bytes == nil {
+			log.panicf("Font must first be registered with register_font before using (id=%d)", id)
+		}
+
+		font_io := sdl.IOFromConstMem(raw_data(font_bytes[:]), len(font_bytes))
+		if font_io == nil {
+			log.panicf("Failed to create IOStream for font id=%d: %s", id, sdl.GetError())
+		}
+
+		sdl_font := sdl_ttf.OpenFontIO(font_io, true, f32(size))
+		if sdl_font == nil {
+			log.panicf("Failed to create SDL font for font id=%d size=%d: %s", id, size, sdl.GetError())
+		}
+
+		// Multiply first, then convert. Converting the scale to an integer first
+		// truncated fractional factors (1.5 became 1), so text was rasterized at
+		// 72 DPI on such displays. The +0.5 rounds to the nearest whole DPI.
+		dpi := i32(72 * GLOB.dpi_scaling + 0.5)
+		if !sdl_ttf.SetFontSizeDPI(sdl_font, f32(size), dpi, dpi) {
+			log.panicf("Failed to set font DPI for font id=%d size=%d: %s", id, size, sdl.GetError())
+		}
+
+		GLOB.text_cache.sdl_fonts[key] = sdl_font
+		return sdl_font
+	} else {
+		return font
+	}
+}
+
+// Returns `false` if there are more than max(u16) fonts
+register_font :: proc(bytes: []u8) -> (id: Font_Id, ok: bool) #optional_ok {
+	if GLOB.text_cache.engine == nil {
+		log.panicf("Cannot register font: text system not initialized. 
Call init() first.")
+	}
+	if len(GLOB.text_cache.font_bytes) > int(max(Font_Id)) do return 0, false
+
+	log.debug("Registering font...")
+	// The slice is stored as-is (not copied), so the caller's memory has to stay valid
+	// for as long as the font is in use.
+	append(&GLOB.text_cache.font_bytes, bytes)
+	return Font_Id(len(GLOB.text_cache.font_bytes) - 1), true
+}
+
+// A positioned, colored reference to a cached SDL_ttf text object.
+Text :: struct {
+	ref:      ^sdl_ttf.Text,
+	position: [2]f32,
+	color:    Color,
+}
+
+// Returns a `Text` for `txt`, creating or updating the cached SDL_ttf text object stored
+// under `id`.
+//
+// On a cache miss the text object is created with the font from get_font(font_id, font_size).
+// On a hit only the string is updated; `font_id` and `font_size` are not re-applied.
+// Panics if SDL_ttf fails to create or update the text.
+text :: proc(
+	id: u32,
+	txt: cstring,
+	pos: [2]f32,
+	font_id: Font_Id,
+	font_size: u16 = 44,
+	color: Color = {0, 0, 0, 255},
+) -> Text {
+	sdl_text := GLOB.text_cache.cache[id]
+	if sdl_text == nil {
+		sdl_text = sdl_ttf.CreateText(GLOB.text_cache.engine, get_font(font_id, font_size), txt, 0)
+		if sdl_text == nil {
+			log.panicf("Failed to create SDL text: %s", sdl.GetError())
+		}
+		GLOB.text_cache.cache[id] = sdl_text
+	} else {
+		//TODO if IDs are always unique and never change the underlying text
+		// can get rid of this
+		if !sdl_ttf.SetTextString(sdl_text, txt, 0) {
+			log.panicf("Failed to update SDL text string: %s", sdl.GetError())
+		}
+	}
+
+	return Text{sdl_text, pos, color}
+}
+
+// Initializes SDL_ttf and creates the GPU text engine for `device`.
+//
+// Returns ok=false (after logging the SDL error) if either step fails; SDL_ttf is shut
+// down again when engine creation fails. All three containers of the returned cache use
+// `allocator`.
+@(private, require_results)
+init_text_cache :: proc(
+	device: ^sdl.GPUDevice,
+	allocator := context.allocator,
+) -> (
+	text_cache: Text_Cache,
+	ok: bool,
+) {
+	log.debug("Initializing text state")
+	if !sdl_ttf.Init() {
+		log.errorf("Failed to initialize SDL_ttf: %s", sdl.GetError())
+		return text_cache, false
+	}
+
+	engine := sdl_ttf.CreateGPUTextEngine(device)
+	if engine == nil {
+		log.errorf("Failed to create GPU text engine: %s", sdl.GetError())
+		sdl_ttf.Quit()
+		return text_cache, false
+	}
+	sdl_ttf.SetGPUTextEngineWinding(engine, .COUNTER_CLOCKWISE)
+
+	// Bind every container to the requested allocator up front. Previously only `cache`
+	// honored it; the other two silently picked up whatever context.allocator was active
+	// at their first insertion.
+	text_cache = Text_Cache {
+		engine     = engine,
+		font_bytes = make([dynamic][]u8, allocator = allocator),
+		sdl_fonts  = make(map[Font_Key]^sdl_ttf.Font, allocator = allocator),
+		cache      = make(map[u32]^sdl_ttf.Text, allocator = allocator),
+	}
+
+	log.debug("Done initializing text cache")
+	return text_cache, true
+}
+
+// Releases everything owned by GLOB.text_cache and shuts SDL_ttf down.
+destroy_text_cache :: proc() {
+	// Text objects were created by the engine and were never released before; destroy
+	// them first, while the engine and fonts still exist.
+	for _, cached_text in GLOB.text_cache.cache {
+		sdl_ttf.DestroyText(cached_text)
+	}
+	for _, font in GLOB.text_cache.sdl_fonts {
+		sdl_ttf.CloseFont(font)
+	}
+	delete(GLOB.text_cache.sdl_fonts)
+	delete(GLOB.text_cache.font_bytes)
+	delete(GLOB.text_cache.cache)
+	sdl_ttf.DestroyGPUTextEngine(GLOB.text_cache.engine)
+	sdl_ttf.Quit()
+}
diff --git a/meta/gen_shaders.odin b/meta/gen_shaders.odin
new file mode 100644
index 0000000..1755693
--- /dev/null
+++ b/meta/gen_shaders.odin
@@ -0,0 +1,141 @@
+package meta
+
+import "core:fmt"
+import "core:os"
+import "core:strings"
+
+// Compiles all GLSL shaders in source_dir to both SPIR-V (.spv) and
+// Metal Shading Language (.metal), writing results to generated_dir.
+// generated_dir is removed and recreated on every run, so outputs from
+// earlier runs (including ones whose source no longer exists) are discarded.
+// Only files ending in .vert or .frag are treated as shader sources.
+// Returns false if a tool is missing, a directory operation fails, no
+// sources are found, or any shader fails to compile.
+// Requires `glslangValidator` and `spirv-cross` on PATH.
+gen_shaders :: proc(source_dir, generated_dir: string) -> (success: bool) {
+	if !verify_shader_tool("glslangValidator") do return false
+	if !verify_shader_tool("spirv-cross") do return false
+
+	source_entries, read_err := os.read_all_directory_by_path(source_dir, context.temp_allocator)
+	if read_err != nil {
+		fmt.eprintfln("Failed to read shader source directory '%s': %v", source_dir, read_err)
+		return false
+	}
+	shader_names := make([dynamic]string, len = 0, cap = 24, allocator = context.temp_allocator)
+
+	for entry in source_entries {
+		if strings.has_suffix(entry.name, ".vert") || strings.has_suffix(entry.name, ".frag") {
+			append(&shader_names, entry.name)
+		}
+	}
+
+	if len(shader_names) == 0 {
+		fmt.eprintfln("No shader source files (.vert, .frag) found in '%s'.", source_dir)
+		return false
+	}
+	if os.exists(generated_dir) {
+		rmdir_err := os.remove_all(generated_dir)
+		if rmdir_err != nil {
+			fmt.eprintfln("Failed to remove old output directory '%s': %v", generated_dir, rmdir_err)
+			return false
+		}
+	}
+	mkdir_err := os.mkdir(generated_dir)
+	if mkdir_err != nil {
+		fmt.eprintfln("Failed to create output directory '%s': %v", generated_dir, mkdir_err)
+		return false
+	}
+
+	compiled_count := 0
+	for shader_name in shader_names {
+		source_path := fmt.tprintf("%s/%s", source_dir, shader_name)
+		spv_path := fmt.tprintf("%s/%s.spv", generated_dir, 
shader_name)
+		metal_path := fmt.tprintf("%s/%s.metal", generated_dir, shader_name)
+
+		// A shader counts as compiled only when both stages succeed; a failure in
+		// either stage moves on to the next shader.
+		fmt.printfln("[GLSL -> SPIR-V] %s", shader_name)
+		if compile_glsl_to_spirv(source_path, spv_path) {
+			fmt.printfln("[SPIR-V -> MSL] %s", shader_name)
+			if compile_spirv_to_msl(spv_path, metal_path) {
+				compiled_count += 1
+			}
+		}
+	}
+
+	total := len(shader_names)
+	failed_count := total - compiled_count
+	if failed_count != 0 {
+		fmt.eprintfln("%d of %d shaders failed to compile.", failed_count, total)
+		return false
+	}
+
+	fmt.printfln("Successfully compiled all %d shaders.", total)
+	return true
+}
+
+// Checks that `tool_name` can be launched by running it with `--version`.
+// Only a launch error counts as "missing"; the exit status and output are ignored.
+// On failure prints the error plus install hints for the two known tools.
+verify_shader_tool :: proc(tool_name: string) -> bool {
+	probe := os.Process_Desc{command = []string{tool_name, "--version"}}
+	_, _, _, exec_err := os.process_exec(probe, context.temp_allocator)
+	if exec_err == nil do return true
+
+	fmt.eprintfln("Required tool '%s' not found on PATH.", tool_name)
+	switch tool_name {
+	case "glslangValidator":
+		fmt.eprintln("\tInstall the Vulkan SDK or the glslang package:")
+		fmt.eprintln("\t macOS: brew install glslang")
+		fmt.eprintln("\t Arch: sudo pacman -S glslang")
+		fmt.eprintln("\t Debian: sudo apt install glslang-tools")
+	case "spirv-cross":
+		fmt.eprintln("\tInstall SPIRV-Cross:")
+		fmt.eprintln("\t macOS: brew install spirv-cross")
+		fmt.eprintln("\t Arch: sudo pacman -S spirv-cross")
+		fmt.eprintln("\t Debian: sudo apt install spirv-cross")
+	}
+	return false
+}
+
+// Runs `glslangValidator -V source_path -o output_path`.
+// Returns false (after printing diagnostics) if the tool cannot be launched or
+// exits unsuccessfully.
+compile_glsl_to_spirv :: proc(source_path, output_path: string) -> bool {
+	invocation := os.Process_Desc{command = []string{"glslangValidator", "-V", source_path, "-o", output_path}}
+	result, out_bytes, err_bytes, exec_err := os.process_exec(invocation, context.temp_allocator)
+
+	if exec_err != nil {
+		fmt.eprintfln("\tFailed to run glslangValidator for '%s': %v", source_path, exec_err)
+		return false
+	}
+
+	if !result.success {
+		fmt.eprintfln("\tglslangValidator failed for '%s' (exit code %d):", source_path, result.exit_code)
+		print_tool_output(out_bytes, err_bytes)
+		return false
+	}
+
+	return true
+}
+
+// Runs `spirv-cross --msl spv_path --output output_path`.
+// Returns false (after printing diagnostics) if the tool cannot be launched or
+// exits unsuccessfully.
+compile_spirv_to_msl :: proc(spv_path, output_path: string) -> bool {
+	invocation := os.Process_Desc{command = []string{"spirv-cross", "--msl", spv_path, "--output", output_path}}
+	result, out_bytes, err_bytes, exec_err := os.process_exec(invocation, context.temp_allocator)
+
+	if exec_err != nil {
+		fmt.eprintfln("\tFailed to run spirv-cross for '%s': %v", spv_path, exec_err)
+		return false
+	}
+
+	if !result.success {
+		fmt.eprintfln("\tspirv-cross failed for '%s' (exit code %d):", spv_path, result.exit_code)
+		print_tool_output(out_bytes, err_bytes)
+		return false
+	}
+
+	return true
+}
+
+// Echoes a tool's captured output to stderr: stderr first, then stdout, each with
+// trailing whitespace trimmed and skipped entirely when empty.
+print_tool_output :: proc(stdout_bytes, stderr_bytes: []u8) {
+	captured := [2]string{string(stderr_bytes), string(stdout_bytes)}
+	for raw in captured {
+		trimmed := strings.trim_right_space(raw)
+		if len(trimmed) > 0 do fmt.eprintfln("\t%s", trimmed)
+	}
+}
diff --git a/meta/main.odin b/meta/main.odin
new file mode 100644
index 0000000..e154245
--- /dev/null
+++ b/meta/main.odin
@@ -0,0 +1,51 @@
+package meta
+
+import "core:fmt"
+import "core:os"
+
+// One entry of the command table: the name matched against the first CLI argument,
+// a one-line description for the usage text, and the procedure to run (false = failure).
+Command :: struct {
+	name:        string,
+	description: string,
+	run:         proc() -> bool,
+}
+
+// All commands the meta tool understands.
+COMMANDS :: []Command {
+	{
+		name = "gen-shaders",
+		description = "Compile GLSL shaders to SPIR-V and Metal Shading Language.",
+		run = proc() -> bool {
+			return gen_shaders("draw/shaders/source", "draw/shaders/generated")
+		},
+	},
+}
+
+// Entry point: dispatches the first argument to the matching command.
+// Exits with status 1 when the command fails or is unknown; with no arguments it
+// prints the usage text and returns normally.
+main :: proc() {
+	cli_args := os.args[1:]
+
+	if len(cli_args) == 0 {
+		print_usage()
+		return
+	}
+
+	requested := cli_args[0]
+	for entry in COMMANDS {
+		if entry.name != requested do continue
+		if !entry.run() do os.exit(1)
+		return
+	}
+
+	fmt.eprintfln("Unknown command '%s'.", requested)
+	fmt.eprintln()
+	print_usage()
+	os.exit(1)
+}
+
+// Prints the usage line and the command table to stderr.
+print_usage :: proc() {
+	fmt.eprintln("Usage: meta ")
+	fmt.eprintln()
+	fmt.eprintln("Commands:")
+	for command in COMMANDS {
+		fmt.eprintfln(" %-20s %s", command.name, 
command.description)
+	}
+}
diff --git a/vendor/clay/clay.odin b/vendor/clay/clay.odin
new file mode 100644
index 0000000..ad1e1ee
--- /dev/null
+++ b/vendor/clay/clay.odin
@@ -0,0 +1,489 @@
+package clay
+
+import "core:c"
+
+// Select the prebuilt Clay library that matches the target OS / architecture.
+when ODIN_OS == .Windows {
+    foreign import Clay "windows/clay.lib"
+} else when ODIN_OS == .Linux {
+    foreign import Clay "linux/clay.a"
+} else when ODIN_OS == .Darwin {
+    when ODIN_ARCH == .arm64 {
+        foreign import Clay "macos-arm64/clay.a"
+    } else {
+        foreign import Clay "macos/clay.a"
+    }
+} else when ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32 {
+    foreign import Clay "wasm/clay.o"
+}
+
+// NOTE(review): the types below are passed to and from the foreign procedures by value,
+// so field order and field types presumably have to mirror the C declarations exactly.
+// Verify against clay.h before changing any of them.
+
+// Length-prefixed string as exchanged with Clay (see MakeString for how it is built).
+String :: struct {
+    isStaticallyAllocated: c.bool,
+    length: c.int32_t,
+    chars: [^]c.char,
+}
+
+StringSlice :: struct {
+    length: c.int32_t,
+    chars: [^]c.char,
+    baseChars: [^]c.char,
+}
+
+Vector2 :: [2]c.float
+
+Dimensions :: struct {
+    width: c.float,
+    height: c.float,
+}
+
+Arena :: struct {
+    nextAllocation: uintptr,
+    capacity: c.size_t,
+    memory: [^]c.char,
+}
+
+BoundingBox :: struct {
+    x: c.float,
+    y: c.float,
+    width: c.float,
+    height: c.float,
+}
+
+Color :: [4]c.float
+
+CornerRadius :: struct {
+    topLeft: c.float,
+    topRight: c.float,
+    bottomLeft: c.float,
+    bottomRight: c.float,
+}
+
+BorderData :: struct {
+    width: u32,
+    color: Color,
+}
+
+ElementId :: struct {
+    id: u32,
+    offset: u32,
+    baseId: u32,
+    stringId: String,
+}
+
+// Backing integer type for every enum in this file: u32 on Windows, u8 elsewhere
+// (presumably matching the enum size the C library was compiled with - confirm).
+when ODIN_OS == .Windows {
+    EnumBackingType :: u32
+} else {
+    EnumBackingType :: u8
+}
+
+// Discriminator stored in RenderCommand.commandType.
+RenderCommandType :: enum EnumBackingType {
+    None,
+    Rectangle,
+    Border,
+    Text,
+    Image,
+    ScissorStart,
+    ScissorEnd,
+    Custom,
+}
+
+RectangleElementConfig :: struct {
+    color: Color,
+}
+
+TextWrapMode :: enum EnumBackingType {
+    Words,
+    Newlines,
+    None,
+}
+
+TextAlignment :: enum EnumBackingType {
+    Left,
+    Center,
+    Right,
+}
+
+TextElementConfig :: struct {
+    userData: rawptr,
+    textColor: Color,
+    fontId: u16,
+    fontSize: u16,
+    letterSpacing: u16,
+    lineHeight: u16,
+    wrapMode: TextWrapMode,
+    textAlignment: TextAlignment,
+}
+
+AspectRatioElementConfig :: struct {
+    aspectRatio: f32,
+}
+
+ImageElementConfig :: struct {
+    imageData: rawptr,
+}
+
+CustomElementConfig :: struct {
+    customData: rawptr,
+}
+
+BorderWidth :: struct {
+    left: u16,
+    right: u16,
+    top: u16,
+    bottom: u16,
+    betweenChildren: u16,
+}
+
+BorderElementConfig :: struct {
+    color: Color,
+    width: BorderWidth,
+}
+
+ClipElementConfig :: struct {
+    horizontal: bool, // clip overflowing elements on the "X" axis
+    vertical: bool, // clip overflowing elements on the "Y" axis
+    childOffset: Vector2, // offsets the [X,Y] positions of all child elements, primarily for scrolling containers
+}
+
+FloatingAttachPointType :: enum EnumBackingType {
+    LeftTop,
+    LeftCenter,
+    LeftBottom,
+    CenterTop,
+    CenterCenter,
+    CenterBottom,
+    RightTop,
+    RightCenter,
+    RightBottom,
+}
+
+FloatingAttachPoints :: struct {
+    element: FloatingAttachPointType,
+    parent: FloatingAttachPointType,
+}
+
+PointerCaptureMode :: enum EnumBackingType {
+    Capture,
+    Passthrough,
+}
+
+FloatingAttachToElement :: enum EnumBackingType {
+    None,
+    Parent,
+    ElementWithId,
+    Root,
+}
+
+FloatingClipToElement :: enum EnumBackingType {
+    None,
+    AttachedParent,
+}
+
+FloatingElementConfig :: struct {
+    offset: Vector2,
+    expand: Dimensions,
+    parentId: u32,
+    zIndex: i16,
+    attachment: FloatingAttachPoints,
+    pointerCaptureMode: PointerCaptureMode,
+    attachTo: FloatingAttachToElement,
+    clipTo: FloatingClipToElement,
+}
+
+TextRenderData :: struct {
+    stringContents: StringSlice,
+    textColor: Color,
+    fontId: u16,
+    fontSize: u16,
+    letterSpacing: u16,
+    lineHeight: u16,
+}
+
+RectangleRenderData :: struct {
+    backgroundColor: Color,
+    cornerRadius: CornerRadius,
+}
+
+ImageRenderData :: struct {
+    backgroundColor: Color,
+    cornerRadius: CornerRadius,
+    imageData: rawptr,
+}
+
+CustomRenderData :: struct {
+    backgroundColor: Color,
+    cornerRadius: CornerRadius,
+    customData: rawptr,
+}
+
+BorderRenderData :: struct {
+    color: Color,
+    cornerRadius: CornerRadius,
+    width: BorderWidth,
+}
+
+// Untagged union of the per-command payloads; RenderCommand.commandType is the
+// discriminator stored next to it (which member applies to which type is not shown
+// here - presumably the one with the matching name).
+RenderCommandData :: struct #raw_union {
+    rectangle: RectangleRenderData,
+    text: TextRenderData,
+    image: ImageRenderData,
+    custom: CustomRenderData,
+    border: BorderRenderData,
+}
+
+// One element of the array returned by EndLayout.
+RenderCommand :: struct {
+    boundingBox: BoundingBox,
+    renderData: RenderCommandData,
+    userData: rawptr,
+    id: u32,
+    zIndex: i16,
+    commandType: RenderCommandType,
+}
+
+ScrollContainerData :: struct {
+    // Note: This is a pointer to the real internal scroll position, mutating it may cause a change in final layout.
+    // Intended for use with external functionality that modifies scroll position, such as scroll bars or auto scrolling.
+    scrollPosition: ^Vector2,
+    scrollContainerDimensions: Dimensions,
+    contentDimensions: Dimensions,
+    config: ClipElementConfig,
+    // Indicates whether an actual scroll container matched the provided ID or if the default struct was returned.
+    found: bool,
+}
+
+ElementData :: struct {
+    boundingBox: BoundingBox,
+    found: bool,
+}
+
+PointerDataInteractionState :: enum EnumBackingType {
+    PressedThisFrame,
+    Pressed,
+    ReleasedThisFrame,
+    Released,
+}
+
+PointerData :: struct {
+    position: Vector2,
+    state: PointerDataInteractionState,
+}
+
+SizingType :: enum EnumBackingType {
+    Fit,
+    Grow,
+    Percent,
+    Fixed,
+}
+
+SizingConstraintsMinMax :: struct {
+    min: c.float,
+    max: c.float,
+}
+
+SizingConstraints :: struct #raw_union {
+    sizeMinMax: SizingConstraintsMinMax,
+    sizePercent: c.float,
+}
+
+SizingAxis :: struct {
+    // Note: `min` is used for CLAY_SIZING_PERCENT, slightly different to clay.h due to lack of C anonymous unions
+    constraints: SizingConstraints,
+    type: SizingType,
+}
+
+Sizing :: struct {
+    width: SizingAxis,
+    height: SizingAxis,
+}
+
+Padding :: struct {
+    left: u16,
+    right: u16,
+    top: u16,
+    bottom: u16,
+}
+
+LayoutDirection :: enum EnumBackingType {
+    LeftToRight,
+    TopToBottom,
+}
+
+LayoutAlignmentX :: enum EnumBackingType {
+    Left,
+    Right,
+    Center,
+}
+
+LayoutAlignmentY :: enum EnumBackingType {
+    Top,
+    Bottom,
+    Center,
+}
+
+ChildAlignment :: struct {
+    x: LayoutAlignmentX,
+    y: LayoutAlignmentY,
+}
+
+LayoutConfig :: struct {
+    sizing: Sizing,
+    padding: Padding,
+    childGap: u16,
+    childAlignment: ChildAlignment,
+    layoutDirection: LayoutDirection,
+}
+
+// Generic counterpart of Clay's array structs: capacity, length and a multi-pointer to
+// the elements.
+ClayArray :: struct($type: typeid) {
+    capacity: i32,
+    length: i32,
+    internalArray: [^]type,
+}
+
+// Full configuration of one element, passed by value to _ConfigureOpenElement.
+ElementDeclaration :: struct {
+    id: ElementId,
+    layout: LayoutConfig,
+    backgroundColor: Color,
+    cornerRadius: CornerRadius,
+    aspectRatio: AspectRatioElementConfig,
+    image: ImageElementConfig,
+    floating: FloatingElementConfig,
+    custom: CustomElementConfig,
+    clip: ClipElementConfig,
+    border: BorderElementConfig,
+    userData: rawptr,
+}
+
+ErrorType :: enum EnumBackingType {
+    TextMeasurementFunctionNotProvided,
+    ArenaCapacityExceeded,
+    ElementsCapacityExceeded,
+    TextMeasurementCapacityExceeded,
+    DuplicateId,
+    FloatingContainerParentNotFound,
+    PercentageOver1,
+    InternalError,
+}
+
+ErrorData :: struct {
+    errorType: ErrorType,
+    errorText: String,
+    userData: rawptr,
+}
+
+// Callback plus user pointer handed to Initialize. The handler uses the "c" calling
+// convention, so it does not receive an Odin context.
+ErrorHandler :: struct {
+    handler: proc "c" (errorData: ErrorData),
+    userData: rawptr,
+}
+
+Context :: struct {} // opaque structure, only use as a pointer
+
+// Public foreign procedures. link_prefix means each name below links against the
+// symbol "Clay_" + name.
+@(link_prefix = "Clay_", default_calling_convention = "c")
+foreign Clay {
+    _OpenElement :: proc() ---
+    _CloseElement :: proc() ---
+    MinMemorySize :: proc() -> u32 ---
+    CreateArenaWithCapacityAndMemory :: proc(capacity: c.size_t, offset: [^]u8) -> Arena ---
+    SetPointerState :: proc(position: Vector2, pointerDown: bool) ---
+    Initialize :: proc(arena: Arena, layoutDimensions: Dimensions, errorHandler: ErrorHandler) -> ^Context ---
+    GetCurrentContext :: proc() -> ^Context ---
+    SetCurrentContext :: proc(ctx: ^Context) ---
+    UpdateScrollContainers :: proc(enableDragScrolling: bool, scrollDelta: Vector2, deltaTime: c.float) ---
+    SetLayoutDimensions :: proc(dimensions: Dimensions) ---
+    BeginLayout :: proc() ---
+    EndLayout :: proc() -> ClayArray(RenderCommand) ---
+    GetElementId :: proc(id: String) -> ElementId ---
+    GetElementIdWithIndex :: proc(id: String, index: u32) -> ElementId ---
+    GetElementData :: proc(id: ElementId) -> ElementData ---
+    Hovered :: proc() -> bool ---
+    OnHover :: proc(onHoverFunction: proc "c" (id: ElementId, pointerData: PointerData, userData: rawptr), userData: rawptr) ---
+    PointerOver :: proc(id: ElementId) -> bool ---
+    GetScrollOffset :: proc() -> Vector2 ---
+    GetScrollContainerData :: proc(id: ElementId) -> ScrollContainerData ---
+    SetMeasureTextFunction :: proc(measureTextFunction: proc "c" (text: StringSlice, config: ^TextElementConfig, userData: rawptr) -> Dimensions, userData: rawptr) ---
+    SetQueryScrollOffsetFunction :: proc(queryScrollOffsetFunction: proc "c" (elementId: u32, userData: rawptr) -> Vector2, userData: rawptr) ---
+    RenderCommandArray_Get :: proc(array: ^ClayArray(RenderCommand), index: i32) -> ^RenderCommand ---
+    SetDebugModeEnabled :: proc(enabled: bool) ---
+    IsDebugModeEnabled :: proc() -> bool ---
+    SetCullingEnabled :: proc(enabled: bool) ---
+    GetMaxElementCount :: proc() -> i32 ---
+    SetMaxElementCount :: proc(maxElementCount: i32) ---
+    GetMaxMeasureTextCacheWordCount :: proc() -> i32 ---
+    SetMaxMeasureTextCacheWordCount :: proc(maxMeasureTextCacheWordCount: i32) ---
+    ResetMeasureTextCache :: proc() ---
+}
+
+// Package-private foreign procedures; the wrappers further down are the public way in.
+@(link_prefix = "Clay_", default_calling_convention = "c", private)
+foreign Clay {
+    _ConfigureOpenElement :: proc(config: ElementDeclaration) ---
+    _HashString :: proc(key: String, offset: u32, seed: u32) -> ElementId ---
+    _OpenTextElement :: proc(text: String, textConfig: ^TextElementConfig) ---
+    _StoreTextElementConfig :: proc(config: TextElementConfig) -> ^TextElementConfig ---
+    _GetParentElementId :: proc() -> u32 ---
+}
+
+// Applies `config` to the currently open element. Always returns true, which lets the
+// call sit in an `if` condition.
+ConfigureOpenElement :: proc(config: ElementDeclaration) -> bool {
+    _ConfigureOpenElement(config)
+    return true
+}
+
+// Opens an element and returns ConfigureOpenElement so the declaration can be applied
+// in the same expression. The deferred_none attribute makes the compiler call
+// _CloseElement when the scope containing the UI() call ends.
+@(deferred_none = _CloseElement)
+UI :: proc() -> proc (config: ElementDeclaration) -> bool {
+    _OpenElement()
+    return ConfigureOpenElement
+}
+
+// Adds a text element for a compile-time-known string; the wrapped String is flagged
+// as statically allocated.
+Text :: proc($text: string, config: ^TextElementConfig) {
+    wrapped := MakeString(text)
+    wrapped.isStaticallyAllocated = true
+    _OpenTextElement(wrapped, config)
+}
+
+// Adds a text element for a runtime string (isStaticallyAllocated is left false).
+TextDynamic :: proc(text: string, config: ^TextElementConfig) {
+    _OpenTextElement(MakeString(text), config)
+}
+
+// Hands `config` to Clay for storage and returns the pointer Clay gives back.
+TextConfig :: proc(config: TextElementConfig) -> ^TextElementConfig {
+    return _StoreTextElementConfig(config)
+}
+
+// Same padding on all four sides.
+PaddingAll :: proc(allPadding: u16) -> Padding {
+    return { left = allPadding, right = allPadding, top = allPadding, bottom = allPadding }
+}
+
+// Border of `width` on the four outer edges, none between children.
+BorderOutside :: proc(width: u16) -> BorderWidth {
+    return {width, width, width, width, 0}
+}
+
+// Border of `width` on the four outer edges and between children.
+BorderAll :: proc(width: u16) -> BorderWidth {
+    return {width, width, width, width, width}
+}
+
+// Same radius on all four corners.
+CornerRadiusAll :: proc(radius: f32) -> CornerRadius {
+    return CornerRadius{radius, radius, radius, radius}
+}
+
+// Axis of type Fit with the given min/max constraints.
+SizingFit :: proc(sizeMinMax: SizingConstraintsMinMax) -> SizingAxis {
+    return SizingAxis{type = SizingType.Fit, constraints = {sizeMinMax = sizeMinMax}}
+}
+
+// Axis of type Grow with the given min/max constraints.
+SizingGrow :: proc(sizeMinMax: SizingConstraintsMinMax) -> SizingAxis {
+    return SizingAxis{type = SizingType.Grow, constraints = {sizeMinMax = sizeMinMax}}
+}
+
+// Axis of type Fixed; min and max are both set to `size`.
+SizingFixed :: proc(size: c.float) -> SizingAxis {
+    return SizingAxis{type = SizingType.Fixed, constraints = {sizeMinMax = {size, size}}}
+}
+
+// Axis of type Percent carrying `sizePercent` in the union.
+SizingPercent :: proc(sizePercent: c.float) -> SizingAxis {
+    return SizingAxis{type = SizingType.Percent, constraints = {sizePercent = sizePercent}}
+}
+
+// Wraps an Odin string without copying: `chars` points at the string's own bytes, so
+// the result is only valid while `label` is.
+MakeString :: proc(label: string) -> String {
+    return String{chars = raw_data(label), length = cast(c.int)len(label)}
+}
+
+// Element id hashed from `label` and `index` with seed 0.
+ID :: proc(label: string, index: u32 = 0) -> ElementId {
+    return _HashString(MakeString(label), index, 0)
+}
+
+// Element id hashed from `label` and `index`, seeded with the parent element's id.
+ID_LOCAL :: proc(label: string, index: u32 = 0) -> ElementId {
+    return _HashString(MakeString(label), index, _GetParentElementId())
+}
\ No newline at end of file
diff --git 
a/vendor/clay/linux/clay.a b/vendor/clay/linux/clay.a new file mode 100644 index 0000000..2568b60 Binary files /dev/null and b/vendor/clay/linux/clay.a differ diff --git a/vendor/clay/macos-arm64/clay.a b/vendor/clay/macos-arm64/clay.a new file mode 100644 index 0000000..337a884 Binary files /dev/null and b/vendor/clay/macos-arm64/clay.a differ diff --git a/vendor/clay/macos/clay.a b/vendor/clay/macos/clay.a new file mode 100644 index 0000000..6739690 Binary files /dev/null and b/vendor/clay/macos/clay.a differ diff --git a/vendor/clay/odinfmt.json b/vendor/clay/odinfmt.json new file mode 100644 index 0000000..ee3d563 --- /dev/null +++ b/vendor/clay/odinfmt.json @@ -0,0 +1,6 @@ +{ + "$schema": "https://raw.githubusercontent.com/DanielGavin/ols/master/misc/odinfmt.schema.json", + "character_width": 180, + "sort_imports": true, + "tabs": false +} \ No newline at end of file diff --git a/vendor/clay/wasm/clay.o b/vendor/clay/wasm/clay.o new file mode 100644 index 0000000..d92193b Binary files /dev/null and b/vendor/clay/wasm/clay.o differ diff --git a/vendor/clay/windows/clay.lib b/vendor/clay/windows/clay.lib new file mode 100644 index 0000000..68d46ee Binary files /dev/null and b/vendor/clay/windows/clay.lib differ