Organization & cleanup
This commit is contained in:
+107
-92
@@ -15,10 +15,10 @@ modes dispatched by a push constant:
|
|||||||
shader premultiplies the texture sample (`t.rgb *= t.a`) and computes `out = color * t`.
|
shader premultiplies the texture sample (`t.rgb *= t.a`) and computes `out = color * t`.
|
||||||
|
|
||||||
- **Mode 1 (SDF):** A static 6-vertex unit-quad buffer is drawn instanced, with per-primitive
|
- **Mode 1 (SDF):** A static 6-vertex unit-quad buffer is drawn instanced, with per-primitive
|
||||||
`Primitive` structs (80 bytes each) uploaded each frame to a GPU storage buffer. The vertex shader
|
`Base_2D_Primitive` structs (96 bytes each) uploaded each frame to a GPU storage buffer. The vertex
|
||||||
reads `primitives[gl_InstanceIndex]`, computes world-space position from unit quad corners +
|
shader reads `primitives[gl_InstanceIndex]`, computes world-space position from unit quad corners +
|
||||||
primitive bounds. The fragment shader dispatches on `Shape_Kind` (encoded in the low byte of
|
primitive bounds. The fragment shader dispatches on `Shape_Kind` (encoded in the low byte of
|
||||||
`Primitive.flags`) to evaluate one of four signed distance functions:
|
`Base_2D_Primitive.flags`) to evaluate one of four signed distance functions:
|
||||||
- **RRect** (kind 1) — `sdRoundedBox` with per-corner radii. Covers rectangles (sharp or rounded),
|
- **RRect** (kind 1) — `sdRoundedBox` with per-corner radii. Covers rectangles (sharp or rounded),
|
||||||
circles (uniform radii = half-size), and line segments / capsules (rotated RRect with uniform
|
circles (uniform radii = half-size), and line segments / capsules (rotated RRect with uniform
|
||||||
radii = half-thickness). Covers filled, outlined, textured, and gradient-filled variants.
|
radii = half-thickness). Covers filled, outlined, textured, and gradient-filled variants.
|
||||||
@@ -28,21 +28,22 @@ modes dispatched by a push constant:
|
|||||||
normals. Covers full rings, partial arcs, and pie slices (`inner_radius = 0`).
|
normals. Covers full rings, partial arcs, and pie slices (`inner_radius = 0`).
|
||||||
|
|
||||||
All SDF shapes support fill, outline, solid color, 2-color linear gradients, 2-color radial
|
All SDF shapes support fill, outline, solid color, 2-color linear gradients, 2-color radial
|
||||||
gradients, and texture fills via `Shape_Flags` (see `pipeline_2d_base.odin`). Gradient and outline
|
gradients, and texture fills via `Shape_Flags` (see `pipeline_2d_base.odin`). The texture UV rect
|
||||||
parameters are packed into the same 16 bytes as the texture UV rect via a `Uv_Or_Effects` raw union
|
(`uv_rect: [4]f32`) and the gradient/outline parameters (`effects: Gradient_Outline`) live in their
|
||||||
— zero size increase to the 80-byte `Primitive` struct. Gradient/outline and texture are mutually
|
own 16-byte slots in `Base_2D_Primitive`, so a primitive can carry texture and outline simultaneously.
|
||||||
exclusive.
|
Gradient and texture remain mutually exclusive at the fill-source level (a `Brush` variant chooses one
|
||||||
|
or the other) since they share the worst-case fragment-shader register path.
|
||||||
|
|
||||||
All SDF shapes produce mathematically exact curves with analytical anti-aliasing via `smoothstep` —
|
All SDF shapes produce mathematically exact curves with analytical anti-aliasing via `smoothstep` —
|
||||||
no tessellation, no piecewise-linear approximation. A rounded rectangle is 1 primitive (80 bytes)
|
no tessellation, no piecewise-linear approximation. A rounded rectangle is 1 primitive (96 bytes)
|
||||||
instead of ~250 vertices (~5000 bytes).
|
instead of ~250 vertices (~5000 bytes).
|
||||||
|
|
||||||
The main pipeline's register budget is **≤24 registers** (see "Main/effects split: register pressure"
|
The main pipeline's register budget is **≤24 registers** (see "Main/effects split: register pressure"
|
||||||
in the pipeline plan below for the full cliff/margin analysis and SBC architecture context). The
|
in the pipeline plan below for the full cliff/margin analysis and SBC architecture context).
|
||||||
fragment shader's estimated peak footprint is ~22–26 fp32 VGPRs (~16–22 fp16 VGPRs on architectures
|
The fragment shader's estimated peak footprint is ~22–26 fp32 VGPRs (~16–22 fp16 VGPRs on architectures
|
||||||
with native mediump) via manual live-range analysis. The dominant peak is the Ring_Arc kind path
|
with native mediump) via manual live-range analysis. The dominant peak is the Ring_Arc kind path
|
||||||
(wedge normals + inner/outer radii + dot-product temporaries live simultaneously with carried state
|
(wedge normals + inner/outer radii + dot-product temporaries live simultaneously with carried state
|
||||||
like `f_color`, `f_uv_or_effects`, and `half_size`). RRect is 1–2 regs lower (`corner_radii` vec4
|
like `f_color`, `f_uv_rect`/`f_effects`, and `half_size`). RRect is 1–2 regs lower (`corner_radii` vec4
|
||||||
replaces the separate inner/outer + normal pairs). NGon and Ellipse are lighter still. Real compilers
|
replaces the separate inner/outer + normal pairs). NGon and Ellipse are lighter still. Real compilers
|
||||||
apply live-range coalescing, mediump-to-fp16 promotion, and rematerialization that typically shave
|
apply live-range coalescing, mediump-to-fp16 promotion, and rematerialization that typically shave
|
||||||
2–4 regs from hand-counted estimates — the conservative 26-reg upper bound is expected to compile
|
2–4 regs from hand-counted estimates — the conservative 26-reg upper bound is expected to compile
|
||||||
@@ -439,12 +440,13 @@ vertex shader branches on this uniform to select the tessellated or SDF code pat
|
|||||||
- **Tessellated mode** (`mode = 0`): direct vertex buffer with explicit geometry. Used for text
|
- **Tessellated mode** (`mode = 0`): direct vertex buffer with explicit geometry. Used for text
|
||||||
(SDL_ttf atlas sampling), triangles, triangle fans/strips, single-pixel points, and any
|
(SDL_ttf atlas sampling), triangles, triangle fans/strips, single-pixel points, and any
|
||||||
user-provided raw vertex geometry.
|
user-provided raw vertex geometry.
|
||||||
- **SDF mode** (`mode = 1`): shared unit-quad vertex buffer + GPU storage buffer of `Primitive`
|
- **SDF mode** (`mode = 1`): shared unit-quad vertex buffer + GPU storage buffer of
|
||||||
structs, drawn instanced. Used for all shapes with closed-form signed distance functions.
|
`Base_2D_Primitive` structs, drawn instanced. Used for all shapes with closed-form signed distance
|
||||||
|
functions.
|
||||||
|
|
||||||
Both modes use the same fragment shader. The fragment shader checks `Shape_Kind` (low byte of
|
Both modes use the same fragment shader. The fragment shader checks `Shape_Kind` (low byte of
|
||||||
`Primitive.flags`): kind 0 (`Solid`) is the tessellated path, which premultiplies the texture sample
|
`Base_2D_Primitive.flags`): kind 0 (`Solid`) is the tessellated path, which premultiplies the texture
|
||||||
and computes `out = color * t`; kinds 1–4 dispatch to one of four SDF functions (RRect, NGon,
|
sample and computes `out = color * t`; kinds 1–4 dispatch to one of four SDF functions (RRect, NGon,
|
||||||
Ellipse, Ring_Arc) and apply gradient/texture/outline/solid color based on `Shape_Flags` bits.
|
Ellipse, Ring_Arc) and apply gradient/texture/outline/solid color based on `Shape_Flags` bits.
|
||||||
|
|
||||||
#### Why SDF for shapes
|
#### Why SDF for shapes
|
||||||
@@ -452,8 +454,8 @@ Ellipse, Ring_Arc) and apply gradient/texture/outline/solid color based on `Shap
|
|||||||
CPU-side adaptive tessellation for curved shapes (the current approach) has three problems:
|
CPU-side adaptive tessellation for curved shapes (the current approach) has three problems:
|
||||||
|
|
||||||
1. **Vertex bandwidth.** A rounded rectangle with four corner arcs produces ~250 vertices × 20 bytes
|
1. **Vertex bandwidth.** A rounded rectangle with four corner arcs produces ~250 vertices × 20 bytes
|
||||||
= 5 KB. An SDF rounded rectangle is one `Primitive` struct (~56 bytes) plus 4 shared unit-quad
|
= 5 KB. An SDF rounded rectangle is one `Base_2D_Primitive` struct (96 bytes) plus the 6 shared
|
||||||
vertices. That is roughly a 90× reduction per shape.
|
unit-quad vertices. That is roughly a 50× reduction per shape.
|
||||||
|
|
||||||
2. **Quality.** Tessellated curves are piecewise-linear approximations. At high DPI or under
|
2. **Quality.** Tessellated curves are piecewise-linear approximations. At high DPI or under
|
||||||
animation/zoom, faceting is visible at any practical segment count. SDF evaluation produces
|
animation/zoom, faceting is visible at any practical segment count. SDF evaluation produces
|
||||||
@@ -484,14 +486,14 @@ SDF primitives are submitted via a GPU storage buffer indexed by `gl_InstanceInd
|
|||||||
shader, rather than encoding per-primitive data redundantly in vertex attributes. This follows the
|
shader, rather than encoding per-primitive data redundantly in vertex attributes. This follows the
|
||||||
pattern used by both Zed GPUI and vger-rs.
|
pattern used by both Zed GPUI and vger-rs.
|
||||||
|
|
||||||
Each SDF shape is described by a single `Primitive` struct (80 bytes) in the storage buffer. The
|
Each SDF shape is described by a single `Base_2D_Primitive` struct (96 bytes) in the storage
|
||||||
vertex shader reads `primitives[gl_InstanceIndex]`, computes the quad corner position from the unit
|
buffer. The vertex shader reads `primitives[gl_InstanceIndex]`, computes the quad corner position
|
||||||
vertex and the primitive's bounds, and passes shape parameters to the fragment shader via `flat`
|
from the unit vertex and the primitive's bounds, and passes shape parameters to the fragment shader
|
||||||
interpolated varyings.
|
via `flat` interpolated varyings.
|
||||||
|
|
||||||
Compared to encoding per-primitive data in vertex attributes (the "fat vertex" approach), storage-
|
Compared to encoding per-primitive data in vertex attributes (the "fat vertex" approach), storage-
|
||||||
buffer instancing eliminates the 4–6× data duplication across quad corners. A rounded rectangle costs
|
buffer instancing eliminates the 4–6× data duplication across quad corners. A rounded rectangle costs
|
||||||
80 bytes instead of 4 vertices × 40+ bytes = 160+ bytes.
|
96 bytes instead of 4 vertices × 60+ bytes = 240+ bytes.
|
||||||
|
|
||||||
The tessellated path retains the existing direct vertex buffer layout (20 bytes/vertex, no storage
|
The tessellated path retains the existing direct vertex buffer layout (20 bytes/vertex, no storage
|
||||||
buffer access). The vertex shader branch on `mode` (push constant) is warp-uniform — every invocation
|
buffer access). The vertex shader branch on `mode` (push constant) is warp-uniform — every invocation
|
||||||
@@ -499,15 +501,18 @@ in a draw call has the same mode — so it is effectively free on all modern GPU
|
|||||||
|
|
||||||
#### Shape kinds and SDF dispatch
|
#### Shape kinds and SDF dispatch
|
||||||
|
|
||||||
The fragment shader dispatches on `Shape_Kind` (low byte of `Primitive.flags`) to evaluate one of
|
The fragment shader dispatches on `Shape_Kind` (low byte of `Base_2D_Primitive.flags`) to evaluate
|
||||||
four signed distance functions. The `Shape_Kind` enum and per-kind `*_Params` structs are defined in
|
one of four signed distance functions. The `Shape_Kind` enum and per-kind `*_Params` structs are
|
||||||
`pipeline_2d_base.odin`. CPU-side drawing procs in `shapes.odin` build the appropriate `Primitive`
|
defined in `pipeline_2d_base.odin`. CPU-side drawing procs in `shapes.odin` build the appropriate
|
||||||
and set the kind automatically:
|
`Base_2D_Primitive` and set the kind automatically:
|
||||||
|
|
||||||
|
Each user-facing shape proc accepts a `Brush` union (color, linear gradient, radial gradient,
|
||||||
|
or textured fill) as its fill source, plus optional outline parameters. The procs map to SDF
|
||||||
|
kinds as follows:
|
||||||
|
|
||||||
| User-facing proc | Shape_Kind | SDF function | Notes |
|
| User-facing proc | Shape_Kind | SDF function | Notes |
|
||||||
| -------------------- | ---------- | ------------------ | ---------------------------------------------------------- |
|
| -------------------- | ---------- | ------------------ | ---------------------------------------------------------- |
|
||||||
| `rectangle` | `RRect` | `sdRoundedBox` | Per-corner radii from `radii` param |
|
| `rectangle` | `RRect` | `sdRoundedBox` | Per-corner radii from `radii` param |
|
||||||
| `rectangle_texture` | `RRect` | `sdRoundedBox` | Textured fill; `.Textured` flag set |
|
|
||||||
| `circle` | `RRect` | `sdRoundedBox` | Uniform radii = half-size (circle is a degenerate RRect) |
|
| `circle` | `RRect` | `sdRoundedBox` | Uniform radii = half-size (circle is a degenerate RRect) |
|
||||||
| `line`, `line_strip` | `RRect` | `sdRoundedBox` | Rotated capsule — stadium shape (radii = half-thickness) |
|
| `line`, `line_strip` | `RRect` | `sdRoundedBox` | Rotated capsule — stadium shape (radii = half-thickness) |
|
||||||
| `ellipse` | `Ellipse` | `sdEllipseApprox` | Approximate ellipse SDF (fast, suitable for UI) |
|
| `ellipse` | `Ellipse` | `sdEllipseApprox` | Approximate ellipse SDF (fast, suitable for UI) |
|
||||||
@@ -599,20 +604,21 @@ to is a hard GPU constraint; the only way to satisfy it is to end the current re
|
|||||||
a new one. That render-pass boundary is what a “bracket” is.
|
a new one. That render-pass boundary is what a “bracket” is.
|
||||||
|
|
||||||
**Multi-pass implementation.** Backdrop effects are implemented as separable multi-pass sequences
|
**Multi-pass implementation.** Backdrop effects are implemented as separable multi-pass sequences
|
||||||
(downsample → horizontal blur → vertical-blur+composite), following the standard approach used by
|
(downsample → horizontal blur → vertical blur → composite), following the standard approach used
|
||||||
iOS `UIVisualEffectView`, Android `RenderEffect`, and Flutter's `BackdropFilter`. Each individual
|
by iOS `UIVisualEffectView`, Android `RenderEffect`, and Flutter's `BackdropFilter`. Each individual
|
||||||
sub-pass is budgeted at **≤24 registers** (same as the main pipeline — full Valhall occupancy). The
|
sub-pass is budgeted at **≤24 registers** (same as the main pipeline — full Valhall occupancy). The
|
||||||
multi-pass approach avoids the monolithic 70+ register shader that a single-pass Gaussian blur would
|
multi-pass approach avoids the monolithic 70+ register shader that a single-pass Gaussian blur would
|
||||||
require, keeping each sub-pass well under the 32-register cliff.
|
require, keeping each sub-pass well under the 32-register cliff.
|
||||||
|
|
||||||
**Approach B: render-target choice.** When any layer in the frame contains a backdrop draw, the
|
**Render-target choice.** When any layer in the frame contains a backdrop draw, the entire
|
||||||
entire frame renders into `source_texture` (a full-resolution single-sample texture owned by the
|
frame renders into `source_texture` (a full-resolution single-sample texture owned by the
|
||||||
backdrop pipeline) instead of directly into the swapchain. At the end of the frame, `source_texture`
|
backdrop pipeline) instead of directly into the swapchain. At the end of the frame,
|
||||||
is copied to the swapchain via a single `CopyGPUTextureToTexture` call. This means the bracket has
|
`source_texture` is copied to the swapchain via a single `CopyGPUTextureToTexture` call.
|
||||||
no mid-frame texture copy: by the time the bracket runs, `source_texture` already contains the pre-
|
This means the bracket has no mid-frame texture copy: by the time the bracket runs,
|
||||||
bracket frame contents and is the natural sampler input. When no layer in the frame has a backdrop
|
`source_texture` already contains the pre-bracket frame contents and is the natural sampler
|
||||||
draw, the existing fast path runs: the frame renders directly to the swapchain and the backdrop
|
input. When no layer in the frame has a backdrop draw, the existing fast path runs: the frame
|
||||||
pipeline's working textures are never touched. Zero cost for backdrop-free frames.
|
renders directly to the swapchain and the backdrop pipeline's working textures are never
|
||||||
|
touched. Zero cost for backdrop-free frames.
|
||||||
|
|
||||||
**Why not split the backdrop sub-passes into separate pipelines?** Each sub-pass is budgeted at ≤24
|
**Why not split the backdrop sub-passes into separate pipelines?** Each sub-pass is budgeted at ≤24
|
||||||
registers, well under Valhall's 32-register cliff, so there is no occupancy motivation for splitting.
|
registers, well under Valhall's 32-register cliff, so there is no occupancy motivation for splitting.
|
||||||
@@ -638,13 +644,20 @@ submission order. Concretely, a layer with one or more backdrops splits into thr
|
|||||||
range. If the layer has no backdrops, none of this kicks in and the layer renders in a single render
|
range. If the layer has no backdrops, none of this kicks in and the layer renders in a single render
|
||||||
pass via the existing fast path.
|
pass via the existing fast path.
|
||||||
|
|
||||||
The downsample runs once per layer, not once per sigma: it just copies `source_texture` to a ¼-
|
**Per-sigma-group execution.** The bracket walks each layer's sub-batches and groups contiguous
|
||||||
resolution working texture and doesn't depend on the kernel. Each unique sigma in the layer triggers
|
`.Backdrop` sub-batches that share a sigma; each group picks its own downsample factor (1, 2, or 4)
|
||||||
one H-blur (reads `downsample_texture`, writes `h_blur_texture`) and one V-composite (reads
|
based on `compute_backdrop_downsample_factor`. For each group it runs four sub-passes: a downsample
|
||||||
`h_blur_texture`, writes `source_texture` per-primitive with the SDF mask). Sub-batch coalescing in
|
from `source_texture` to `downsample_texture`; an H-blur from `downsample_texture` to
|
||||||
`append_or_extend_sub_batch` merges contiguous same-sigma backdrops into a single instanced V-
|
`h_blur_texture`; a V-blur from `h_blur_texture` back into `downsample_texture` (ping-pong reuse);
|
||||||
composite draw call; non-contiguous same-sigma backdrops still share the H-blur output but issue
|
and finally a composite that reads the fully-blurred `downsample_texture`, applies the SDF mask
|
||||||
separate V-composite draws.
|
and tint, and writes the result to `source_texture`. Sub-batch coalescing in
|
||||||
|
`append_or_extend_sub_batch` merges contiguous same-sigma backdrops into a single instanced
|
||||||
|
composite draw; non-contiguous same-sigma backdrops still share the blur output but issue separate
|
||||||
|
composite draws.
|
||||||
|
|
||||||
|
The working textures are sized at the full swapchain resolution; larger downsample factors only
|
||||||
|
fill a sub-rect via viewport-limited rendering (see the comment block at the top of `backdrop.odin`
|
||||||
|
for the factor-selection table and rationale).
|
||||||
|
|
||||||
#### Submission-order trade-off
|
#### Submission-order trade-off
|
||||||
|
|
||||||
@@ -654,12 +667,12 @@ layer. A non-backdrop sub-batch submitted between two backdrops still renders in
|
|||||||
bracket), not at its submission position. Worked example:
|
bracket), not at its submission position. Worked example:
|
||||||
|
|
||||||
```
|
```
|
||||||
draw.rectangle(layer, bg, GRAY) // 0 Tessellated → Pass A
|
draw.rectangle(layer, bg, GRAY) // 0 Tessellated → Pass A
|
||||||
draw.rectangle(layer, card_blue, BLUE) // 1 SDF → Pass A
|
draw.rectangle(layer, card_blue, BLUE) // 1 SDF → Pass A
|
||||||
draw.rectangle_backdrop(layer, panelA, 12) // 2 Backdrop → Bracket (sees: bg + blue card)
|
draw.gaussian_blur(layer, panelA, sigma=12) // 2 Backdrop → Bracket (sees: bg + blue card)
|
||||||
draw.rectangle(layer, card_red, RED) // 3 SDF → Pass B (drawn ON TOP of panelA)
|
draw.rectangle(layer, card_red, RED) // 3 SDF → Pass B (drawn ON TOP of panelA)
|
||||||
draw.rectangle_backdrop(layer, panelB, 12) // 4 Backdrop → Bracket (sees: bg + blue card; same as panelA)
|
draw.gaussian_blur(layer, panelB, sigma=12) // 4 Backdrop → Bracket (sees: bg + blue card; same as panelA)
|
||||||
draw.text(layer, "label", ...) // 5 Text → Pass B (drawn ON TOP of both panels)
|
draw.text(layer, "label", ...) // 5 Text → Pass B (drawn ON TOP of both panels)
|
||||||
```
|
```
|
||||||
|
|
||||||
In this layer, panelB does *not* see card_red — even though card_red was submitted before panelB —
|
In this layer, panelB does *not* see card_red — even though card_red was submitted before panelB —
|
||||||
@@ -674,11 +687,11 @@ card_red:
|
|||||||
base := draw.begin(...)
|
base := draw.begin(...)
|
||||||
draw.rectangle(base, bg, GRAY)
|
draw.rectangle(base, bg, GRAY)
|
||||||
draw.rectangle(base, card_blue, BLUE)
|
draw.rectangle(base, card_blue, BLUE)
|
||||||
draw.rectangle_backdrop(base, panelA, 12) // panelA in base layer's bracket
|
draw.gaussian_blur(base, panelA, sigma=12) // panelA in base layer's bracket
|
||||||
|
|
||||||
top := draw.new_layer(base, ...)
|
top := draw.new_layer(base, ...)
|
||||||
draw.rectangle(top, card_red, RED)
|
draw.rectangle(top, card_red, RED)
|
||||||
draw.rectangle_backdrop(top, panelB, 12) // top layer's bracket; sees base + card_red
|
draw.gaussian_blur(top, panelB, sigma=12) // top layer's bracket; sees base + card_red
|
||||||
draw.text(top, "label", ...)
|
draw.text(top, "label", ...)
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -708,29 +721,30 @@ draws, `position` carries actual world-space geometry. For SDF draws, `position`
|
|||||||
corners (0,0 to 1,1) and the vertex shader computes world-space position from the storage-buffer
|
corners (0,0 to 1,1) and the vertex shader computes world-space position from the storage-buffer
|
||||||
primitive's bounds.
|
primitive's bounds.
|
||||||
|
|
||||||
The `Primitive` struct for SDF shapes lives in the storage buffer, not in vertex attributes:
|
The `Base_2D_Primitive` struct for SDF shapes lives in the storage buffer, not in vertex attributes:
|
||||||
|
|
||||||
```
|
```
|
||||||
Primitive :: struct {
|
Base_2D_Primitive :: struct {
|
||||||
bounds: [4]f32, // 0: min_x, min_y, max_x, max_y
|
bounds: [4]f32, // 0: min_x, min_y, max_x, max_y
|
||||||
color: Color, // 16: u8x4, unpacked in shader via unpackUnorm4x8
|
color: Color, // 16: u8x4, unpacked in shader via unpackUnorm4x8
|
||||||
flags: u32, // 20: low byte = Shape_Kind, bits 8+ = Shape_Flags
|
flags: u32, // 20: low byte = Shape_Kind, bits 8+ = Shape_Flags
|
||||||
rotation_sc: u32, // 24: packed f16 pair (sin, cos). Requires .Rotated flag.
|
rotation_sc: u32, // 24: packed f16 pair (sin, cos). Requires .Rotated flag.
|
||||||
_pad: f32, // 28: reserved for future use
|
_pad: f32, // 28: reserved for future use
|
||||||
params: Shape_Params, // 32: per-kind params union (half_feather, radii, etc.) (32 bytes)
|
params: Shape_Params, // 32: per-kind params union (half_feather, radii, etc.) (32 bytes)
|
||||||
uv: Uv_Or_Effects, // 64: texture UV rect or gradient/outline parameters (16 bytes)
|
uv_rect: [4]f32, // 64: texture UV coordinates. Read when .Textured.
|
||||||
|
effects: Gradient_Outline, // 80: gradient and/or outline parameters (16 bytes).
|
||||||
}
|
}
|
||||||
// Total: 80 bytes (std430 aligned)
|
// Total: 96 bytes (std430 aligned)
|
||||||
```
|
```
|
||||||
|
|
||||||
`Shape_Params` is a `#raw_union` over `RRect_Params`, `NGon_Params`, `Ellipse_Params`, and
|
`Shape_Params` is a `#raw_union` over `RRect_Params`, `NGon_Params`, `Ellipse_Params`, and
|
||||||
`Ring_Arc_Params` (plus a `raw: [8]f32` view), defined in `pipeline_2d_base.odin`. Each SDF kind
|
`Ring_Arc_Params` (plus a `raw: [8]f32` view), defined in `pipeline_2d_base.odin`. Each SDF kind
|
||||||
writes its own params variant; the fragment shader reads the appropriate fields based on `Shape_Kind`.
|
writes its own params variant; the fragment shader reads the appropriate fields based on `Shape_Kind`.
|
||||||
`Uv_Or_Effects` is a `#raw_union` that aliases `[4]f32` (texture UV rect: u_min, v_min, u_max,
|
`Gradient_Outline` is a 16-byte struct containing `gradient_color: Color`, `outline_color: Color`,
|
||||||
v_max) with a `Gradient_Outline` struct containing `gradient_color: Color`, `outline_color: Color`,
|
|
||||||
`gradient_dir_sc: u32` (packed f16 cos/sin pair), and `outline_packed: u32` (packed f16 outline
|
`gradient_dir_sc: u32` (packed f16 cos/sin pair), and `outline_packed: u32` (packed f16 outline
|
||||||
width). The `flags` field encodes the `Shape_Kind` in the low byte and `Shape_Flags` in bits 8+
|
width). It is independent of `uv_rect`, so a primitive can carry texture and outline parameters at
|
||||||
via `pack_kind_flags`.
|
the same time. The `flags` field encodes the `Shape_Kind` in the low byte and `Shape_Flags` in bits
|
||||||
|
8+ via `pack_kind_flags`.
|
||||||
|
|
||||||
### Draw submission order
|
### Draw submission order
|
||||||
|
|
||||||
@@ -754,7 +768,7 @@ pair into bitmap atlases and emits indexed triangle data via `GetGPUTextDrawData
|
|||||||
**unchanged** by the SDF migration — text continues to flow through the main pipeline's tessellated
|
**unchanged** by the SDF migration — text continues to flow through the main pipeline's tessellated
|
||||||
mode with `mode = 0`, sampling the SDL_ttf atlas texture.
|
mode with `mode = 0`, sampling the SDL_ttf atlas texture.
|
||||||
|
|
||||||
A future phase may evaluate MSDF (multi-channel signed distance field) text rendering, which would
|
MSDF (multi-channel signed distance field) text rendering may be evaluated later; it would
|
||||||
allow resolution-independent glyph rendering from a single small atlas per font. This would involve:
|
allow resolution-independent glyph rendering from a single small atlas per font. This would involve:
|
||||||
|
|
||||||
- Offline atlas generation via Chlumský's msdf-atlas-gen tool.
|
- Offline atlas generation via Chlumský's msdf-atlas-gen tool.
|
||||||
@@ -763,8 +777,7 @@ allow resolution-independent glyph rendering from a single small atlas per font.
|
|||||||
already exists for the four current SDF kinds).
|
already exists for the four current SDF kinds).
|
||||||
- Potential removal of the SDL_ttf dependency.
|
- Potential removal of the SDL_ttf dependency.
|
||||||
|
|
||||||
This is explicitly deferred. The SDF shape migration is independent of and does not block text
|
This is explicitly deferred.
|
||||||
changes.
|
|
||||||
|
|
||||||
**References:**
|
**References:**
|
||||||
|
|
||||||
@@ -778,8 +791,8 @@ changes.
|
|||||||
### Textures
|
### Textures
|
||||||
|
|
||||||
Textures plug into the existing main pipeline — no additional GPU pipeline, no shader rewrite. The
|
Textures plug into the existing main pipeline — no additional GPU pipeline, no shader rewrite. The
|
||||||
work is a resource layer (registration, upload, sampling, lifecycle) plus two textured-draw procs
|
work is a resource layer (registration, upload, sampling, lifecycle) plus a `Texture_Fill` Brush
|
||||||
that route into the existing tessellated and SDF paths respectively.
|
variant that routes the existing shape procs through the SDF path with the `.Textured` flag set.
|
||||||
|
|
||||||
#### Why draw owns registered textures
|
#### Why draw owns registered textures
|
||||||
|
|
||||||
@@ -829,22 +842,25 @@ with the same texture but different samplers produce separate draw calls, which
|
|||||||
|
|
||||||
#### Textured draw procs
|
#### Textured draw procs
|
||||||
|
|
||||||
Textured rectangles route through the existing SDF path via `rectangle_texture`, which mirrors
|
Textures share the same shape procs as colors and gradients. Each shape proc takes a `Brush`
|
||||||
`rectangle` exactly — same parameters for radii, origin, rotation, feather — with the `color`
|
union as its fill source; passing a `Texture_Fill` value (carrying `Texture_Id`, `tint`,
|
||||||
parameter replaced by a `Texture_Id`, an optional `tint`, a `uv_rect`, and a `Sampler_Preset`.
|
`uv_rect`, and `Sampler_Preset`) routes the draw through the SDF path with the `.Textured`
|
||||||
|
flag set. There is no dedicated `rectangle_texture` / `circle_texture` proc — the same
|
||||||
|
`rectangle`, `circle`, `ellipse`, `polygon`, `ring`, `line`, and `line_strip` procs handle
|
||||||
|
all fill sources.
|
||||||
|
|
||||||
An earlier iteration of this design considered a separate tessellated proc for "simple" fullscreen
|
A separate tessellated proc for "simple" fullscreen quads was considered on the theory that
|
||||||
quads, on the theory that the tessellated path's lower register count would improve occupancy at
|
the tessellated path's lower register count would improve occupancy at large fragment counts.
|
||||||
large fragment counts. Both paths are well within the ≤24-register main pipeline budget — both run at
|
Both paths are well within the ≤24-register main pipeline budget — both run at full
|
||||||
full occupancy on every target architecture (Valhall and above). The remaining ALU difference (~15
|
occupancy on every target architecture (Valhall and above). The remaining ALU difference
|
||||||
extra instructions for the SDF evaluation) amounts to ~20μs at 4K — below noise. Meanwhile,
|
(~15 extra instructions for the SDF evaluation) amounts to ~20μs at 4K — below noise.
|
||||||
splitting into a separate pipeline would add ~1–5μs per pipeline bind on the CPU side per scissor,
|
Meanwhile, splitting into a separate pipeline would add ~1–5μs per pipeline bind on the CPU
|
||||||
matching or exceeding the GPU-side savings. Within the main pipeline, unified remains strictly better.
|
side per scissor, matching or exceeding the GPU-side savings. Within the main pipeline,
|
||||||
|
unified remains strictly better.
|
||||||
|
|
||||||
SDF drawing procs live in the `draw` package with unprefixed names (`rectangle`, `rectangle_texture`,
|
SDF drawing procs live in the `draw` package with unprefixed names (`rectangle`, `circle`,
|
||||||
`circle`, `ellipse`, `polygon`, `ring`, `line`, `line_strip`). Gradients and outlines are optional
|
`ellipse`, `polygon`, `ring`, `line`, `line_strip`). Gradients, textures, and outlines are
|
||||||
parameters on each proc rather than separate overloads. Future per-shape texture variants
|
selected via the `Brush` union and optional outline parameters rather than separate overloads.
|
||||||
(`circle_texture`, `ellipse_texture`) are additive.
|
|
||||||
|
|
||||||
#### What SDF anti-aliasing does and does not do for textured draws
|
#### What SDF anti-aliasing does and does not do for textured draws
|
||||||
|
|
||||||
@@ -858,8 +874,8 @@ depends on how closely the display size matches the SDL_ttf atlas's rasterized s
|
|||||||
#### Fit modes are a computation layer, not a renderer concept
|
#### Fit modes are a computation layer, not a renderer concept
|
||||||
|
|
||||||
Standard image-fit behaviors (stretch, fill/cover, fit/contain, tile, center) are expressed as UV
|
Standard image-fit behaviors (stretch, fill/cover, fit/contain, tile, center) are expressed as UV
|
||||||
sub-region computations on top of the `uv_rect` parameter that both textured-draw procs accept. The
|
sub-region computations on top of the `uv_rect` field of `Texture_Fill`. The renderer has no
|
||||||
renderer has no knowledge of fit modes — it samples whatever UV region it is given.
|
knowledge of fit modes — it samples whatever UV region it is given.
|
||||||
|
|
||||||
A `fit_params` helper computes the appropriate `uv_rect`, sampler preset, and (for letterbox/fit
|
A `fit_params` helper computes the appropriate `uv_rect`, sampler preset, and (for letterbox/fit
|
||||||
mode) shrunken inner rect from a `Fit_Mode` enum, the target rect, and the texture's pixel size.
|
mode) shrunken inner rect from a `Fit_Mode` enum, the target rect, and the texture's pixel size.
|
||||||
@@ -883,13 +899,13 @@ textures onto a free list that is processed in `r_end_frame`, not at the call si
|
|||||||
|
|
||||||
Clay's `RenderCommandType.Image` is handled by dereferencing `imageData: rawptr` as a pointer to a
|
Clay's `RenderCommandType.Image` is handled by dereferencing `imageData: rawptr` as a pointer to a
|
||||||
`Clay_Image_Data` struct containing a `Texture_Id`, `Fit_Mode`, and tint color. Routing mirrors the
|
`Clay_Image_Data` struct containing a `Texture_Id`, `Fit_Mode`, and tint color. Routing mirrors the
|
||||||
existing rectangle handling: `fit_params` computes UVs from the fit mode, then
|
existing rectangle handling: `fit_params` computes UVs from the fit mode, then `rectangle` is
|
||||||
`rectangle_texture` is called with the appropriate radii (zero for sharp corners, per-corner values
|
called with a `Texture_Fill` brush and the appropriate radii (zero for sharp corners, per-corner
|
||||||
from Clay's `cornerRadius` otherwise).
|
values from Clay's `cornerRadius` otherwise).
|
||||||
|
|
||||||
#### Deferred features
|
#### Deferred features
|
||||||
|
|
||||||
The following are plumbed in the descriptor but not implemented in phase 1:
|
The following are not yet implemented (where noted, the `Texture_Desc` fields are already plumbed):
|
||||||
|
|
||||||
- **Mipmaps**: `Texture_Desc.mip_levels` field exists; generation via SDL3 deferred.
|
- **Mipmaps**: `Texture_Desc.mip_levels` field exists; generation via SDL3 deferred.
|
||||||
- **Compressed formats**: `Texture_Desc.format` accepts BC/ASTC; upload path deferred.
|
- **Compressed formats**: `Texture_Desc.format` accepts BC/ASTC; upload path deferred.
|
||||||
@@ -897,7 +913,6 @@ The following are plumbed in the descriptor but not implemented in phase 1:
|
|||||||
- **3D textures, arrays, cube maps**: `Texture_Desc.type` and `depth_or_layers` fields exist.
|
- **3D textures, arrays, cube maps**: `Texture_Desc.type` and `depth_or_layers` fields exist.
|
||||||
- **Additional samplers**: anisotropic, trilinear, clamp-to-border — additive enum values.
|
- **Additional samplers**: anisotropic, trilinear, clamp-to-border — additive enum values.
|
||||||
- **Atlas packing**: internal optimization for sub-batch coalescing; invisible to callers.
|
- **Atlas packing**: internal optimization for sub-batch coalescing; invisible to callers.
|
||||||
- **Per-shape texture variants**: `circle_texture`, `ellipse_texture`, `polygon_texture` — potential future additions, following the existing naming convention.
|
|
||||||
|
|
||||||
**References:**
|
**References:**
|
||||||
|
|
||||||
|
|||||||
+33
-32
@@ -21,16 +21,16 @@ import sdl "vendor:sdl3"
|
|||||||
// sigma_phys ≤ 8 → factor = 2
|
// sigma_phys ≤ 8 → factor = 2
|
||||||
// sigma_phys > 8 → factor = 4 (capped)
|
// sigma_phys > 8 → factor = 4 (capped)
|
||||||
//
|
//
|
||||||
// Capped at factor=4: master's preference for visual quality over bandwidth at the high end.
|
// Capped at factor=4 to favor visual quality over bandwidth at the high end. Larger factors
|
||||||
// Larger factors (8 and 16) would lose more high-frequency detail than the kernel can mask
|
// (8 and 16) would lose more high-frequency detail than the kernel can mask even with the
|
||||||
// even with the H+V split, and the bandwidth saving is small (the work region also shrinks
|
// H+V split, and the bandwidth saving is small (the work region also shrinks quadratically,
|
||||||
// quadratically, so most of the savings are already captured at factor=4).
|
// so most of the savings are already captured at factor=4).
|
||||||
//
|
//
|
||||||
// Working textures are sized at full swapchain resolution to support factor=1. Larger factors
|
// Working textures are sized at full swapchain resolution to support factor=1. Larger factors
|
||||||
// just write to a smaller sub-rect via viewport-limited rendering. Memory cost: ½-res → full-
|
// just write to a smaller sub-rect via viewport-limited rendering. Memory cost for full-res
|
||||||
// res working textures means 4× more bytes per working texture (2 textures, RGBA8: roughly
|
// working textures (2 textures, RGBA8) is roughly 16 MB at 1080p, 64 MB at 4K. On modern
|
||||||
// 16 MB at 1080p, 64 MB at 4K). On modern GPUs this is well within budget; on Mali Valhall
|
// GPUs this is well within budget; on Mali Valhall SBCs it's negligible against unified-
|
||||||
// SBCs it's negligible against unified-memory headroom.
|
// memory headroom.
|
||||||
//
|
//
|
||||||
// The shaders read the factor as a uniform. The downsample shader has three paths (factor=1
|
// The shaders read the factor as a uniform. The downsample shader has three paths (factor=1
|
||||||
// identity, factor=2 single bilinear tap, factor>=4 four bilinear taps with offsets scaling
|
// identity, factor=2 single bilinear tap, factor>=4 four bilinear taps with offsets scaling
|
||||||
@@ -86,7 +86,7 @@ Backdrop_Vert_Uniforms :: struct {
|
|||||||
// shaders/source/backdrop_downsample.frag.
|
// shaders/source/backdrop_downsample.frag.
|
||||||
Backdrop_Downsample_Frag_Uniforms :: struct {
|
Backdrop_Downsample_Frag_Uniforms :: struct {
|
||||||
inv_source_size: [2]f32, // 0: 8 — 1.0 / source_texture pixel dimensions (full-res)
|
inv_source_size: [2]f32, // 0: 8 — 1.0 / source_texture pixel dimensions (full-res)
|
||||||
downsample_factor: u32, // 8: 4 — 2 or 4 (selects 1-tap vs 4-tap path in shader)
|
downsample_factor: u32, // 8: 4 — 1, 2, or 4 (selects identity / 1-tap / 4-tap path in shader)
|
||||||
_pad0: u32, // 12: 4
|
_pad0: u32, // 12: 4
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -120,11 +120,12 @@ Pipeline_2D_Backdrop :: struct {
|
|||||||
primitive_buffer: Buffer,
|
primitive_buffer: Buffer,
|
||||||
|
|
||||||
// Working textures, allocated once at swapchain resolution and recreated only on resize.
|
// Working textures, allocated once at swapchain resolution and recreated only on resize.
|
||||||
// `source_texture` is full-resolution; the other two are ¼-res. All single-sample.
|
// All three are sized at full swapchain resolution and single-sample. Larger downsample
|
||||||
|
// factors fill only a sub-rect via viewport-limited rendering (see file-header comment).
|
||||||
// source_texture — when any backdrop draw exists this frame, the entire frame renders
|
// source_texture — when any backdrop draw exists this frame, the entire frame renders
|
||||||
// here instead of the swapchain (Approach B). Copied to the swapchain
|
// here instead of the swapchain. Copied to the swapchain at frame
|
||||||
// at frame end. Acts as the bracket's snapshot input by virtue of
|
// end. Acts as the bracket's snapshot input by virtue of already
|
||||||
// already containing the pre-bracket frame.
|
// containing the pre-bracket frame.
|
||||||
// downsample_texture — written by the downsample PSO. Read by the blur PSO in mode 0.
|
// downsample_texture — written by the downsample PSO. Read by the blur PSO in mode 0.
|
||||||
// h_blur_texture — written by the blur PSO in mode 0. Read by the blur PSO in mode 1.
|
// h_blur_texture — written by the blur PSO in mode 0. Read by the blur PSO in mode 1.
|
||||||
source_texture: ^sdl.GPUTexture,
|
source_texture: ^sdl.GPUTexture,
|
||||||
@@ -243,7 +244,7 @@ create_pipeline_2d_backdrop :: proc(
|
|||||||
|
|
||||||
//----- Downsample PSO ----------------------------------
|
//----- Downsample PSO ----------------------------------
|
||||||
// Single bilinear sample, blend disabled. No vertex buffer (gl_VertexIndex 0..2 emits the
|
// Single bilinear sample, blend disabled. No vertex buffer (gl_VertexIndex 0..2 emits the
|
||||||
// fullscreen triangle). Single-sample target (the ¼-res working textures are never MSAA).
|
// fullscreen triangle). Single-sample target (working textures are never MSAA).
|
||||||
downsample_target := sdl.GPUColorTargetDescription {
|
downsample_target := sdl.GPUColorTargetDescription {
|
||||||
format = swapchain_format,
|
format = swapchain_format,
|
||||||
blend_state = sdl.GPUColorTargetBlendState{enable_blend = false},
|
blend_state = sdl.GPUColorTargetBlendState{enable_blend = false},
|
||||||
@@ -350,9 +351,9 @@ destroy_pipeline_2d_backdrop :: proc(device: ^sdl.GPUDevice, pipeline: ^Pipeline
|
|||||||
// ---------------------------------------------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
// Allocate (or reallocate, on resize) the three working textures that the backdrop bracket
|
// Allocate (or reallocate, on resize) the three working textures that the backdrop bracket
|
||||||
// uses. `source_texture` is full swapchain resolution; the other two are ¼-res. All single-
|
// uses. All three are sized at full swapchain resolution, single-sample, share the swapchain
|
||||||
// sample, all share the swapchain format, all need {.COLOR_TARGET, .SAMPLER} usage so they
|
// format, and need {.COLOR_TARGET, .SAMPLER} usage so they can be written by render passes
|
||||||
// can be written by render passes and read by subsequent passes.
|
// and read by subsequent passes.
|
||||||
//
|
//
|
||||||
// Recreates on dimension change only — same-size frames hit the early-out and skip GPU
|
// Recreates on dimension change only — same-size frames hit the early-out and skip GPU
|
||||||
// resource churn.
|
// resource churn.
|
||||||
@@ -466,19 +467,19 @@ ensure_backdrop_textures :: proc(device: ^sdl.GPUDevice, format: sdl.GPUTextureF
|
|||||||
// `i in [1, pair_count)` and does two texture fetches per pair — one at +offset, one at
|
// `i in [1, pair_count)` and does two texture fetches per pair — one at +offset, one at
|
||||||
// -offset — for a total of 1 + 2*(pair_count-1) bilinear fetches per fragment.
|
// -offset — for a total of 1 + 2*(pair_count-1) bilinear fetches per fragment.
|
||||||
//
|
//
|
||||||
// `sigma` is the true Gaussian standard deviation in the kernel's working-space units (¼-res
|
// `sigma` is the true Gaussian standard deviation in the kernel's working-space units
|
||||||
// texels, after the caller has converted from logical pixels via dpi_scaling and the
|
// (working-resolution texels, after the caller has converted from logical pixels via
|
||||||
// downsample factor). The kernel extent reaches ±3σ, capturing 99.7% of the Gaussian's
|
// dpi_scaling and the downsample factor). The kernel extent reaches ±3σ, capturing 99.7% of
|
||||||
|
// the Gaussian's
|
||||||
// mass; weights beyond that contribute imperceptibly. sigma <= 0 produces a degenerate
|
// mass; weights beyond that contribute imperceptibly. sigma <= 0 produces a degenerate
|
||||||
// kernel `{1, 0}` that acts as a sharp pass-through. After the loop, the discrete weights
|
// kernel `{1, 0}` that acts as a sharp pass-through. After the loop, the discrete weights
|
||||||
// are normalized so they sum to 1.0 (truncating at ±3σ loses a tiny amount of mass; we
|
// are normalized so they sum to 1.0 (truncating at ±3σ loses a tiny amount of mass; we
|
||||||
// renormalize to preserve overall image brightness).
|
// renormalize to preserve overall image brightness).
|
||||||
//
|
//
|
||||||
// Earlier versions of this routine ported RAD Debugger's algorithm verbatim, which derives
|
// Note on the parameter contract: this routine takes σ directly and derives the tap count
|
||||||
// stdev from a tap-count parameter (`stdev = (blur_count-1)/2`). That made the parameter
|
// from it, rather than the inverse (RAD Debugger's algorithm passes a tap count and derives
|
||||||
// name misleading: the user thought they were passing σ but were actually passing
|
// `stdev = (blur_count-1)/2`). Taking σ directly matches what callers expect when they read
|
||||||
// half-kernel-width. This version takes σ directly and derives the tap count from it,
|
// "gaussian_sigma" — passing tap count under that name was a footgun.
|
||||||
// matching what callers expect when they read "gaussian_sigma".
|
|
||||||
@(private)
|
@(private)
|
||||||
compute_blur_kernel :: proc(sigma: f32, kernel: ^[MAX_BACKDROP_KERNEL_PAIRS][4]f32) -> (pair_count: u32) {
|
compute_blur_kernel :: proc(sigma: f32, kernel: ^[MAX_BACKDROP_KERNEL_PAIRS][4]f32) -> (pair_count: u32) {
|
||||||
if sigma <= 0 {
|
if sigma <= 0 {
|
||||||
@@ -624,7 +625,7 @@ upload_backdrop_primitives :: proc(device: ^sdl.GPUDevice, pass: ^sdl.GPUCopyPas
|
|||||||
// ---------------------------------------------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
// Returns true if any sub-batch in any layer this frame is .Backdrop kind. Called once at the
|
// Returns true if any sub-batch in any layer this frame is .Backdrop kind. Called once at the
|
||||||
// top of `end()` to decide whether to route the whole frame to source_texture (Approach B).
|
// top of `end()` to decide whether to route the whole frame to source_texture.
|
||||||
// O(total sub-batches) but with an early-exit on the first hit, so typical cost is tiny.
|
// O(total sub-batches) but with an early-exit on the first hit, so typical cost is tiny.
|
||||||
@(private)
|
@(private)
|
||||||
frame_has_backdrop :: proc() -> bool {
|
frame_has_backdrop :: proc() -> bool {
|
||||||
@@ -742,10 +743,10 @@ compute_backdrop_group_work_region :: proc(
|
|||||||
// target viewport, per-primitive SDF discard handles masking and applies the tint. Each
|
// target viewport, per-primitive SDF discard handles masking and applies the tint. Each
|
||||||
// sub-batch in the group is one instanced draw.
|
// sub-batch in the group is one instanced draw.
|
||||||
//
|
//
|
||||||
// V-blur was historically combined with the composite into a single shader invocation, but
|
// V-blur is run as its own working→working pass rather than folded into the composite. The
|
||||||
// that produced a horizontal-vs-vertical asymmetry artifact (horizontal source features
|
// folded variant produces a horizontal-vs-vertical asymmetry artifact (horizontal source
|
||||||
// looked sharper than vertical ones inside the panel). Splitting V-blur into its own
|
// features end up looking sharper than vertical ones inside the panel). Matching V's
|
||||||
// working→working pass restores symmetry by making H and V blurs structurally identical.
|
// structure exactly to H's restores symmetry.
|
||||||
//
|
//
|
||||||
// On exit, source_texture contains the pre-bracket contents plus all backdrop primitives
|
// On exit, source_texture contains the pre-bracket contents plus all backdrop primitives
|
||||||
// composited on top. The caller then runs Pass B (post-bracket non-backdrop sub-batches) on
|
// composited on top. The caller then runs Pass B (post-bracket non-backdrop sub-batches) on
|
||||||
@@ -1011,8 +1012,8 @@ run_backdrop_bracket :: proc(
|
|||||||
// geometry. The caller sets `color` (tint) on the returned primitive before submitting.
|
// geometry. The caller sets `color` (tint) on the returned primitive before submitting.
|
||||||
//
|
//
|
||||||
// No rotation, no outline — backdrop primitives are intentionally limited to axis-aligned
|
// No rotation, no outline — backdrop primitives are intentionally limited to axis-aligned
|
||||||
// RRects in v1. Rotation breaks screen-space blur sampling visually; outline would be a
|
// RRects. Rotation breaks screen-space blur sampling visually; outline would be a specialized
|
||||||
// specialized edge effect that belongs in its own primitive type.
|
// edge effect that belongs in its own primitive type.
|
||||||
@(private)
|
@(private)
|
||||||
build_backdrop_primitive :: proc(
|
build_backdrop_primitive :: proc(
|
||||||
rect: Rectangle,
|
rect: Rectangle,
|
||||||
|
|||||||
+5
-5
@@ -830,9 +830,9 @@ end :: proc(device: ^sdl.GPUDevice, window: ^sdl.Window, clear_color: Color = DF
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Pre-scan: if any layer this frame has a backdrop sub-batch, route the entire frame to
|
// Pre-scan: if any layer this frame has a backdrop sub-batch, route the entire frame to
|
||||||
// source_texture (Approach B) so the bracket can sample the pre-bracket framebuffer
|
// source_texture so the bracket can sample the pre-bracket framebuffer without a mid-
|
||||||
// without a mid-frame texture copy. Frames without any backdrop hit the existing fast
|
// frame texture copy. Frames without any backdrop hit the existing fast path and never
|
||||||
// path and never touch the backdrop pipeline's working textures.
|
// touch the backdrop pipeline's working textures.
|
||||||
has_backdrop := frame_has_backdrop()
|
has_backdrop := frame_has_backdrop()
|
||||||
|
|
||||||
// Upload primitives to GPU (vertices, indices, SDF prims, and backdrop prims share one
|
// Upload primitives to GPU (vertices, indices, SDF prims, and backdrop prims share one
|
||||||
@@ -880,8 +880,8 @@ end :: proc(device: ^sdl.GPUDevice, window: ^sdl.Window, clear_color: Color = DF
|
|||||||
draw_layer(device, window, cmd_buffer, render_texture, width, height, clear_color_f32, &layer)
|
draw_layer(device, window, cmd_buffer, render_texture, width, height, clear_color_f32, &layer)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Approach B finalization: when we rendered into source_texture, copy it to the swapchain.
|
// When we rendered into source_texture, copy it to the swapchain. Single
|
||||||
// Single CopyGPUTextureToTexture call per frame, only when backdrop content was present.
|
// CopyGPUTextureToTexture call per frame, only when backdrop content was present.
|
||||||
if has_backdrop {
|
if has_backdrop {
|
||||||
copy_pass := sdl.BeginGPUCopyPass(cmd_buffer)
|
copy_pass := sdl.BeginGPUCopyPass(cmd_buffer)
|
||||||
sdl.CopyGPUTextureToTexture(
|
sdl.CopyGPUTextureToTexture(
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ texture_size :: #force_inline proc(qrcode_buf: []u8) -> int {
|
|||||||
//
|
//
|
||||||
// Returns ok=false when:
|
// Returns ok=false when:
|
||||||
// - qrcode_buf is invalid (qrcode.get_size returns 0).
|
// - qrcode_buf is invalid (qrcode.get_size returns 0).
|
||||||
// - texture_buf is smaller than to_texture_size(qrcode_buf).
|
// - texture_buf is smaller than texture_size(qrcode_buf).
|
||||||
@(require_results)
|
@(require_results)
|
||||||
to_texture :: proc(
|
to_texture :: proc(
|
||||||
qrcode_buf: []u8,
|
qrcode_buf: []u8,
|
||||||
|
|||||||
@@ -10,8 +10,8 @@ import cyber "../cybersteel"
|
|||||||
|
|
||||||
// Backdrop example.
|
// Backdrop example.
|
||||||
//
|
//
|
||||||
// Verifies the Stage D bracket scheduler end-to-end. The demo is structured as three zones in
|
// Exercises the bracket scheduler end-to-end. The demo is structured as three zones in one
|
||||||
// one window so we can stress-test the cases that matter:
|
// window so we can stress-test the cases that matter:
|
||||||
//
|
//
|
||||||
// Zone 1 (top, base layer): animated colorful background + two side-by-side frosted panels
|
// Zone 1 (top, base layer): animated colorful background + two side-by-side frosted panels
|
||||||
// with DIFFERENT sigmas and DIFFERENT tints. Tests sigma grouping
|
// with DIFFERENT sigmas and DIFFERENT tints. Tests sigma grouping
|
||||||
@@ -269,9 +269,8 @@ gaussian_blur :: proc() {
|
|||||||
// SPACE : reset to sigma=10
|
// SPACE : reset to sigma=10
|
||||||
// T : toggle the test rectangle on top of the panel
|
// T : toggle the test rectangle on top of the panel
|
||||||
//
|
//
|
||||||
// Sigma is printed to the console label and to the title bar so you can correlate visual
|
// Sigma is printed to the title bar so you can correlate visual behavior with the numeric
|
||||||
// behavior with kernel state (which is also logged via the [backdrop] debug print in
|
// value as you adjust it.
|
||||||
// backdrop.odin's compute_blur_kernel callsite).
|
|
||||||
gaussian_blur_debug :: proc() {
|
gaussian_blur_debug :: proc() {
|
||||||
if !sdl.Init({.VIDEO}) do os.exit(1)
|
if !sdl.Init({.VIDEO}) do os.exit(1)
|
||||||
window := sdl.CreateWindow("Backdrop debug", 800, 600, {.HIGH_PIXEL_DENSITY})
|
window := sdl.CreateWindow("Backdrop debug", 800, 600, {.HIGH_PIXEL_DENSITY})
|
||||||
|
|||||||
@@ -116,9 +116,9 @@ Gradient_Outline :: struct {
|
|||||||
// avoiding per-pixel trigonometry in the fragment shader. Only read when .Rotated is set.
|
// avoiding per-pixel trigonometry in the fragment shader. Only read when .Rotated is set.
|
||||||
//
|
//
|
||||||
// Named Base_2D_Primitive (not just Primitive) to disambiguate from Backdrop_Primitive in
|
// Named Base_2D_Primitive (not just Primitive) to disambiguate from Backdrop_Primitive in
|
||||||
// pipeline_2d_backdrop.odin. The two pipelines have unrelated GPU layouts and unrelated
|
// backdrop.odin. The two pipelines have unrelated GPU layouts and unrelated fragment-shader
|
||||||
// fragment-shader contracts; pairing each with its own primitive type keeps cross-references
|
// contracts; pairing each with its own primitive type keeps cross-references unambiguous
|
||||||
// unambiguous when grepping the codebase.
|
// when grepping the codebase.
|
||||||
Base_2D_Primitive :: struct {
|
Base_2D_Primitive :: struct {
|
||||||
bounds: [4]f32, // 0: min_x, min_y, max_x, max_y (world-space, pre-DPI)
|
bounds: [4]f32, // 0: min_x, min_y, max_x, max_y (world-space, pre-DPI)
|
||||||
color: Color, // 16: u8x4, fill color / gradient start color / texture tint
|
color: Color, // 16: u8x4, fill color / gradient start color / texture tint
|
||||||
|
|||||||
@@ -1,19 +1,18 @@
|
|||||||
#version 450 core
|
#version 450 core
|
||||||
|
|
||||||
// Unified backdrop blur fragment shader.
|
// Unified backdrop blur fragment shader.
|
||||||
// Handles both H-blur (mode 0, blurs the ¼-resolution downsample texture into
|
// Handles both the 1D separable blur passes (mode 0, used for BOTH the H-pass and V-pass;
|
||||||
// the ¼-resolution h_blur texture) and V-blur+composite (mode 1, blurs h_blur
|
// `direction` picks the axis) and the composite pass (mode 1, reads the fully-blurred
|
||||||
// vertically, masks via RRect SDF, applies tint, composites outline, and writes
|
// working texture, masks via RRect SDF, applies tint, and writes to source_texture with
|
||||||
// to the main render target with premultiplied alpha).
|
// premultiplied-over blending). Working textures are sized at the full swapchain resolution;
|
||||||
|
// downsampled content occupies only a sub-rect at downsample factor > 1 (set via viewport).
|
||||||
//
|
//
|
||||||
// Following RAD's pattern, V-mode replaces a separate composite pass: the SDF
|
// The composite blends with source_texture via the standard premultiplied-over blend state
|
||||||
// discard limits V-blur work to the masked region, and the per-primitive tint
|
// (ONE, ONE_MINUS_SRC_ALPHA).
|
||||||
// is folded in. Output blends with the main render target via the standard
|
|
||||||
// premultiplied-over blend state (ONE, ONE_MINUS_SRC_ALPHA).
|
|
||||||
//
|
//
|
||||||
// Backdrop primitives are tint-only — there is no outline. A specialized edge
|
// Backdrop primitives are tint-only — there is no outline. A specialized edge effect
|
||||||
// effect (e.g. liquid-glass-style refraction outlines) would be implemented
|
// (e.g. liquid-glass-style refraction outlines) would be implemented as a dedicated
|
||||||
// as a dedicated primitive type with its own pipeline.
|
// primitive type with its own pipeline.
|
||||||
//
|
//
|
||||||
// Two modes, structurally distinct:
|
// Two modes, structurally distinct:
|
||||||
//
|
//
|
||||||
@@ -30,11 +29,11 @@
|
|||||||
// (gl_FragCoord.xy * inv_downsample_factor) * inv_working_size.
|
// (gl_FragCoord.xy * inv_downsample_factor) * inv_working_size.
|
||||||
// No kernel is applied here — the blur is already complete.
|
// No kernel is applied here — the blur is already complete.
|
||||||
//
|
//
|
||||||
// Splitting V-blur out of the composite pass (an earlier version combined them) was needed
|
// V-blur is run as its own working→working pass rather than folded into the composite. The
|
||||||
// to avoid a horizontal-vs-vertical asymmetry artifact: when the V-blur sampled the H-blur
|
// folded variant produced a horizontal-vs-vertical asymmetry artifact: when V-blur sampled
|
||||||
// output through the bilinear-upsample/SDF-mask/tint pipeline in one shader invocation,
|
// the H-blur output through the bilinear-upsample/SDF-mask/tint pipeline in one shader
|
||||||
// horizontal source features ended up looking sharper than vertical ones. Running V-blur as
|
// invocation, horizontal source features ended up looking sharper than vertical ones.
|
||||||
// its own working→working pass (matching H's structure exactly) restores symmetry.
|
// Matching V's structure exactly to H's restores symmetry.
|
||||||
|
|
||||||
const uint MAX_KERNEL_PAIRS = 32;
|
const uint MAX_KERNEL_PAIRS = 32;
|
||||||
|
|
||||||
@@ -140,16 +139,16 @@ void main() {
|
|||||||
vec2 uv = (gl_FragCoord.xy * inv_downsample_factor) * inv_working_size;
|
vec2 uv = (gl_FragCoord.xy * inv_downsample_factor) * inv_working_size;
|
||||||
vec3 color = texture(blur_input_tex, uv).rgb;
|
vec3 color = texture(blur_input_tex, uv).rgb;
|
||||||
|
|
||||||
// Tint composition (Option B semantics): inside the masked region the panel is fully
|
// Tint composition: inside the masked region the panel is fully opaque — it completely
|
||||||
// opaque — it completely hides the original framebuffer content, just like real frosted
|
// hides the original framebuffer content, just like real frosted glass and like iOS
|
||||||
// glass and like iOS UIBlurEffect / CSS backdrop-filter. f_color.rgb specifies the tint
|
// UIBlurEffect / CSS backdrop-filter. f_color.rgb specifies the tint color; f_color.a
|
||||||
// color; f_color.a specifies the tint *mix strength* (NOT panel opacity). At alpha=0 we
|
// specifies the tint *mix strength* (NOT panel opacity). At alpha=0 we see the pure
|
||||||
// see the pure blur; at alpha=255 we see the blur fully multiplied by the tint color.
|
// blur; at alpha=255 we see the blur fully multiplied by the tint color.
|
||||||
//
|
//
|
||||||
// Output is premultiplied to match the ONE, ONE_MINUS_SRC_ALPHA blend state. Coverage
|
// Output is premultiplied to match the ONE, ONE_MINUS_SRC_ALPHA blend state. Coverage
|
||||||
// (the SDF mask's edge AA) modulates only the alpha channel, never the panel-vs-source
|
// (the SDF mask's edge AA) modulates only the alpha channel, never the panel-vs-source
|
||||||
// blend; that way edge pixels still feather correctly without re-introducing the bug
|
// blend; that way edge pixels still feather correctly while mid-panel pixels stay fully
|
||||||
// where mid-panel pixels became semi-transparent.
|
// opaque.
|
||||||
mediump vec3 tinted = mix(color, color * f_color.rgb, f_color.a);
|
mediump vec3 tinted = mix(color, color * f_color.rgb, f_color.a);
|
||||||
mediump float coverage = sdf_alpha(d_n, h_n);
|
mediump float coverage = sdf_alpha(d_n, h_n);
|
||||||
out_color = vec4(tinted * coverage, coverage);
|
out_color = vec4(tinted * coverage, coverage);
|
||||||
|
|||||||
@@ -1,18 +1,19 @@
|
|||||||
#version 450 core
|
#version 450 core
|
||||||
|
|
||||||
// Unified backdrop blur vertex shader.
|
// Unified backdrop blur vertex shader.
|
||||||
// Handles both H-blur (fullscreen triangle, mode 0) and V-blur+composite (instanced
|
// Handles both the 1D separable blur passes (fullscreen triangle, mode 0; used for
|
||||||
// unit-quad over Backdrop_Primitive storage buffer, mode 1) for the second PSO of
|
// BOTH the H-pass and V-pass) and the composite pass (instanced unit-quad over
|
||||||
// the backdrop bracket. The first PSO (downsample) uses backdrop_fullscreen.vert.
|
// Backdrop_Primitive storage buffer, mode 1) for the second PSO of the backdrop bracket.
|
||||||
|
// The first PSO (downsample) uses backdrop_fullscreen.vert.
|
||||||
//
|
//
|
||||||
// No vertex buffer for either mode. Mode 0 uses gl_VertexIndex 0..2 for a single
|
// No vertex buffer for either mode. Mode 0 uses gl_VertexIndex 0..2 for a single
|
||||||
// fullscreen triangle; mode 1 uses gl_VertexIndex 0..5 for a unit-quad (two
|
// fullscreen triangle; mode 1 uses gl_VertexIndex 0..5 for a unit-quad (two
|
||||||
// triangles, TRIANGLELIST topology) and gl_InstanceIndex to select the primitive.
|
// triangles, TRIANGLELIST topology) and gl_InstanceIndex to select the primitive.
|
||||||
//
|
//
|
||||||
// Mode 0 viewport+scissor are CPU-set per layer-bracket to the work region (union
|
// Mode 0 viewport+scissor are CPU-set per sigma group to the work region (union AABB
|
||||||
// AABB of backdrop primitives + 3*max_sigma, clamped to swapchain bounds). Mode 1
|
// of that group's backdrop primitives + halo, clamped to swapchain bounds). Mode 1
|
||||||
// renders into the main render target with the screen-space orthographic projection;
|
// renders into source_texture with the screen-space orthographic projection; the
|
||||||
// the per-primitive bounds drive the quad in screen space.
|
// per-primitive bounds drive the quad in screen space.
|
||||||
//
|
//
|
||||||
// Backdrop primitives have NO rotation — backdrop sampling is in screen space, so
|
// Backdrop primitives have NO rotation — backdrop sampling is in screen space, so
|
||||||
// a rotated mask over a stationary blur sample would look wrong.
|
// a rotated mask over a stationary blur sample would look wrong.
|
||||||
@@ -46,11 +47,11 @@ layout(set = 1, binding = 0) uniform Uniforms {
|
|||||||
// vec2 and scalar tail packs tight to land the struct at a clean 48-byte
|
// vec2 and scalar tail packs tight to land the struct at a clean 48-byte
|
||||||
// stride (a multiple of 16, so the array stride needs no rounding either).
|
// stride (a multiple of 16, so the array stride needs no rounding either).
|
||||||
// Field semantics match the CPU-side Backdrop_Primitive declared in
|
// Field semantics match the CPU-side Backdrop_Primitive declared in
|
||||||
// levlib/draw/pipeline_2d_backdrop.odin; keep both in sync.
|
// levlib/draw/backdrop.odin; keep both in sync.
|
||||||
//
|
//
|
||||||
// Backdrop primitives are tint-only in v1: outline is intentionally absent.
|
// Backdrop primitives are tint-only: outline is intentionally absent. Specialized
|
||||||
// Future specialized effects (e.g. liquid-glass-style edges) would be a
|
// edge effects (e.g. liquid-glass-style refraction outlines) would be a dedicated
|
||||||
// dedicated primitive type with its own pipeline rather than a flag bit here.
|
// primitive type with its own pipeline rather than a flag bit here.
|
||||||
struct Backdrop_Primitive {
|
struct Backdrop_Primitive {
|
||||||
vec4 bounds; // 0-15: min_xy, max_xy (world-space)
|
vec4 bounds; // 0-15: min_xy, max_xy (world-space)
|
||||||
vec4 radii; // 16-31: per-corner radii (physical px)
|
vec4 radii; // 16-31: per-corner radii (physical px)
|
||||||
|
|||||||
@@ -2,9 +2,9 @@
|
|||||||
|
|
||||||
// Backdrop downsample fragment shader.
|
// Backdrop downsample fragment shader.
|
||||||
// Reads source_texture (full-resolution snapshot of pre-bracket framebuffer contents) and
|
// Reads source_texture (full-resolution snapshot of pre-bracket framebuffer contents) and
|
||||||
// writes a downsampled copy at factor 1, 2, 4, 8, or 16. The output is the working texture
|
// writes a downsampled copy at factor 1, 2, or 4. The output is the working texture (sized
|
||||||
// (sized at full swapchain resolution); larger factors only fill a sub-rect of it via the
|
// at full swapchain resolution); larger factors only fill a sub-rect of it via the CPU-set
|
||||||
// CPU-set viewport. See backdrop.odin for the factor selection table (Flutter-style).
|
// viewport. See backdrop.odin for the factor selection table (Flutter-style).
|
||||||
//
|
//
|
||||||
// Shader paths by factor:
|
// Shader paths by factor:
|
||||||
//
|
//
|
||||||
@@ -15,15 +15,12 @@
|
|||||||
// factor=2: each output covers a 2×2 source block. Single bilinear tap at the shared
|
// factor=2: each output covers a 2×2 source block. Single bilinear tap at the shared
|
||||||
// corner reads all 4 source pixels with 0.25 weight.
|
// corner reads all 4 source pixels with 0.25 weight.
|
||||||
//
|
//
|
||||||
// factor>=4: each output covers a (factor)×(factor) source block. We use 4 bilinear taps,
|
// factor=4: each output covers a 4×4 source block. We use 4 bilinear taps, each at the
|
||||||
// each at the shared corner of a (factor/2)×(factor/2) sub-block. Each tap reads
|
// shared corner of a 2×2 sub-block. Each tap reads 4 source pixels uniformly;
|
||||||
// 4 source pixels uniformly; combined, the 4 taps sample 16 source pixels arranged
|
// combined, the 4 taps sample 16 source pixels arranged uniformly across the
|
||||||
// uniformly across the block. This is an approximation of a true (factor)² box
|
// block (full coverage at factor=4). The factor>=4 path is structured so the
|
||||||
// filter — exact at factor=4 (16 pixels = full coverage), undersampled at factor=8
|
// same shader code would extend to factor=8 (16 pixels of 64) or factor=16 (16
|
||||||
// (16 pixels of 64) and factor=16 (16 of 256). Flutter uses a richer 13-tap COD-
|
// of 256) if the CPU-side cap is ever raised, though the current cap is 4.
|
||||||
// style downsample shader at high factors; we accept the simpler 4-tap pattern
|
|
||||||
// for now since the high-factor cases come with large kernels that mask any
|
|
||||||
// residual aliasing.
|
|
||||||
//
|
//
|
||||||
// The viewport+scissor are set by the CPU to limit output to the layer's work region in
|
// The viewport+scissor are set by the CPU to limit output to the layer's work region in
|
||||||
// working-texture coords (work_region_phys / factor), clamped to the texture bounds.
|
// working-texture coords (work_region_phys / factor), clamped to the texture bounds.
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ layout(std430, set = 0, binding = 0) readonly buffer Base_2D_Primitives {
|
|||||||
// ---------- Entry point ----------
|
// ---------- Entry point ----------
|
||||||
void main() {
|
void main() {
|
||||||
if (mode == 0u) {
|
if (mode == 0u) {
|
||||||
// ---- Mode 0: Tessellated (legacy) ----
|
// ---- Mode 0: Tessellated (used for text and arbitrary user geometry) ----
|
||||||
f_color = v_color;
|
f_color = v_color;
|
||||||
f_local_or_uv = v_uv;
|
f_local_or_uv = v_uv;
|
||||||
f_params = vec4(0.0);
|
f_params = vec4(0.0);
|
||||||
|
|||||||
+2
-1
@@ -53,7 +53,8 @@ emit_rectangle :: proc(x, y, width, height: f32, color: Color, vertices: []Verte
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Internal — submit an SDF primitive with optional texture binding.
|
// Internal — submit an SDF primitive with optional texture binding.
|
||||||
// Replaces the old prepare_sdf_primitive and prepare_sdf_primitive_textured.
|
// The texture-aware counterpart of `draw.prepare_sdf_primitive`; lets shape procs route a
|
||||||
|
// texture_id and sampler into the sub-batch without growing the public API.
|
||||||
@(private)
|
@(private)
|
||||||
prepare_sdf_primitive_ex :: proc(
|
prepare_sdf_primitive_ex :: proc(
|
||||||
layer: ^Layer,
|
layer: ^Layer,
|
||||||
|
|||||||
@@ -9,46 +9,45 @@ import qr ".."
|
|||||||
|
|
||||||
main :: proc() {
|
main :: proc() {
|
||||||
//----- General setup ----------------------------------
|
//----- General setup ----------------------------------
|
||||||
{
|
// Temp
|
||||||
// Temp
|
track_temp: mem.Tracking_Allocator
|
||||||
track_temp: mem.Tracking_Allocator
|
mem.tracking_allocator_init(&track_temp, context.temp_allocator)
|
||||||
mem.tracking_allocator_init(&track_temp, context.temp_allocator)
|
context.temp_allocator = mem.tracking_allocator(&track_temp)
|
||||||
context.temp_allocator = mem.tracking_allocator(&track_temp)
|
|
||||||
|
|
||||||
// Default
|
// Default
|
||||||
track: mem.Tracking_Allocator
|
track: mem.Tracking_Allocator
|
||||||
mem.tracking_allocator_init(&track, context.allocator)
|
mem.tracking_allocator_init(&track, context.allocator)
|
||||||
context.allocator = mem.tracking_allocator(&track)
|
context.allocator = mem.tracking_allocator(&track)
|
||||||
// Log a warning about any memory that was not freed by the end of the program.
|
// Log a warning about any memory that was not freed by the end of the program.
|
||||||
// This could be fine for some global state or it could be a memory leak.
|
// This could be fine for some global state or it could be a memory leak.
|
||||||
defer {
|
defer {
|
||||||
// Temp allocator
|
// Temp allocator
|
||||||
if len(track_temp.bad_free_array) > 0 {
|
if len(track_temp.bad_free_array) > 0 {
|
||||||
fmt.eprintf("=== %v incorrect frees - temp allocator: ===\n", len(track_temp.bad_free_array))
|
fmt.eprintf("=== %v incorrect frees - temp allocator: ===\n", len(track_temp.bad_free_array))
|
||||||
for entry in track_temp.bad_free_array {
|
for entry in track_temp.bad_free_array {
|
||||||
fmt.eprintf("- %p @ %v\n", entry.memory, entry.location)
|
fmt.eprintf("- %p @ %v\n", entry.memory, entry.location)
|
||||||
}
|
|
||||||
mem.tracking_allocator_destroy(&track_temp)
|
|
||||||
}
|
}
|
||||||
// Default allocator
|
mem.tracking_allocator_destroy(&track_temp)
|
||||||
if len(track.allocation_map) > 0 {
|
|
||||||
fmt.eprintf("=== %v allocations not freed - main allocator: ===\n", len(track.allocation_map))
|
|
||||||
for _, entry in track.allocation_map {
|
|
||||||
fmt.eprintf("- %v bytes @ %v\n", entry.size, entry.location)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if len(track.bad_free_array) > 0 {
|
|
||||||
fmt.eprintf("=== %v incorrect frees - main allocator: ===\n", len(track.bad_free_array))
|
|
||||||
for entry in track.bad_free_array {
|
|
||||||
fmt.eprintf("- %p @ %v\n", entry.memory, entry.location)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
mem.tracking_allocator_destroy(&track)
|
|
||||||
}
|
}
|
||||||
// Logger
|
// Default allocator
|
||||||
context.logger = log.create_console_logger()
|
if len(track.allocation_map) > 0 {
|
||||||
defer log.destroy_console_logger(context.logger)
|
fmt.eprintf("=== %v allocations not freed - main allocator: ===\n", len(track.allocation_map))
|
||||||
|
for _, entry in track.allocation_map {
|
||||||
|
fmt.eprintf("- %v bytes @ %v\n", entry.size, entry.location)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(track.bad_free_array) > 0 {
|
||||||
|
fmt.eprintf("=== %v incorrect frees - main allocator: ===\n", len(track.bad_free_array))
|
||||||
|
for entry in track.bad_free_array {
|
||||||
|
fmt.eprintf("- %p @ %v\n", entry.memory, entry.location)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
mem.tracking_allocator_destroy(&track)
|
||||||
}
|
}
|
||||||
|
// Logger
|
||||||
|
context.logger = log.create_console_logger()
|
||||||
|
defer log.destroy_console_logger(context.logger)
|
||||||
|
|
||||||
|
|
||||||
args := os.args
|
args := os.args
|
||||||
if len(args) < 2 {
|
if len(args) < 2 {
|
||||||
|
|||||||
Vendored
+35
-36
@@ -14,46 +14,45 @@ DB_PATH :: "out/debug/lmdb_example_db"
|
|||||||
|
|
||||||
main :: proc() {
|
main :: proc() {
|
||||||
//----- General setup ----------------------------------
|
//----- General setup ----------------------------------
|
||||||
{
|
// Temp
|
||||||
// Temp
|
track_temp: mem.Tracking_Allocator
|
||||||
track_temp: mem.Tracking_Allocator
|
mem.tracking_allocator_init(&track_temp, context.temp_allocator)
|
||||||
mem.tracking_allocator_init(&track_temp, context.temp_allocator)
|
context.temp_allocator = mem.tracking_allocator(&track_temp)
|
||||||
context.temp_allocator = mem.tracking_allocator(&track_temp)
|
|
||||||
|
|
||||||
// Default
|
// Default
|
||||||
track: mem.Tracking_Allocator
|
track: mem.Tracking_Allocator
|
||||||
mem.tracking_allocator_init(&track, context.allocator)
|
mem.tracking_allocator_init(&track, context.allocator)
|
||||||
context.allocator = mem.tracking_allocator(&track)
|
context.allocator = mem.tracking_allocator(&track)
|
||||||
// Log a warning about any memory that was not freed by the end of the program.
|
// Log a warning about any memory that was not freed by the end of the program.
|
||||||
// This could be fine for some global state or it could be a memory leak.
|
// This could be fine for some global state or it could be a memory leak.
|
||||||
defer {
|
defer {
|
||||||
// Temp allocator
|
// Temp allocator
|
||||||
if len(track_temp.bad_free_array) > 0 {
|
if len(track_temp.bad_free_array) > 0 {
|
||||||
fmt.eprintf("=== %v incorrect frees - temp allocator: ===\n", len(track_temp.bad_free_array))
|
fmt.eprintf("=== %v incorrect frees - temp allocator: ===\n", len(track_temp.bad_free_array))
|
||||||
for entry in track_temp.bad_free_array {
|
for entry in track_temp.bad_free_array {
|
||||||
fmt.eprintf("- %p @ %v\n", entry.memory, entry.location)
|
fmt.eprintf("- %p @ %v\n", entry.memory, entry.location)
|
||||||
}
|
|
||||||
mem.tracking_allocator_destroy(&track_temp)
|
|
||||||
}
|
}
|
||||||
// Default allocator
|
mem.tracking_allocator_destroy(&track_temp)
|
||||||
if len(track.allocation_map) > 0 {
|
|
||||||
fmt.eprintf("=== %v allocations not freed - main allocator: ===\n", len(track.allocation_map))
|
|
||||||
for _, entry in track.allocation_map {
|
|
||||||
fmt.eprintf("- %v bytes @ %v\n", entry.size, entry.location)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if len(track.bad_free_array) > 0 {
|
|
||||||
fmt.eprintf("=== %v incorrect frees - main allocator: ===\n", len(track.bad_free_array))
|
|
||||||
for entry in track.bad_free_array {
|
|
||||||
fmt.eprintf("- %p @ %v\n", entry.memory, entry.location)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
mem.tracking_allocator_destroy(&track)
|
|
||||||
}
|
}
|
||||||
// Logger
|
// Default allocator
|
||||||
context.logger = log.create_console_logger()
|
if len(track.allocation_map) > 0 {
|
||||||
defer log.destroy_console_logger(context.logger)
|
fmt.eprintf("=== %v allocations not freed - main allocator: ===\n", len(track.allocation_map))
|
||||||
|
for _, entry in track.allocation_map {
|
||||||
|
fmt.eprintf("- %v bytes @ %v\n", entry.size, entry.location)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(track.bad_free_array) > 0 {
|
||||||
|
fmt.eprintf("=== %v incorrect frees - main allocator: ===\n", len(track.bad_free_array))
|
||||||
|
for entry in track.bad_free_array {
|
||||||
|
fmt.eprintf("- %p @ %v\n", entry.memory, entry.location)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
mem.tracking_allocator_destroy(&track)
|
||||||
}
|
}
|
||||||
|
// Logger
|
||||||
|
context.logger = log.create_console_logger()
|
||||||
|
defer log.destroy_console_logger(context.logger)
|
||||||
|
|
||||||
|
|
||||||
environment: ^mdb.Env
|
environment: ^mdb.Env
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user