Clay custom dispatch improvements & DPI scaling fixes (#26)

Co-authored-by: Zachary Levy <zachary@sunforge.is>
Reviewed-on: #26
This commit was merged in pull request #26.
This commit is contained in:
2026-05-06 04:17:24 +00:00
parent e8ffa28de3
commit 43f08ed30c
19 changed files with 627 additions and 407 deletions
+6 -6
View File
@@ -40,9 +40,9 @@ const uint MAX_KERNEL_PAIRS = 32;
// --- Inputs from vertex shader ---
layout(location = 0) in vec2 p_local;
layout(location = 1) in mediump vec4 f_color;
layout(location = 2) flat in vec2 f_half_size;
layout(location = 3) flat in vec4 f_radii;
layout(location = 4) flat in float f_half_feather;
layout(location = 2) flat in vec2 f_half_size_ppx;
layout(location = 3) flat in vec4 f_radii_ppx;
layout(location = 4) flat in float f_half_feather_ppx;
// --- Output ---
layout(location = 0) out vec4 out_color;
@@ -123,15 +123,15 @@ void main() {
// ---- Mode 1: composite per-primitive.
// RRect SDF — early discard for fragments well outside the masked region.
float d = sdRoundedBox(p_local, f_half_size, f_radii);
if (d > f_half_feather) {
float d = sdRoundedBox(p_local, f_half_size_ppx, f_radii_ppx);
if (d > f_half_feather_ppx) {
discard;
}
// fwidth-based normalization for AA (matches main pipeline approach).
float grad_magnitude = max(fwidth(d), 1e-6);
float d_n = d / grad_magnitude;
float h_n = f_half_feather / grad_magnitude;
float h_n = f_half_feather_ppx / grad_magnitude;
// Sample the fully-blurred working-res texture. gl_FragCoord is full-res; convert to
// working-res UV via inv_downsample_factor. No kernel is applied — the H+V blur passes
+16 -16
View File
@@ -24,12 +24,12 @@
layout(location = 0) out vec2 p_local;
// f_color: tint, unpacked from primitive.color. Only meaningful in mode 1.
layout(location = 1) out mediump vec4 f_color;
// f_half_size: RRect half extents in physical pixels (mode 1 only).
layout(location = 2) flat out vec2 f_half_size;
// f_radii: per-corner radii in physical pixels (mode 1 only).
layout(location = 3) flat out vec4 f_radii;
// f_half_feather: SDF anti-aliasing feather (mode 1 only).
layout(location = 4) flat out float f_half_feather;
// f_half_size_ppx: RRect half extents in physical pixels (mode 1 only).
layout(location = 2) flat out vec2 f_half_size_ppx;
// f_radii_ppx: per-corner radii in physical pixels (mode 1 only).
layout(location = 3) flat out vec4 f_radii_ppx;
// f_half_feather_ppx: SDF anti-aliasing feather in physical pixels (mode 1 only).
layout(location = 4) flat out float f_half_feather_ppx;
// --- Uniforms (set 1) ---
// Backdrop pipeline's own uniform block — distinct from the main pipeline's
@@ -53,10 +53,10 @@ layout(set = 1, binding = 0) uniform Uniforms {
// edge effects (e.g. liquid-glass-style refraction outlines) would be a dedicated
// primitive type with its own pipeline rather than a flag bit here.
struct Gaussian_Blur_Primitive {
vec4 bounds; // 0-15: min_xy, max_xy (world-space)
vec4 radii; // 16-31: per-corner radii (physical px)
vec2 half_size; // 32-39: RRect half extents (physical px)
float half_feather; // 40-43: SDF anti-aliasing feather (physical px)
vec4 bounds; // 0-15: min_xy, max_xy (world-space, logical px)
vec4 radii_ppx; // 16-31: per-corner radii
vec2 half_size_ppx; // 32-39: RRect half extents
float half_feather_ppx; // 40-43: SDF anti-aliasing feather
uint color; // 44-47: tint, packed RGBA u8x4
};
@@ -78,9 +78,9 @@ void main() {
// Mode 0 doesn't read the per-primitive varyings; zero-init for safety.
p_local = vec2(0.0);
f_color = vec4(0.0);
f_half_size = vec2(0.0);
f_radii = vec4(0.0);
f_half_feather = 0.0;
f_half_size_ppx = vec2(0.0);
f_radii_ppx = vec4(0.0);
f_half_feather_ppx = 0.0;
} else {
// ---- Mode 1: V-composite instanced unit-quad over Gaussian_Blur_Primitive ----
Gaussian_Blur_Primitive p = primitives[gl_InstanceIndex];
@@ -101,9 +101,9 @@ void main() {
p_local = (world_pos - center) * dpi_scale;
f_color = unpackUnorm4x8(p.color);
f_half_size = p.half_size;
f_radii = p.radii;
f_half_feather = p.half_feather;
f_half_size_ppx = p.half_size_ppx;
f_radii_ppx = p.radii_ppx;
f_half_feather_ppx = p.half_feather_ppx;
gl_Position = projection * vec4(world_pos * dpi_scale, 0.0, 1.0);
}
+26 -26
View File
@@ -45,7 +45,7 @@ float sdRegularPolygon(vec2 p, float r, float n) {
return length(p) * cos(bn) - r;
}
// Coverage from SDF distance using half-feather width (feather_px * 0.5, pre-computed on CPU).
// Coverage from SDF distance using half-feather width (feather_ppx * 0.5, pre-computed on CPU).
// Produces a symmetric transition centered on d=0: smoothstep(-h, h, d).
float sdf_alpha(float d, float h) {
return 1.0 - smoothstep(-h, h, d);
@@ -80,56 +80,56 @@ void main() {
// SDF path — dispatch on kind
float d = 1e30;
float h = 0.5; // half-feather width; overwritten per shape kind
vec2 half_size = f_params.xy; // used by RRect and as reference size for gradients
float h = 0.5; // half-feather width (physical px); overwritten per shape kind
vec2 half_size_ppx = f_params.xy; // used by RRect and as reference size for gradients
vec2 p_local = f_local_or_uv; // arrives rotated; vertex shader handled .Rotated
vec2 p_local_ppx = f_local_or_uv; // arrives rotated; vertex shader handled .Rotated
if (kind == 1u) {
// RRect — half_feather in params2.z
vec4 corner_radii = vec4(f_params.zw, f_params2.xy);
// RRect — half_feather_ppx in params2.z
vec4 corner_radii_ppx = vec4(f_params.zw, f_params2.xy);
h = f_params2.z;
d = sdRoundedBox(p_local, half_size, corner_radii);
d = sdRoundedBox(p_local_ppx, half_size_ppx, corner_radii_ppx);
}
else if (kind == 2u) {
// NGon — half_feather in params.z
float radius = f_params.x;
// NGon — half_feather_ppx in params.z
float radius_ppx = f_params.x;
float sides = f_params.y;
h = f_params.z;
d = sdRegularPolygon(p_local, radius, sides);
half_size = vec2(radius); // for gradient UV computation
d = sdRegularPolygon(p_local_ppx, radius_ppx, sides);
half_size_ppx = vec2(radius_ppx); // for gradient UV computation
}
else if (kind == 3u) {
// Ellipse — half_feather in params.z
vec2 ab = f_params.xy;
// Ellipse — half_feather_ppx in params.z
vec2 radii_ppx = f_params.xy;
h = f_params.z;
d = sdEllipseApprox(p_local, ab);
half_size = ab; // for gradient UV computation
d = sdEllipseApprox(p_local_ppx, radii_ppx);
half_size_ppx = radii_ppx; // for gradient UV computation
}
else if (kind == 4u) {
// Ring_Arc — half_feather in params2.z
// Ring_Arc — half_feather_ppx in params2.z
// Arc mode from flag bits 5-6: 0 = full, 1 = narrow (≤π), 2 = wide (>π)
float inner = f_params.x;
float outer = f_params.y;
float inner_radius_ppx = f_params.x;
float outer_radius_ppx = f_params.y;
vec2 n_start = f_params.zw;
vec2 n_end = f_params2.xy;
uint arc_bits = (flags >> 5u) & 3u;
h = f_params2.z;
float r = length(p_local);
d = max(inner - r, r - outer);
float r = length(p_local_ppx);
d = max(inner_radius_ppx - r, r - outer_radius_ppx);
if (arc_bits != 0u) {
float d_start = dot(p_local, n_start);
float d_end = dot(p_local, n_end);
float d_start = dot(p_local_ppx, n_start);
float d_end = dot(p_local_ppx, n_end);
float d_wedge = (arc_bits == 1u)
? max(d_start, d_end) // arc ≤ π: intersect half-planes
: min(d_start, d_end); // arc > π: union half-planes
d = max(d, d_wedge);
}
half_size = vec2(outer); // for gradient UV computation
half_size_ppx = vec2(outer_radius_ppx); // for gradient UV computation
}
// --- fwidth-based normalization for correct AA and stroke width ---
@@ -146,18 +146,18 @@ void main() {
if ((flags & 4u) != 0u) {
// Radial gradient (bit 2): t from distance to center
mediump float t = length(p_local / half_size);
mediump float t = length(p_local_ppx / half_size_ppx);
shape_color = gradient_2color(gradient_start, gradient_end, t);
} else {
// Linear gradient: direction pre-computed on CPU as (cos, sin) f16 pair
vec2 direction = unpackHalf2x16(f_effects.z);
mediump float t = dot(p_local / half_size, direction) * 0.5 + 0.5;
mediump float t = dot(p_local_ppx / half_size_ppx, direction) * 0.5 + 0.5;
shape_color = gradient_2color(gradient_start, gradient_end, t);
}
} else if ((flags & 1u) != 0u) {
// Textured (bit 0)
vec4 uv_rect = f_uv_rect;
vec2 local_uv = p_local / half_size * 0.5 + 0.5;
vec2 local_uv = p_local_ppx / half_size_ppx * 0.5 + 0.5;
vec2 uv = mix(uv_rect.xy, uv_rect.zw, local_uv);
shape_color = f_color * texture(tex, uv);
} else {
+31 -14
View File
@@ -1,6 +1,6 @@
#version 450 core
// ---------- Vertex attributes (used in both modes) ----------
// ---------- Vertex attributes (used in all modes) ----------
layout(location = 0) in vec2 v_position;
layout(location = 1) in vec2 v_uv;
layout(location = 2) in vec4 v_color;
@@ -16,10 +16,18 @@ layout(location = 6) flat out vec4 f_uv_rect;
layout(location = 7) flat out uvec4 f_effects;
// ---------- Uniforms (single block — avoids spirv-cross reordering on Metal) ----------
// Mode values mirror Core_2D_Mode in core_2d.odin:
// 0 = Tessellated v_position is in logical pixels; shader scales by dpi_scale.
// 1 = SDF v_position is a unit-quad corner; world-space comes from
// primitives[gl_InstanceIndex].bounds (logical px). Shader
// scales by dpi_scale.
// 2 = Text v_position is in *physical* pixels already (the CPU baked
// the anchor snap and SDL_ttf glyph offsets, both physical).
// Shader must NOT rescale.
layout(set = 1, binding = 0) uniform Uniforms {
mat4 projection;
float dpi_scale;
uint mode; // 0 = tessellated, 1 = SDF
uint mode;
};
// ---------- SDF primitive storage buffer ----------
@@ -44,18 +52,7 @@ layout(std430, set = 0, binding = 0) readonly buffer Core_2D_Primitives {
// ---------- Entry point ----------
void main() {
if (mode == 0u) {
// ---- Mode 0: Tessellated (used for text and arbitrary user geometry) ----
f_color = v_color;
f_local_or_uv = v_uv;
f_params = vec4(0.0);
f_params2 = vec4(0.0);
f_flags = 0u;
f_uv_rect = vec4(0.0);
f_effects = uvec4(0);
gl_Position = projection * vec4(v_position * dpi_scale, 0.0, 1.0);
} else {
if (mode == 1u) {
// ---- Mode 1: SDF instanced quads ----
Core_2D_Primitive p = primitives[gl_InstanceIndex];
@@ -86,5 +83,25 @@ void main() {
f_effects = p.effects;
gl_Position = projection * vec4(world_pos * dpi_scale, 0.0, 1.0);
} else {
// ---- Mode 0 (Tessellated) and Mode 2 (Text) ----
// Both feed the raw-vertex pipeline (kind 0 in the fragment shader).
// They differ only in what coord space `v_position` is in:
// Mode 0 — logical pixels, scale here by dpi_scale.
// Mode 2 — physical pixels (CPU pre-scaled and snapped to integer
// physical pixels for atlas-aligned bilinear sampling).
// Do NOT rescale.
// `mode` is uniform across the workgroup, so the select compiles to a
// uniform-controlled branch with no SIMT divergence cost.
f_color = v_color;
f_local_or_uv = v_uv;
f_params = vec4(0.0);
f_params2 = vec4(0.0);
f_flags = 0u;
f_uv_rect = vec4(0.0);
f_effects = uvec4(0);
vec2 pos = (mode == 2u) ? v_position : (v_position * dpi_scale);
gl_Position = projection * vec4(pos, 0.0, 1.0);
}
}