Skip to content

Commit

Permalink
[Impeller] Migrate gaussian blur to half precision. (#40800)
Browse files Browse the repository at this point in the history
[Impeller] Migrate gaussian blur to half precision.
  • Loading branch information
jonahwilliams authored Apr 1, 2023
1 parent 7a19241 commit c56d5fb
Show file tree
Hide file tree
Showing 7 changed files with 215 additions and 199 deletions.
40 changes: 21 additions & 19 deletions impeller/compiler/shader_lib/impeller/gaussian.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -6,51 +6,53 @@
#define GAUSSIAN_GLSL_

#include <impeller/constants.glsl>
#include <impeller/types.glsl>

/// Gaussian distribution function.
///
/// Evaluates exp(-0.5 * x^2 / sigma^2) / (kSqrtTwoPi * sigma), i.e. the
/// density at `x` of a zero-mean Gaussian with standard deviation `sigma`
/// (assuming kSqrtTwoPi is sqrt(2 * pi), as its name suggests -- TODO confirm
/// against constants.glsl).
///
/// `sigma` must be non-zero: both the exponent and the normalization term
/// divide by it.
float IPGaussian(float x, float sigma) {
float variance = sigma * sigma;
return exp(-0.5 * x * x / variance) / (kSqrtTwoPi * sigma);
float16_t IPGaussian(float16_t x, float16_t sigma) {
float16_t variance = sigma * sigma;
// The float16_t(...) conversion keeps the divisor in half precision.
return exp(-0.5hf * x * x / variance) / (float16_t(kSqrtTwoPi) * sigma);
}

/// Abramowitz and Stegun erf approximation.
///
/// Returns sign(x) * (1 - 1 / b^4), where b is the polynomial below evaluated
/// at a = |x|. Evaluating on |x| and re-applying sign(x) makes the result odd
/// in x; sign(0) is 0, so the result at the origin is exactly 0.
float IPErf(float x) {
float a = abs(x);
float16_t IPErf(float16_t x) {
float16_t a = abs(x);
// b = 0.278393*a + 0.230389*a^2 + 0.078108*a^4 + 1 (a = |x|), written in
// nested form.
float b = (0.278393 + (0.230389 + 0.078108 * a * a) * a) * a + 1.0;
return sign(x) * (1 - 1 / (b * b * b * b));
float16_t b =
(0.278393hf + (0.230389hf + 0.078108hf * a * a) * a) * a + 1.0hf;
return sign(x) * (1.0hf - 1.0hf / (b * b * b * b));
}

/// Vec2 variation for the Abramowitz and Stegun erf approximation.
///
/// Applies the same formula as the scalar IPErf to each component of `x`
/// independently: sign(x) * (1 - 1 / b^4) with b evaluated at a = |x|.
vec2 IPVec2Erf(vec2 x) {
vec2 a = abs(x);
f16vec2 IPVec2Erf(f16vec2 x) {
f16vec2 a = abs(x);
// b = 0.278393*a + 0.230389*a^2 + 0.078108*a^4 + 1 (a = |x|), written in
// nested form.
vec2 b = (0.278393 + (0.230389 + 0.078108 * a * a) * a) * a + 1.0;
return sign(x) * (1 - 1 / (b * b * b * b));
f16vec2 b = (0.278393hf + (0.230389hf + 0.078108hf * a * a) * a) * a + 1.0hf;
return sign(x) * (1.0hf - 1.0hf / (b * b * b * b));
}

/// The integral of the Gaussian function, expressed through erf:
/// (1 + erf(x / (sqrt(2) * sigma))) / 2, which runs from 0 to 1 as x
/// increases.
/// Uses a very close approximation of Erf (see IPErf).
///
/// `sigma` must be non-zero because the argument of erf divides by it.
float IPGaussianIntegral(float x, float sigma) {
float16_t IPGaussianIntegral(float16_t x, float16_t sigma) {
// (1 + erf(x * (sqrt(2) / (2 * sigma)))) / 2
return (1 + IPErf(x * (kHalfSqrtTwo / sigma))) * 0.5;
return (1.0hf + IPErf(x * (float16_t(kHalfSqrtTwo) / sigma))) * 0.5hf;
}

/// Vec2 variation for the integral of the Gaussian function: evaluates
/// (1 + erf(x / (sqrt(2) * sigma))) / 2 for each component of `x`, sharing a
/// single scalar `sigma`.
/// Uses a very close approximation of Erf (see IPVec2Erf).
///
/// `sigma` must be non-zero because the argument of erf divides by it.
vec2 IPVec2GaussianIntegral(vec2 x, float sigma) {
f16vec2 IPVec2GaussianIntegral(f16vec2 x, float16_t sigma) {
// (1 + erf(x * (sqrt(2) / (2 * sigma)))) / 2
return (1 + IPVec2Erf(x * (kHalfSqrtTwo / sigma))) * 0.5;
return (1.0hf + IPVec2Erf(x * (float16_t(kHalfSqrtTwo) / sigma))) * 0.5hf;
}

/// Simpler (but less accurate) approximation of the Gaussian integral.
///
/// Replaces the erf-based form with a logistic curve evaluated per component:
/// 1 / (1 + exp(-kSqrtThree / sigma * x)). `sigma` must be non-zero because
/// the exponent divides by it.
vec2 IPVec2FastGaussianIntegral(vec2 x, float sigma) {
return 1 / (1 + exp(-kSqrtThree / sigma * x));
f16vec2 IPVec2FastGaussianIntegral(f16vec2 x, float16_t sigma) {
return 1.0hf / (1.0hf + exp(float16_t(-kSqrtThree) / sigma * x));
}

/// Simple logistic sigmoid with a domain of [-1, 1] and range of [0, 1].
///
/// The curve is 1 / (1 + exp(-4 * x)), rescaled by the two constants below so
/// that it evaluates to 0 at x = -1 and to 1 at x = 1 instead of only
/// approaching those values asymptotically.
float IPSigmoid(float x) {
return 1.03731472073 / (1 + exp(-4 * x)) - 0.0186573603638;
float16_t IPSigmoid(float16_t x) {
// NOTE(review): these literals carry more digits than float16_t can
// represent, so the endpoints are presumably only approximately 0 and 1 in
// half precision -- confirm that is acceptable for callers.
return 1.03731472073hf / (1.0hf + exp(-4.0hf * x)) - 0.0186573603638hf;
}

#endif
9 changes: 9 additions & 0 deletions impeller/compiler/shader_lib/impeller/texture.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,15 @@ vec4 IPSampleDecal(sampler2D texture_sampler, vec2 coords) {
return texture(texture_sampler, coords);
}

/// Sample a half-precision texture with decal tile mode: coordinates outside
/// the [0, 1) range on either axis yield transparent black rather than a
/// texture fetch.
f16vec4 IPHalfSampleDecal(f16sampler2D texture_sampler, vec2 coords) {
  bool outside_unit_square = coords.x < 0.0 || coords.y < 0.0 ||  //
                             coords.x >= 1.0 || coords.y >= 1.0;
  if (outside_unit_square) {
    return f16vec4(0.0);
  }
  return texture(texture_sampler, coords);
}

/// Sample a texture, emulating a specific tile mode.
///
/// This is useful for Impeller graphics backend that don't have native support
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -198,10 +198,9 @@ std::optional<Entity> DirectionalGaussianBlurFilterContents::RenderFilter(
frag_info.blur_radius = r.radius;

// The blur direction is in input UV space.
frag_info.blur_direction =
pass_transform.Invert().TransformDirection(Vector2(1, 0)).Normalize();

frag_info.texture_size = Point(input_snapshot->GetCoverage().value().size);
frag_info.blur_uv_offset =
pass_transform.Invert().TransformDirection(Vector2(1, 0)).Normalize() /
Point(input_snapshot->GetCoverage().value().size);

Command cmd;
cmd.label = SPrintF("Gaussian Blur Filter (Radius=%.2f)",
Expand Down
40 changes: 20 additions & 20 deletions impeller/entity/shaders/border_mask_blur.frag
Original file line number Diff line number Diff line change
Expand Up @@ -15,42 +15,42 @@
// integral (using an erf approximation) to the 4 edges of the UV rectangle and
// multiplying them.

// Texture whose sampled color main() scales by the computed mask factor.
uniform sampler2D texture_sampler;
uniform f16sampler2D texture_sampler;

uniform FragInfo {
// Weights combined in main():
//   inside the unit UV square:  inner_blur_factor * blur + src_factor
//   outside the unit UV square: outer_blur_factor * blur
float src_factor;
float inner_blur_factor;
float outer_blur_factor;
float16_t src_factor;
float16_t inner_blur_factor;
float16_t outer_blur_factor;

// Gaussian sigma per axis, passed to IPGaussianIntegral together with UV
// distances (so presumably expressed in UV units -- confirm host side).
vec2 sigma_uv;
f16vec2 sigma_uv;
}
frag_info;

// Interpolated UV of this fragment; values outside [0, 1) are treated as
// outside the source rectangle by main().
in vec2 v_texture_coords;

out vec4 frag_color;
out f16vec4 frag_color;

// Blur mask for the unit UV square at `uv`: the product of four Gaussian
// integrals, one per edge, each evaluated at the signed distance from `uv` to
// that edge (uv.x, uv.y, 1 - uv.x, 1 - uv.y) with the matching axis sigma
// from frag_info.sigma_uv.
float BoxBlurMask(vec2 uv) {
float16_t BoxBlurMask(f16vec2 uv) {
// LTRB
return IPGaussianIntegral(uv.x, frag_info.sigma_uv.x) * //
IPGaussianIntegral(uv.y, frag_info.sigma_uv.y) * //
IPGaussianIntegral(1 - uv.x, frag_info.sigma_uv.x) * //
IPGaussianIntegral(1 - uv.y, frag_info.sigma_uv.y);
return IPGaussianIntegral(uv.x, frag_info.sigma_uv.x) * //
IPGaussianIntegral(uv.y, frag_info.sigma_uv.y) * //
IPGaussianIntegral(1.0hf - uv.x, frag_info.sigma_uv.x) * //
IPGaussianIntegral(1.0hf - uv.y, frag_info.sigma_uv.y);
}

// Scales the sampled texture color by a mask factor built from the box blur
// mask, using different weights inside and outside the unit UV square.
void main() {
vec4 image_color = texture(texture_sampler, v_texture_coords);
float blur_factor = BoxBlurMask(v_texture_coords);
f16vec4 image_color = texture(texture_sampler, v_texture_coords);
float16_t blur_factor = BoxBlurMask(f16vec2(v_texture_coords));

// 1 when the fragment's UV lies in [0, 1) on both axes, otherwise 0. Used
// below as a branch-free selector between the inner and outer weights.
float within_bounds =
float(v_texture_coords.x >= 0 && v_texture_coords.y >= 0 &&
v_texture_coords.x < 1 && v_texture_coords.y < 1);
float inner_factor =
float16_t within_bounds =
float16_t(v_texture_coords.x >= 0.0 && v_texture_coords.y >= 0.0 &&
v_texture_coords.x < 1.0 && v_texture_coords.y < 1.0);
float16_t inner_factor =
(frag_info.inner_blur_factor * blur_factor + frag_info.src_factor) *
within_bounds;
float outer_factor =
frag_info.outer_blur_factor * blur_factor * (1 - within_bounds);
float16_t outer_factor =
frag_info.outer_blur_factor * blur_factor * (1.0hf - within_bounds);

// Exactly one of the two factors is non-zero for any fragment, so the sum
// selects the applicable one.
float mask_factor = inner_factor + outer_factor;
float16_t mask_factor = inner_factor + outer_factor;
frag_color = image_color * mask_factor;
}
53 changes: 28 additions & 25 deletions impeller/entity/shaders/gaussian_blur/gaussian_blur.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -18,35 +18,34 @@
#include <impeller/texture.glsl>
#include <impeller/types.glsl>

// Texture that main() samples repeatedly along the blur direction.
uniform sampler2D texture_sampler;
uniform f16sampler2D texture_sampler;

uniform BlurInfo {
vec2 texture_size;
vec2 blur_direction;
// UV-space step between adjacent blur samples: main() multiplies it by the
// sample index and adds it to v_texture_coords. It replaces the per-fragment
// blur_direction / texture_size division with a value supplied by the host.
f16vec2 blur_uv_offset;

// The blur sigma and radius have a linear relationship which is defined
// host-side, but both are useful controls here. Sigma (pixels per standard
// deviation) is used to define the gaussian function itself, whereas the
// radius is used to limit how much of the function is integrated.
float blur_sigma;
float blur_radius;
float16_t blur_sigma;
float16_t blur_radius;
}
blur_info;

#if ENABLE_ALPHA_MASK
// Source texture whose alpha selects between the inner and outer blur
// factors in main().
uniform sampler2D alpha_mask_sampler;
uniform f16sampler2D alpha_mask_sampler;

uniform MaskInfo {
// main() outputs blurred * blur_factor + src_color * src_factor, where
// blur_factor is inner_blur_factor when the mask alpha is > 0 and
// outer_blur_factor when it is == 0.
float src_factor;
float inner_blur_factor;
float outer_blur_factor;
float16_t src_factor;
float16_t inner_blur_factor;
float16_t outer_blur_factor;
}
mask_info;
#endif

vec4 Sample(sampler2D tex, vec2 coords) {
f16vec4 Sample(f16sampler2D tex, vec2 coords) {
#if ENABLE_DECAL_SPECIALIZATION
return IPSampleDecal(tex, coords);
return IPHalfSampleDecal(tex, coords);
#else
return texture(tex, coords);
#endif
Expand All @@ -55,31 +54,35 @@ vec4 Sample(sampler2D tex, vec2 coords) {
in vec2 v_texture_coords;
in vec2 v_src_texture_coords;

out vec4 frag_color;
out f16vec4 frag_color;

void main() {
vec4 total_color = vec4(0);
float gaussian_integral = 0;
vec2 blur_uv_offset = blur_info.blur_direction / blur_info.texture_size;
f16vec4 total_color = f16vec4(0.0hf);
float16_t gaussian_integral = 0.0hf;

for (float i = -blur_info.blur_radius; i <= blur_info.blur_radius; i++) {
float gaussian = IPGaussian(i, blur_info.blur_sigma);
for (float16_t i = -blur_info.blur_radius; i <= blur_info.blur_radius; i++) {
float16_t gaussian = IPGaussian(i, blur_info.blur_sigma);
gaussian_integral += gaussian;
total_color +=
gaussian *
Sample(texture_sampler, // sampler
v_texture_coords + blur_uv_offset * i // texture coordinates
);
gaussian * Sample(texture_sampler, // sampler
v_texture_coords + blur_info.blur_uv_offset *
i // texture coordinates
);
}

frag_color = total_color / gaussian_integral;

#if ENABLE_ALPHA_MASK
vec4 src_color = Sample(alpha_mask_sampler, // sampler
v_src_texture_coords // texture coordinates
f16vec4 src_color = Sample(alpha_mask_sampler, // sampler
v_src_texture_coords // texture coordinates
);
float blur_factor = mask_info.inner_blur_factor * float(src_color.a > 0) +
mask_info.outer_blur_factor * float(src_color.a == 0);

// Pick the blur weight from the mask alpha. Start from zero so that an alpha
// that is neither > 0 nor == 0 (negative or NaN) gives the blurred color no
// weight, matching the previous
//   inner * float(a > 0) + outer * float(a == 0)
// formulation, instead of reading an uninitialized variable.
float16_t blur_factor = 0.0hf;
if (src_color.a > 0.0hf) {
  blur_factor = mask_info.inner_blur_factor;
} else if (src_color.a == 0.0hf) {
  blur_factor = mask_info.outer_blur_factor;
}

frag_color = frag_color * blur_factor + src_color * mask_info.src_factor;
#endif
Expand Down
59 changes: 31 additions & 28 deletions impeller/entity/shaders/rrect_blur.frag
Original file line number Diff line number Diff line change
Expand Up @@ -6,58 +6,61 @@
#include <impeller/types.glsl>

uniform FragInfo {
// color:         base output color; main() multiplies it by the shadow mask.
// rect_size:     full size of the rect; main() halves it to get half_size.
// blur_sigma:    Gaussian sigma; main() takes the blurred path only when it
//                is > 0.
// corner_radius: radius of the rounded corners used by the distance helpers.
vec4 color;
float blur_sigma;
vec2 rect_size;
float corner_radius;
f16vec4 color;
f16vec2 rect_size;
float16_t blur_sigma;
float16_t corner_radius;
}
frag_info;

// Fragment position; main() subtracts half_size from it to obtain a
// rect-centered sample position.
in vec2 v_position;

out vec4 frag_color;
out f16vec4 frag_color;

// Number of samples RRectShadow takes along Y when integrating the X blur.
const int kSampleCount = 4;

// Distance from `sample_position` to the boundary of a rounded rect centered
// at the origin with half extents `half_size` and corner radius
// frag_info.corner_radius. The first term measures distance outside the
// corner-inset box, the second is the (non-positive) interior term, and the
// radius is subtracted at the end, so the result is negative inside the
// shape (main() negates it before use).
float RRectDistance(vec2 sample_position, vec2 half_size) {
vec2 space = abs(sample_position) - half_size + frag_info.corner_radius;
return length(max(space, 0.0)) + min(max(space.x, space.y), 0.0) -
frag_info.corner_radius;
float16_t RRectDistance(f16vec2 sample_position, f16vec2 half_size) {
f16vec2 space = abs(sample_position) - half_size + frag_info.corner_radius;
return length(max(space, float16_t(0.0hf))) +
min(max(space.x, space.y), float16_t(0.0hf)) - frag_info.corner_radius;
}

/// Closed form unidirectional rounded rect blur mask solution using the
/// analytical Gaussian integral, evaluated here through the logistic
/// approximation in IPVec2FastGaussianIntegral.
///
/// Returns the blurred coverage along X for the row at `sample_position.y`:
/// the difference between the integral taken at the row's right and left
/// edges.
float RRectShadowX(vec2 sample_position, vec2 half_size) {
float16_t RRectShadowX(f16vec2 sample_position, f16vec2 half_size) {
// Compute the X direction distance field (not incorporating the Y distance)
// for the rounded rect.
// `space` is 0 while |y| stays within the straight part of the side
// (half_size.y - corner_radius) and negative by the overshoot once the row
// enters the corner band. `rrect_distance` is then the half width of the
// shape on this row: the straight part plus the corner circle's chord,
// sqrt(r^2 - space^2), clamped at 0 for rows beyond the shape.
float space =
min(0, half_size.y - frag_info.corner_radius - abs(sample_position.y));
float rrect_distance =
float16_t space =
min(float16_t(0.0hf),
half_size.y - frag_info.corner_radius - abs(sample_position.y));
float16_t rrect_distance =
half_size.x - frag_info.corner_radius +
sqrt(max(0, frag_info.corner_radius * frag_info.corner_radius -
space * space));
sqrt(max(
float16_t(0.0hf),
frag_info.corner_radius * frag_info.corner_radius - space * space));

// Map the linear distance field to the approximate Gaussian integral.
// x holds the offset to the left edge and y the offset to the right edge.
vec2 integral = IPVec2FastGaussianIntegral(
sample_position.x + vec2(-rrect_distance, rrect_distance),
f16vec2 integral = IPVec2FastGaussianIntegral(
sample_position.x + f16vec2(-rrect_distance, rrect_distance),
frag_info.blur_sigma);
return integral.y - integral.x;
}

float RRectShadow(vec2 sample_position, vec2 half_size) {
float16_t RRectShadow(f16vec2 sample_position, f16vec2 half_size) {
// Limit the sampling range to 3 standard deviations in the Y direction from
// the kernel center to incorporate 99.7% of the color contribution.
float half_sampling_range = frag_info.blur_sigma * 3;
float16_t half_sampling_range = frag_info.blur_sigma * 3.0hf;

float begin_y = max(-half_sampling_range, sample_position.y - half_size.y);
float end_y = min(half_sampling_range, sample_position.y + half_size.y);
float interval = (end_y - begin_y) / kSampleCount;
float16_t begin_y =
max(-half_sampling_range, sample_position.y - half_size.y);
float16_t end_y = min(half_sampling_range, sample_position.y + half_size.y);
float16_t interval = (end_y - begin_y) / float16_t(kSampleCount);

// Sample the X blur kSampleCount times, weighted by the Gaussian function.
float result = 0;
float16_t result = 0.0hf;
for (int sample_i = 0; sample_i < kSampleCount; sample_i++) {
float y = begin_y + interval * (sample_i + 0.5);
result += RRectShadowX(vec2(sample_position.x, sample_position.y - y),
float16_t y = begin_y + interval * (float16_t(sample_i) + 0.5hf);
result += RRectShadowX(f16vec2(sample_position.x, sample_position.y - y),
half_size) *
IPGaussian(y, frag_info.blur_sigma) * interval;
}
Expand All @@ -68,10 +71,10 @@ float RRectShadow(vec2 sample_position, vec2 half_size) {
void main() {
frag_color = frag_info.color;

vec2 half_size = frag_info.rect_size * 0.5;
vec2 sample_position = v_position - half_size;
f16vec2 half_size = frag_info.rect_size * 0.5hf;
f16vec2 sample_position = f16vec2(v_position) - half_size;

if (frag_info.blur_sigma > 0) {
if (frag_info.blur_sigma > 0.0hf) {
frag_color *= RRectShadow(sample_position, half_size);
} else {
frag_color *= -RRectDistance(sample_position, half_size);
Expand Down
Loading

0 comments on commit c56d5fb

Please sign in to comment.