mirror of
https://github.com/Blinue/Magpie.git
synced 2026-06-24 02:04:10 +00:00
perf: 优化 Anime4K_Denoise_Bilateral_Mode 的性能
This commit is contained in:
parent
88383e0e6a
commit
4708cc812e
1 changed files with 78 additions and 37 deletions
|
|
@ -1,3 +1,4 @@
|
|||
// Anime4K_Denoise_Bilateral_Mode
|
||||
// 移植自 https://github.com/bloc97/Anime4K/blob/master/glsl/Denoise/Anime4K_Denoise_Bilateral_Mode.glsl
|
||||
|
||||
|
||||
|
|
@ -21,9 +22,9 @@ SamplerState sam;
|
|||
|
||||
|
||||
//!PASS 1
|
||||
//!STYLE PS
|
||||
//!IN INPUT
|
||||
|
||||
//!BLOCK_SIZE 16,16
|
||||
//!NUM_THREADS 64,1,1
|
||||
|
||||
#define INTENSITY_SIGMA intensitySigma //Intensity window size, higher is stronger denoise, must be a positive real number
|
||||
#define SPATIAL_SIGMA 1.0 //Spatial window size, higher is stronger denoise, must be a positive real number.
|
||||
|
|
@ -46,49 +47,89 @@ float gaussian(float x, float s, float m) {
|
|||
return exp(-0.5 * scaled * scaled);
|
||||
}
|
||||
|
||||
// Pass 1: bilateral "mode" denoise.
//
// Each invocation produces a 2x2 quad of output pixels whose top-left corner is gxy.
// The (KERNELSIZE + 1) x (KERNELSIZE + 1) neighbourhood needed by the whole quad is
// fetched once with Gather instructions and cached in `src`, then every pixel of the
// quad is filtered from that cache.
//
// For one output pixel, every kernel tap gets a weight (intensity gaussian times
// spatial gaussian). The weights are then accumulated between taps of similar luma,
// and the colour of the tap with the largest accumulated weight is written out.
//
// blockStart: added to the remapped thread position to form gxy.
// threadId:   only threadId.x is read; Rmp8x8 turns it into a 2D position
//             (presumably inside an 8x8 tile - confirm against the Rmp8x8 definition).
void Pass1(uint2 blockStart, uint3 threadId) {
	// The remapped position is doubled because each thread owns a 2x2 quad.
	uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart;
	if (!CheckViewport(gxy)) {
		return;
	}

	float2 inputPt = GetInputPt();
	uint i, j, k, m;

	// Neighbourhood cache shared by the four output pixels: rgb = colour, a = luma.
	float4 src[KERNELSIZE + 1][KERNELSIZE + 1];
	[unroll]
	for (i = 0; i <= KERNELSIZE - 1; i += 2) {
		[unroll]
		for (j = 0; j <= KERNELSIZE - 1; j += 2) {
			// One gather position per 2x2 group of texels.
			float2 tpos = (gxy + int2(i, j) - KERNELHALFSIZE + 1) * inputPt;
			const float4 sr = INPUT.GatherRed(sam, tpos);
			const float4 sg = INPUT.GatherGreen(sam, tpos);
			const float4 sb = INPUT.GatherBlue(sam, tpos);

			// Layout of the gathered components:
			// w z
			// x y
			src[i][j] = float4(sr.w, sg.w, sb.w, get_luma(float3(sr.w, sg.w, sb.w)));
			src[i][j + 1] = float4(sr.x, sg.x, sb.x, get_luma(float3(sr.x, sg.x, sb.x)));
			src[i + 1][j] = float4(sr.z, sg.z, sb.z, get_luma(float3(sr.z, sg.z, sb.z)));
			src[i + 1][j + 1] = float4(sr.y, sg.y, sb.y, get_luma(float3(sr.y, sg.y, sb.y)));
		}
	}

	[unroll]
	for (i = 0; i <= 1; ++i) {
		[unroll]
		for (j = 0; j <= 1; ++j) {
			const uint2 destPos = gxy + uint2(i, j);

			// gxy itself was validated above, so only the other three pixels of the
			// quad need a check - and it has to be made on destPos, not on gxy.
			// CheckViewport presumably rejects positions outside the output viewport.
			if ((i != 0 || j != 0) && !CheckViewport(destPos)) {
				continue;
			}

			float3 histogram_v[KERNELLEN];	// colour of each tap
			float histogram_l[KERNELLEN];	// luma of each tap
			float histogram_w[KERNELLEN];	// bilateral weight of each tap
			float histogram_wn[KERNELLEN];	// weight accumulated over taps of similar luma

			// Luma of the pixel being filtered (centre of its kernel inside the cache).
			float vc = src[KERNELHALFSIZE + i][KERNELHALFSIZE + j].a;

			// The intensity window depends on the centre luma.
			float is = pow(vc + 0.0001, INTENSITY_POWER_CURVE) * INTENSITY_SIGMA;
			float ss = SPATIAL_SIGMA;

			[unroll]
			for (k = 0; k < KERNELLEN; k++) {
				const int2 ipos = GETOFFSET(k);
				// Tap offset translated into cache coordinates for this quad pixel.
				const uint2 idx = uint2(i, j) + ipos.yx + KERNELHALFSIZE;
				histogram_v[k] = src[idx.x][idx.y].rgb;
				histogram_l[k] = src[idx.x][idx.y].a;
				histogram_w[k] = gaussian(histogram_l[k], is, vc) * gaussian(length(ipos), ss, 0.0);
				histogram_wn[k] = 0.0;
			}

			// Every unordered pair of taps is visited once and credited in both directions.
			[unroll]
			for (k = 0; k < KERNELLEN; k++) {
				histogram_wn[k] += gaussian(0.0, HISTOGRAM_REGULARIZATION, 0.0) * histogram_w[k];
				[unroll]
				for (m = k + 1; m < KERNELLEN; m++) {
					float d = gaussian(histogram_l[m], HISTOGRAM_REGULARIZATION, histogram_l[k]);
					histogram_wn[m] += d * histogram_w[k];
					histogram_wn[k] += d * histogram_w[m];
				}
			}

			// Pick the tap with the largest accumulated weight; on ties the later tap wins.
			float3 maxv = 0;
			float maxw = 0;

			[unroll]
			for (k = 0; k < KERNELLEN; ++k) {
				if (histogram_wn[k] >= maxw) {
					maxw = histogram_wn[k];
					maxv = histogram_v[k];
				}
			}

			WriteToOutput(destPos, maxv);
		}
	}
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue