Magpie/Effects/FSR_RCAS.hlsl
刘旭 5c05330d1f feat: 每个通道的入口改为 PassN
虽然Main更统一,但不易阅读
2022-03-04 15:34:22 +08:00

132 lines
3.4 KiB
HLSL

// RCAS (sharpening) pass of FidelityFX-FSR
// Ported from https://github.com/GPUOpen-Effects/FidelityFX-FSR/blob/master/ffx-fsr/ffx_fsr1.h
// Effect header: effect format version 2. The output width and height are
// declared equal to the input width and height.
//!MAGPIE EFFECT
//!VERSION 2
//!OUTPUT_WIDTH INPUT_WIDTH
//!OUTPUT_HEIGHT INPUT_HEIGHT
// Sharpening strength (default 0.87, minimum 1e-5). FsrRcasF multiplies the
// clamped filter lobe by this value.
//!PARAMETER
//!DEFAULT 0.87
//!MIN 1e-5
float sharpness;
// Source image; FsrRcasF reads it with integer-coordinate Load calls.
//!TEXTURE
Texture2D INPUT;
// Point-filtered sampler. NOTE(review): not referenced by the code visible in
// this part of the file — confirm whether it is still needed.
//!SAMPLER
//!FILTER POINT
SamplerState sam;
// Pass 1 takes INPUT as its source. Pass1 below writes up to four pixels per
// thread, 8 pixels apart; with 64 threads that presumably covers one 16x16
// block — confirm against the Magpie effect format documentation.
//!PASS 1
//!IN INPUT
//!BLOCK_SIZE 16, 16
//!NUM_THREADS 64, 1, 1
// Three-operand min/max composed from the two-operand intrinsics.
#define min3(a, b, c) min(a, min(b, c))
#define max3(a, b, c) max(a, max(b, c))
// This is set at the limit of providing unnatural results for sharpening.
// Evaluates to 0.1875; FsrRcasF uses its negation as the lower bound of the lobe.
#define FSR_RCAS_LIMIT (0.25-(1.0/16.0))
// Returns the RCAS-sharpened colour of the INPUT pixel at `pos`.
//
// The filter reads the centre pixel and its four direct neighbours, derives a
// non-positive weight ("lobe") for the neighbours that is limited by the local
// per-channel min/max, by FSR_RCAS_LIMIT and by a luma-based noise factor, and
// then resolves the weighted sum normalised by the total weight.
float3 FsrRcasF(uint2 pos) {
    // Cross-shaped neighbourhood:
    //     north
    // west centre east
    //     south
    float3 north = INPUT.Load(int3(pos.x, pos.y - 1, 0)).rgb;
    float3 west = INPUT.Load(int3(pos.x - 1, pos.y, 0)).rgb;
    float3 centre = INPUT.Load(int3(pos, 0)).rgb;
    float3 east = INPUT.Load(int3(pos.x + 1, pos.y, 0)).rgb;
    float3 south = INPUT.Load(int3(pos.x, pos.y + 1, 0)).rgb;

    // Approximate luma scaled by two: 0.5 * R + G + 0.5 * B.
    float lumaN = north.b * 0.5 + (north.r * 0.5 + north.g);
    float lumaW = west.b * 0.5 + (west.r * 0.5 + west.g);
    float lumaC = centre.b * 0.5 + (centre.r * 0.5 + centre.g);
    float lumaE = east.b * 0.5 + (east.r * 0.5 + east.g);
    float lumaS = south.b * 0.5 + (south.r * 0.5 + south.g);

    // Noise factor: how far the centre luma deviates from the neighbour mean,
    // relative to the local luma range, remapped to 1.0 - 0.5 * saturate(ratio).
    float lumaMax = max3(max3(lumaN, lumaW, lumaC), lumaE, lumaS);
    float lumaMin = min3(min3(lumaN, lumaW, lumaC), lumaE, lumaS);
    float noise = 0.25 * lumaN + 0.25 * lumaW + 0.25 * lumaE + 0.25 * lumaS - lumaC;
    noise = saturate(abs(noise) * rcp(lumaMax - lumaMin));
    noise = -0.5 * noise + 1.0;

    // Per-channel min and max over the four neighbours (centre excluded).
    float3 ringMin = min(min3(north, west, east), south);
    float3 ringMax = max(max3(north, west, east), south);

    // Constants for the peak range: x is the peak value, y is -4 * peak.
    float2 peakC = { 1.0, -1.0 * 4.0 };

    // Per-channel limiters; these need to be high precision reciprocals.
    float3 hitMin = min(ringMin, centre) * rcp(4.0 * ringMax);
    float3 hitMax = (peakC.x - max(ringMax, centre)) * rcp(4.0 * ringMin + peakC.y);
    float3 lobeRGB = max(-hitMin, hitMax);

    // Take the largest channel lobe, clamp it to [-FSR_RCAS_LIMIT, 0], then
    // scale by the user-controlled sharpness and the noise factor.
    float lobe = max(-FSR_RCAS_LIMIT, min(max3(lobeRGB.r, lobeRGB.g, lobeRGB.b), 0)) * sharpness;
    lobe *= noise;

    // Resolve: neighbours weighted by the lobe, centre weighted by one,
    // normalised by the sum of the weights (4 * lobe + 1).
    float rcpL = rcp(4.0 * lobe + 1.0);
    return (lobe * north + lobe * west + lobe * south + lobe * east + centre) * rcpL;
}
// Entry point of pass 1. Each thread handles up to four pixels: the one that
// Rmp8x8 assigns to it within the block, plus the pixels 8 to the right,
// 8 right and 8 down, and 8 down. If the first pixel fails CheckViewport the
// thread writes nothing at all; the other three are checked individually.
void Pass1(uint2 blockStart, uint3 threadId) {
    uint2 p00 = blockStart + Rmp8x8(threadId.x);

    if (CheckViewport(p00)) {
        WriteToOutput(p00, FsrRcasF(p00));

        uint2 p10 = uint2(p00.x + 8u, p00.y);
        if (CheckViewport(p10)) {
            WriteToOutput(p10, FsrRcasF(p10));
        }

        uint2 p11 = uint2(p10.x, p10.y + 8u);
        if (CheckViewport(p11)) {
            WriteToOutput(p11, FsrRcasF(p11));
        }

        uint2 p01 = uint2(p00.x, p11.y);
        if (CheckViewport(p01)) {
            WriteToOutput(p01, FsrRcasF(p01));
        }
    }
}