perf: 优化部分效果

This commit is contained in:
Xu 2026-04-09 13:03:36 +08:00
commit 49d5a7d24d
12 changed files with 43 additions and 49 deletions

View file

@ -460,17 +460,11 @@ void Pass1(uint2 blockStart, uint3 threadId) {
OUTPUT[gxy] = MF4(FsrEasu(gxy, con0, con1, con2, con3), 1);
gxy.x += 8u;
if (gxy.x < outputSize.x && gxy.y < outputSize.y) {
OUTPUT[gxy] = MF4(FsrEasu(gxy, con0, con1, con2, con3), 1);
}
OUTPUT[gxy] = MF4(FsrEasu(gxy, con0, con1, con2, con3), 1);
gxy.y += 8u;
if (gxy.x < outputSize.x && gxy.y < outputSize.y) {
OUTPUT[gxy] = MF4(FsrEasu(gxy, con0, con1, con2, con3), 1);
}
OUTPUT[gxy] = MF4(FsrEasu(gxy, con0, con1, con2, con3), 1);
gxy.x -= 8u;
if (gxy.x < outputSize.x && gxy.y < outputSize.y) {
OUTPUT[gxy] = MF4(FsrEasu(gxy, con0, con1, con2, con3), 1);
}
OUTPUT[gxy] = MF4(FsrEasu(gxy, con0, con1, con2, con3), 1);
}

View file

@ -163,17 +163,10 @@
#define NVU2 uint2
#define NVB bool
#if NIS_USE_HALF_PRECISION
#if NIS_HLSL_6_2
#define NVH float16_t
#define NVH2 float16_t2
#define NVH3 float16_t3
#define NVH4 float16_t4
#else
#define NVH min16float
#define NVH2 min16float2
#define NVH3 min16float3
#define NVH4 min16float4
#endif // NIS_HLSL_6_2
#define NVH MF
#define NVH2 MF2
#define NVH3 MF3
#define NVH4 MF4
#else // FP32 types
#define NVH NVF
#define NVH2 NVF2

View file

@ -1,5 +1,6 @@
//!MAGPIE EFFECT
//!VERSION 4
//!VERSION 5
//!CAPABILITY AdvancedColor
//!TEXTURE
Texture2D INPUT;

View file

@ -2,7 +2,7 @@
// 移植自 https://github.com/SnapdragonStudios/snapdragon-gsr/blob/main/sgsr/v1/include/hlsl/sgsr1_shader_mobile.hlsl
//!MAGPIE EFFECT
//!VERSION 4
//!VERSION 5
//!PARAMETER
//!LABEL Edge Sharpness
@ -154,8 +154,8 @@ float3 SgsrYuvH(float2 uv, float4 con1)
float deltaY = finalY - pix_G;
pix = saturate(pix+deltaY);
}
pix += deltaY;
}
return pix;
}

View file

@ -41,7 +41,7 @@ SamplerState LinearSampler;
//!COMMON
#define SMAA_RT_METRICS float4(GetInputPt(), GetInputSize())
static float4 SMAA_RT_METRICS = { GetInputPt(), GetInputSize() };
#define SMAA_LINEAR_SAMPLER LinearSampler
#define SMAA_POINT_SAMPLER PointSampler
#define SMAA_PRESET_HIGH

View file

@ -41,7 +41,7 @@ SamplerState LinearSampler;
//!COMMON
#define SMAA_RT_METRICS float4(GetInputPt(), GetInputSize())
static float4 SMAA_RT_METRICS = { GetInputPt(), GetInputSize() };
#define SMAA_LINEAR_SAMPLER LinearSampler
#define SMAA_POINT_SAMPLER PointSampler
#define SMAA_PRESET_LOW

View file

@ -41,7 +41,7 @@ SamplerState LinearSampler;
//!COMMON
#define SMAA_RT_METRICS float4(GetInputPt(), GetInputSize())
static float4 SMAA_RT_METRICS = { GetInputPt(), GetInputSize() };
#define SMAA_LINEAR_SAMPLER LinearSampler
#define SMAA_POINT_SAMPLER PointSampler
#define SMAA_PRESET_MEDIUM

View file

@ -41,7 +41,7 @@ SamplerState LinearSampler;
//!COMMON
#define SMAA_RT_METRICS float4(GetInputPt(), GetInputSize())
static float4 SMAA_RT_METRICS = { GetInputPt(), GetInputSize() };
#define SMAA_LINEAR_SAMPLER LinearSampler
#define SMAA_POINT_SAMPLER PointSampler
#define SMAA_PRESET_ULTRA

View file

@ -4,7 +4,6 @@
// Adaptive sharpen - version 2015-05-15 - (requires ps >= 3.0)
// Tuned for use post resize, EXPECTS FULL RANGE GAMMA LIGHT
//!MAGPIE EFFECT
//!VERSION 5
//!SCALE_FACTOR 1
@ -30,13 +29,19 @@ Texture2D OUTPUT;
//!FILTER POINT
SamplerState sam;
//!PASS 1
//!IN INPUT
//!OUT OUTPUT
//!BLOCK_SIZE 16
//!NUM_THREADS 64
// Unrolling certain loops when compiling with DXC severely degrades performance,
// so CONDITIONAL_UNROLL expands to nothing when MP_SM_6_0 is defined and to the
// [unroll] attribute otherwise.
// NOTE(review): MP_SM_6_0 is presumably defined when targeting shader model 6.0+
// (i.e. the DXC path) - confirm against the code that sets it.
#ifdef MP_SM_6_0
#define CONDITIONAL_UNROLL
#else
#define CONDITIONAL_UNROLL [unroll]
#endif
// Defined values under this row are "optimal" DO NOT CHANGE IF YOU DO NOT KNOW WHAT YOU ARE DOING!
#define curveslope (curveHeight*1.5f) // Sharpening curve slope, edge region
@ -46,9 +51,9 @@ SamplerState sam;
#define L_comp_ratio 0.167f // Max compression ratio, light overshoot (1/0.167=6x)
#define max_scale_lim 10.0f // Abs change before max compression (1/10=±10%)
// 效果工作在线性 RGB 空间,应使用 GetLuminance 计算亮度
// Colour to greyscale, fast approx gamma
// Converts an RGB colour to a single greyscale value: the root mean square of
// the three channels, i.e. sqrt((r*r + g*g + b*b) / 3).
float CtG(float3 RGB) {
	const float3 squared = RGB * RGB;
	const float sumOfSquares = squared.r + squared.g + squared.b;
	return sqrt((1.0f / 3.0f) * sumOfSquares);
}
// float CtG(float3 RGB) { return sqrt((1.0f / 3.0f) * ((RGB * RGB).r + (RGB * RGB).g + (RGB * RGB).b)); }
void Pass1(uint2 blockStart, uint3 threadId) {
uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart;
@ -66,11 +71,6 @@ void Pass1(uint2 blockStart, uint3 threadId) {
for (i = 0; i <= 6; i += 2) {
[unroll]
for (j = 0; j <= 6; j += 2) {
// 四角共 16 个纹素无需采样
if ((i == 0 && j == 0) || (i == 6 && j == 0) || (i == 0 && j == 6) || (i == 6 && j == 6)) {
continue;
}
float2 tpos = ((int2)gxy + int2(i, j) - 2) * inputPt;
const float4 sr = INPUT.GatherRed(sam, tpos);
const float4 sg = INPUT.GatherGreen(sam, tpos);
@ -79,19 +79,19 @@ void Pass1(uint2 blockStart, uint3 threadId) {
// w z
// x y
src[i][j].rgb = float3(sr.w, sg.w, sb.w);
src[i][j].w = CtG(src[i][j].rgb);
src[i][j].w = GetLuminance(src[i][j].rgb);
src[i][j + 1].rgb = float3(sr.x, sg.x, sb.x);
src[i][j + 1].w = CtG(src[i][j + 1].rgb);
src[i][j + 1].w = GetLuminance(src[i][j + 1].rgb);
src[i + 1][j].rgb = float3(sr.z, sg.z, sb.z);
src[i + 1][j].w = CtG(src[i + 1][j].rgb);
src[i + 1][j].w = GetLuminance(src[i + 1][j].rgb);
src[i + 1][j + 1].rgb = float3(sr.y, sg.y, sb.y);
src[i + 1][j + 1].w = CtG(src[i + 1][j + 1].rgb);
src[i + 1][j + 1].w = GetLuminance(src[i + 1][j + 1].rgb);
}
}
[unroll]
CONDITIONAL_UNROLL
for (i = 0; i <= 1; ++i) {
[unroll]
CONDITIONAL_UNROLL
for (j = 0; j <= 1; ++j) {
const uint2 destPos = gxy + uint2(i, j);

View file

@ -385,12 +385,12 @@ winrt::fire_and_forget EffectsService::_CompileShaderEffectAsync(
std::vector<const wchar_t*> arguments;
arguments.push_back(L"-HV");
arguments.push_back(L"2018");
arguments.push_back(L"-E");
arguments.push_back(L"__M");
arguments.push_back(L"-all-resources-bound");
arguments.push_back(L"-ffinite-math-only");
arguments.push_back(L"-T");
const wchar_t* profile;
switch (shaderModel) {
@ -443,12 +443,17 @@ winrt::fire_and_forget EffectsService::_CompileShaderEffectAsync(
}
}
#ifndef _DEBUG
#ifdef _DEBUG
arguments.push_back(L"-Od");
arguments.push_back(L"-Zi");
arguments.push_back(L"-Qembed_debug");
#else
arguments.push_back(L"-O3");
// 剥离反射信息以减小体积
arguments.push_back(L"-Qstrip_reflect");
#endif
arguments.push_back(L"-I");
arguments.push_back(includeDir.c_str());

View file

@ -45,8 +45,9 @@
<VariableName>%(Filename)</VariableName>
<HeaderFileOutput>$(GeneratedFilesDir)\shaders\%(Filename).h</HeaderFileOutput>
<ObjectFileOutput />
<AdditionalOptions>-ffinite-math-only %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)' == 'Debug'">-Qembed_debug %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)' == 'Release'">-O3 %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)' == 'Release'">-O3 -Qstrip_reflect %(AdditionalOptions)</AdditionalOptions>
</FxCompile>
</ItemDefinitionGroup>
<ItemGroup>

View file

@ -12,7 +12,7 @@ namespace Magpie {
// Current MagpieFX version.
static constexpr uint32_t MAGPIE_FX_VERSION = 5;
// Lowest MagpieFX version that is still accepted for backward compatibility.
// NOTE(review): this constant appears twice below (4, then 5) - this looks like
// the old and new lines of a diff whose +/- markers were stripped; confirm which
// value is intended and keep only that one.
static constexpr uint32_t MAGPIE_FX_MIN_SUPPORTED_VERSION = 4;
static constexpr uint32_t MAGPIE_FX_MIN_SUPPORTED_VERSION = 5;
// Only treated as a directive when it appears at the very start of a line.
static const char* META_INDICATOR = "//!";