perf: 优化部分效果和修复渲染错误

This commit is contained in:
Xu 2026-04-16 16:46:01 +08:00
commit 7bc06cc10d
7 changed files with 91 additions and 114 deletions

View file

@ -64,7 +64,6 @@ float4 weight4(float x) {
);
}
float4 Pass1(float2 pos) {
const float2 inputPt = GetInputPt();
const float2 inputSize = GetInputSize();

View file

@ -10,12 +10,11 @@
// B = 0.825 to get rid of dithering. Increase B to get a fine sharpness, though dithering returns.
//!MAGPIE EFFECT
//!VERSION 4
//!USE MulAdd
//!VERSION 5
//!CAPABILITY AdvancedColor
#include "StubDefs.hlsli"
//!PARAMETER
//!LABEL Window Sinc Param
//!DEFAULT 0.5
@ -50,7 +49,6 @@ Texture2D OUTPUT;
//!FILTER POINT
SamplerState sam;
//!PASS 1
//!IN INPUT
//!OUT OUTPUT
@ -62,7 +60,6 @@ SamplerState sam;
#define min4(a, b, c, d) min(min(a, b), min(c, d))
#define max4(a, b, c, d) max(max(a, b), max(c, d))
float d(float2 pt1, float2 pt2) {
float2 v = pt2 - pt1;
return sqrt(dot(v, v));
@ -108,9 +105,9 @@ void Pass1(uint2 blockStart, uint3 threadId) {
[unroll]
for (uint j = 0; j <= 2; j += 2) {
float2 tpos = (tc + uint2(i, j)) * inputPt;
const float4 sr = INPUT.GatherRed(sam, tpos);
const float4 sg = INPUT.GatherGreen(sam, tpos);
const float4 sb = INPUT.GatherBlue(sam, tpos);
float4 sr = INPUT.GatherRed(sam, tpos);
float4 sg = INPUT.GatherGreen(sam, tpos);
float4 sb = INPUT.GatherBlue(sam, tpos);
// w z
// x y
@ -128,11 +125,9 @@ void Pass1(uint2 blockStart, uint3 threadId) {
color *= rcp(dot(mul(weights, float4(1, 1, 1, 1)), 1));
// 抗振铃
// Get min/max samples
float3 min_sample = min4(src[1][1], src[2][1], src[1][2], src[2][2]);
float3 max_sample = max4(src[1][1], src[2][1], src[1][2], src[2][2]);
color = lerp(color, clamp(color, min_sample, max_sample), ARStrength);
// final sum and weight normalization
OUTPUT[gxy] = float4(color, 1);
}

View file

@ -2,9 +2,8 @@
// 移植自 https://gist.github.com/igv/36508af3ffc84410fe39761d6969be10
// 原始文件使用了大量 mpv 的“特性”,因此可能存在移植错误。如果你熟悉 mpv hook，请帮助我们改进
//!MAGPIE EFFECT
//!VERSION 4
//!VERSION 5
//!PARAMETER
//!LABEL Oversharp
@ -41,7 +40,7 @@ Texture2D MR;
//!TEXTURE
//!WIDTH OUTPUT_WIDTH
//!HEIGHT OUTPUT_HEIGHT
//!FORMAT R8G8B8A8_UNORM
//!FORMAT COLOR_SPACE_ADAPTIVE
Texture2D POSTKERNEL;
//!SAMPLER
@ -52,7 +51,6 @@ SamplerState sam;
//!FILTER LINEAR
SamplerState sam1;
//!PASS 1
//!DESC CatumllRom
//!STYLE PS
@ -60,58 +58,60 @@ SamplerState sam1;
//!OUT POSTKERNEL
// 模拟 mpv 的内置缩放CatmullRom
// Samples a texture with Catmull-Rom filtering, using 9 texture fetches instead of 16.
// See http://vec3.ca/bicubic-filtering-in-fewer-taps/ for more details
// Evaluates the four tap weights of a cubic reconstruction kernel for the
// fractional offset x. The coefficients correspond to the sharper B=0, C=0.75
// variant, which may look better in some cases. Components are returned in tap
// order (x, y, z, w), and the four weights always sum to 1.
float4 weight4(float x) {
	// Each polynomial is kept in the same nested (Horner) form so the arithmetic
	// is evaluated exactly as before.
	const float tap0 = ((-0.75 * x + 1.5) * x - 0.75) * x;
	const float tap1 = (1.25 * x - 2.25) * x * x + 1.0;
	const float tap2 = ((-1.25 * x + 1.5) * x + 0.75) * x;
	const float tap3 = (0.75 * x - 0.75) * x * x;

	return float4(tap0, tap1, tap2, tap3);
}
float4 Pass1(float2 pos) {
float2 inputSize = GetInputSize();
float2 inputPt = GetInputPt();
const float2 inputPt = GetInputPt();
const float2 inputSize = GetInputSize();
// We're going to sample a 4x4 grid of texels surrounding the target UV coordinate. We'll do this by rounding
// down the sample location to get the exact center of our "starting" texel. The starting texel will be at
// location [1, 1] in the grid, where [0, 0] is the top left corner.
float2 samplePos = pos * inputSize;
float2 texPos1 = floor(samplePos - 0.5f) + 0.5f;
pos *= inputSize;
float2 pos1 = floor(pos - 0.5) + 0.5;
float2 f = pos - pos1;
// Compute the fractional offset from our starting texel to our original sample location, which we'll
// feed into the Catmull-Rom spline function to get our filter weights.
float2 f = samplePos - texPos1;
float4 rowtaps = weight4(f.x);
float4 coltaps = weight4(f.y);
// Compute the Catmull-Rom weights using the fractional offset that we calculated earlier.
// These equations are pre-expanded based on our knowledge of where the texels will be located,
// which lets us avoid having to evaluate a piece-wise function.
float2 w0 = f * (-0.5f + f * (1.0f - 0.5f * f));
float2 w1 = 1.0f + f * f * (-2.5f + 1.5f * f);
float2 w2 = f * (0.5f + f * (2.0f - 1.5f * f));
float2 w3 = f * f * (-0.5f + 0.5f * f);
float2 uv1 = pos1 * inputPt;
float2 uv0 = uv1 - inputPt;
float2 uv2 = uv1 + inputPt;
float2 uv3 = uv2 + inputPt;
// Work out weighting factors and sampling offsets that will let us use bilinear filtering to
// simultaneously evaluate the middle 2 samples from the 4x4 grid.
float2 w12 = w1 + w2;
float2 offset12 = w2 / (w1 + w2);
float u_weight_sum = rowtaps.y + rowtaps.z;
float u_middle_offset = rowtaps.z * inputPt.x / u_weight_sum;
float u_middle = uv1.x + u_middle_offset;
// Compute the final UV coordinates we'll use for sampling the texture
float2 texPos0 = texPos1 - 1;
float2 texPos3 = texPos1 + 2;
float2 texPos12 = texPos1 + offset12;
float v_weight_sum = coltaps.y + coltaps.z;
float v_middle_offset = coltaps.z * inputPt.y / v_weight_sum;
float v_middle = uv1.y + v_middle_offset;
texPos0 *= inputPt;
texPos3 *= inputPt;
texPos12 *= inputPt;
int2 coord_top_left = int2(max(uv0 * inputSize, 0.5));
int2 coord_bottom_right = int2(min(uv3 * inputSize, inputSize - 0.5));
float4 result = 0.0f;
result += INPUT.SampleLevel(sam1, float2(texPos0.x, texPos0.y), 0) * w0.x * w0.y;
result += INPUT.SampleLevel(sam1, float2(texPos12.x, texPos0.y), 0) * w12.x * w0.y;
result += INPUT.SampleLevel(sam1, float2(texPos3.x, texPos0.y), 0) * w3.x * w0.y;
float3 top = INPUT.Load(int3(coord_top_left, 0)).rgb * rowtaps.x;
top += INPUT.SampleLevel(sam1, float2(u_middle, uv0.y), 0).rgb * u_weight_sum;
top += INPUT.Load(int3(coord_bottom_right.x, coord_top_left.y, 0)).rgb * rowtaps.w;
float3 total = top * coltaps.x;
result += INPUT.SampleLevel(sam1, float2(texPos0.x, texPos12.y), 0) * w0.x * w12.y;
result += INPUT.SampleLevel(sam1, float2(texPos12.x, texPos12.y), 0) * w12.x * w12.y;
result += INPUT.SampleLevel(sam1, float2(texPos3.x, texPos12.y), 0) * w3.x * w12.y;
float3 middle = INPUT.SampleLevel(sam1, float2(uv0.x, v_middle), 0).rgb * rowtaps.x;
middle += INPUT.SampleLevel(sam1, float2(u_middle, v_middle), 0).rgb * u_weight_sum;
middle += INPUT.SampleLevel(sam1, float2(uv3.x, v_middle), 0).rgb * rowtaps.w;
total += middle * v_weight_sum;
result += INPUT.SampleLevel(sam1, float2(texPos0.x, texPos3.y), 0) * w0.x * w3.y;
result += INPUT.SampleLevel(sam1, float2(texPos12.x, texPos3.y), 0) * w12.x * w3.y;
result += INPUT.SampleLevel(sam1, float2(texPos3.x, texPos3.y), 0) * w3.x * w3.y;
float3 bottom = INPUT.Load(int3(coord_top_left.x, coord_bottom_right.y, 0)).rgb * rowtaps.x;
bottom += INPUT.SampleLevel(sam1, float2(u_middle, uv3.y), 0).rgb * u_weight_sum;
bottom += INPUT.Load(int3(coord_bottom_right, 0)).rgb * rowtaps.w;
total += bottom * coltaps.w;
return result;
return float4(total, 1);
}
//!PASS 2
@ -124,7 +124,6 @@ float4 Pass1(float2 pos) {
#define Kernel(x) MN(0.0f, 0.5f, abs(x))
#define taps 2.0f
float4 Pass2(float2 pos) {
const float inputPtY = GetInputPt().y;
const uint inputHeight = GetInputSize().y;
@ -152,7 +151,6 @@ float4 Pass2(float2 pos) {
return float4(avg, 1);
}
//!PASS 3
//!DESC L2 pass 2
//!STYLE PS
@ -163,7 +161,6 @@ float4 Pass2(float2 pos) {
#define Kernel(x) MN(0.0, 0.5, abs(x))
#define taps 2.0
float4 Pass3(float2 pos) {
const float inputPtX = GetInputPt().x;
const uint inputWidth = GetInputSize().x;
@ -190,7 +187,6 @@ float4 Pass3(float2 pos) {
return float4(avg, 1);
}
//!PASS 4
//!DESC mean & R
//!IN L2_2, POSTKERNEL
@ -207,7 +203,6 @@ float4 Pass3(float2 pos) {
#define Luma(rgb) ( dot(rgb, float3(0.2126, 0.7152, 0.0722)) )
void Pass4(uint2 blockStart, uint3 threadId) {
uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart;
uint2 outputSize = GetOutputSize();
@ -224,7 +219,7 @@ void Pass4(uint2 blockStart, uint3 threadId) {
for (i = 0; i < taps; i += 2) {
[unroll]
for (j = 0; j < taps; j += 2) {
const float2 tpos = (int2(gxy + uint2(i, j)) - taps / 2 + 1) * outputPt;
float2 tpos = (int2(gxy + uint2(i, j)) - taps / 2 + 1) * outputPt;
float4 sr = POSTKERNEL.GatherRed(sam, tpos);
float4 sg = POSTKERNEL.GatherGreen(sam, tpos);
float4 sb = POSTKERNEL.GatherBlue(sam, tpos);
@ -258,13 +253,7 @@ void Pass4(uint2 blockStart, uint3 threadId) {
[unroll]
for (j = 0; j <= 1; ++j) {
uint2 destPos = gxy + uint2(i, j);
if (i != 0 || j != 0) {
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
continue;
}
}
float W = 0.0;
float3x3 avg = 0;
@ -293,7 +282,6 @@ void Pass4(uint2 blockStart, uint3 threadId) {
}
}
//!PASS 5
//!DESC final pass
//!IN MR, POSTKERNEL
@ -307,7 +295,6 @@ void Pass4(uint2 blockStart, uint3 threadId) {
// taps 需为奇数
#define taps 3
void Pass5(uint2 blockStart, uint3 threadId) {
const uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart;
@ -324,11 +311,11 @@ void Pass5(uint2 blockStart, uint3 threadId) {
for (i = 0; i < taps; i += 2) {
[unroll]
for (j = 0; j < taps; j += 2) {
const float2 tpos = (int2(gxy + uint2(i, j)) - taps / 2 + 1) * outputPt;
const float4 sr = MR.GatherRed(sam, tpos);
const float4 sg = MR.GatherGreen(sam, tpos);
const float4 sb = MR.GatherBlue(sam, tpos);
const float4 sa = MR.GatherAlpha(sam, tpos);
float2 tpos = (int2(gxy + uint2(i, j)) - taps / 2 + 1) * outputPt;
float4 sr = MR.GatherRed(sam, tpos);
float4 sg = MR.GatherGreen(sam, tpos);
float4 sb = MR.GatherBlue(sam, tpos);
float4 sa = MR.GatherAlpha(sam, tpos);
// w z
// x y
@ -340,10 +327,10 @@ void Pass5(uint2 blockStart, uint3 threadId) {
}
float3 src2[2][2];
const float2 tpos = (gxy + 1) * outputPt;
const float4 sr = POSTKERNEL.GatherRed(sam, tpos);
const float4 sg = POSTKERNEL.GatherGreen(sam, tpos);
const float4 sb = POSTKERNEL.GatherBlue(sam, tpos);
float2 tpos = (gxy + 1) * outputPt;
float4 sr = POSTKERNEL.GatherRed(sam, tpos);
float4 sg = POSTKERNEL.GatherGreen(sam, tpos);
float4 sb = POSTKERNEL.GatherBlue(sam, tpos);
// w z
// x y

View file

@ -512,6 +512,7 @@ winrt::fire_and_forget EffectsService::_CompileShaderEffectAsync(
// 解析失败
auto lk = _shaderEffectCacheLock.lock_exclusive();
_shaderEffectCache.erase(cacheKey);
co_return;
}
if (saveSources) {

View file

@ -41,7 +41,7 @@ struct ShaderEffectTextureFormatProps {
static constexpr ShaderEffectTextureFormatProps SHADER_TEXTURE_FORMAT_PROPS[] = {
{"UNKNOWN", DXGI_FORMAT_UNKNOWN, 4, nullptr, nullptr},
{"COLOR_SPACE_ADAPTIVE", DXGI_FORMAT_UNKNOWN, 4, "MF", "MF"},
{"COLOR_SPACE_ADAPTIVE", DXGI_FORMAT_UNKNOWN, 4, "MF4", "MF4"},
{"R8_UNORM", DXGI_FORMAT_R8_UNORM, 1, "MF", "unorm float"},
{"R8_SNORM", DXGI_FORMAT_R8_SNORM, 1, "MF", "snorm float"},
{"R16_UNORM", DXGI_FORMAT_R16_UNORM, 1, "MF", "unorm float"},

View file

@ -66,7 +66,7 @@ void ShaderEffectDrawer::Bind(SizeU inputSize, SizeU outputSize, const ColorInfo
} else {
_inputSize = inputSize;
_outputSize = outputSize;
_shouldCreateTextures = true;
_shouldUpdateSizeDependentResources = true;
}
bool wasSrgb = _colorInfo.kind != winrt::AdvancedColorKind::StandardDynamicRange;
@ -75,11 +75,6 @@ void ShaderEffectDrawer::Bind(SizeU inputSize, SizeU outputSize, const ColorInfo
if (!_compilationTaskId.empty()) {
if (wasSrgb == isSrgb) {
// 无需重新编译
if (_constantBuffer) {
_UpdateConstants();
}
return;
}
@ -113,10 +108,10 @@ HRESULT ShaderEffectDrawer::Update(EffectDrawerState& state, std::string& messag
}
if (_drawInfo) {
if (_shouldCreateTextures) {
HRESULT hr = _CreateTextures();
if (_shouldUpdateSizeDependentResources) {
HRESULT hr = _UpdateSizeDependentResources();
if (FAILED(hr)) {
Logger::Get().ComError("_CreateTextures 失败", hr);
Logger::Get().ComError("_UpdateSizeDependentResources 失败", hr);
return hr;
}
@ -527,12 +522,6 @@ HRESULT ShaderEffectDrawer::_CreateDeviceResources() noexcept {
}
}
HRESULT hr = _CreateTextures();
if (FAILED(hr)) {
Logger::Get().ComError("_CreateTextures 失败", hr);
return hr;
}
// 常量缓冲区可以复用
if (!_constantBuffer) {
// 10 个内置常量
@ -557,7 +546,7 @@ HRESULT ShaderEffectDrawer::_CreateDeviceResources() noexcept {
CD3DX12_RESOURCE_DESC bufferDesc = CD3DX12_RESOURCE_DESC::Buffer(UINT64(paramCount * 4));
hr = device->CreateCommittedResource(
HRESULT hr = device->CreateCommittedResource(
&heapProperties,
heapFlag,
&bufferDesc,
@ -595,8 +584,12 @@ HRESULT ShaderEffectDrawer::_CreateDeviceResources() noexcept {
}
}
_UpdateConstants();
HRESULT hr = _UpdateSizeDependentResources();
if (FAILED(hr)) {
Logger::Get().ComError("_UpdateSizeDependentResources 失败", hr);
return hr;
}
return S_OK;
}
@ -687,7 +680,9 @@ void ShaderEffectDrawer::_UpdateConstants() noexcept {
_shouldUpdateConstantBuffer = true;
}
HRESULT ShaderEffectDrawer::_CreateTextures() noexcept {
HRESULT ShaderEffectDrawer::_UpdateSizeDependentResources() noexcept {
_shouldUpdateSizeDependentResources = false;
const uint32_t textureCount = (uint32_t)_drawInfo->textures.size();
if (textureCount != 0) {
ID3D12Device5* device = _d3d12Context->GetDevice();
@ -876,7 +871,7 @@ HRESULT ShaderEffectDrawer::_CreateTextures() noexcept {
};
}
_shouldCreateTextures = false;
_UpdateConstants();
return S_OK;
}

View file

@ -30,7 +30,7 @@ private:
void _UpdateConstants() noexcept;
HRESULT _CreateTextures() noexcept;
HRESULT _UpdateSizeDependentResources() noexcept;
D3D12Context* _d3d12Context = nullptr;
const EffectOption* _effectOption = nullptr;
@ -73,7 +73,7 @@ private:
// 的索引都加上 _textures.size() 作为区分。
SmallVector<uint32_t> _textureDescriptorMap;
bool _shouldCreateTextures = false;
bool _shouldUpdateSizeDependentResources = false;
bool _shouldUpdateConstantBuffer = false;
};