mirror of
https://github.com/Blinue/Magpie.git
synced 2026-06-24 02:04:10 +00:00
perf: 优化部分效果和修复渲染错误
This commit is contained in:
parent
379982086c
commit
7bc06cc10d
7 changed files with 91 additions and 114 deletions
|
|
@ -64,7 +64,6 @@ float4 weight4(float x) {
|
|||
);
|
||||
}
|
||||
|
||||
|
||||
float4 Pass1(float2 pos) {
|
||||
const float2 inputPt = GetInputPt();
|
||||
const float2 inputSize = GetInputSize();
|
||||
|
|
|
|||
|
|
@ -10,12 +10,11 @@
|
|||
// B = 0.825 to get rid of dithering. Increase B to get a fine sharpness, though dithering returns.
|
||||
|
||||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!USE MulAdd
|
||||
//!VERSION 5
|
||||
//!CAPABILITY AdvancedColor
|
||||
|
||||
#include "StubDefs.hlsli"
|
||||
|
||||
|
||||
//!PARAMETER
|
||||
//!LABEL Window Sinc Param
|
||||
//!DEFAULT 0.5
|
||||
|
|
@ -50,7 +49,6 @@ Texture2D OUTPUT;
|
|||
//!FILTER POINT
|
||||
SamplerState sam;
|
||||
|
||||
|
||||
//!PASS 1
|
||||
//!IN INPUT
|
||||
//!OUT OUTPUT
|
||||
|
|
@ -62,7 +60,6 @@ SamplerState sam;
|
|||
#define min4(a, b, c, d) min(min(a, b), min(c, d))
|
||||
#define max4(a, b, c, d) max(max(a, b), max(c, d))
|
||||
|
||||
|
||||
float d(float2 pt1, float2 pt2) {
|
||||
float2 v = pt2 - pt1;
|
||||
return sqrt(dot(v, v));
|
||||
|
|
@ -108,9 +105,9 @@ void Pass1(uint2 blockStart, uint3 threadId) {
|
|||
[unroll]
|
||||
for (uint j = 0; j <= 2; j += 2) {
|
||||
float2 tpos = (tc + uint2(i, j)) * inputPt;
|
||||
const float4 sr = INPUT.GatherRed(sam, tpos);
|
||||
const float4 sg = INPUT.GatherGreen(sam, tpos);
|
||||
const float4 sb = INPUT.GatherBlue(sam, tpos);
|
||||
float4 sr = INPUT.GatherRed(sam, tpos);
|
||||
float4 sg = INPUT.GatherGreen(sam, tpos);
|
||||
float4 sb = INPUT.GatherBlue(sam, tpos);
|
||||
|
||||
// w z
|
||||
// x y
|
||||
|
|
@ -128,11 +125,9 @@ void Pass1(uint2 blockStart, uint3 threadId) {
|
|||
color *= rcp(dot(mul(weights, float4(1, 1, 1, 1)), 1));
|
||||
|
||||
// 抗振铃
|
||||
// Get min/max samples
|
||||
float3 min_sample = min4(src[1][1], src[2][1], src[1][2], src[2][2]);
|
||||
float3 max_sample = max4(src[1][1], src[2][1], src[1][2], src[2][2]);
|
||||
color = lerp(color, clamp(color, min_sample, max_sample), ARStrength);
|
||||
|
||||
// final sum and weight normalization
|
||||
|
||||
OUTPUT[gxy] = float4(color, 1);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,9 +2,8 @@
|
|||
// 移植自 https://gist.github.com/igv/36508af3ffc84410fe39761d6969be10
|
||||
// 原始文件使用了大量 mpv 的“特性”,因此可能存在移植错误。如果你熟悉 mpv hook,请帮助我们改进
|
||||
|
||||
|
||||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!VERSION 5
|
||||
|
||||
//!PARAMETER
|
||||
//!LABEL Oversharp
|
||||
|
|
@ -41,7 +40,7 @@ Texture2D MR;
|
|||
//!TEXTURE
|
||||
//!WIDTH OUTPUT_WIDTH
|
||||
//!HEIGHT OUTPUT_HEIGHT
|
||||
//!FORMAT R8G8B8A8_UNORM
|
||||
//!FORMAT COLOR_SPACE_ADAPTIVE
|
||||
Texture2D POSTKERNEL;
|
||||
|
||||
//!SAMPLER
|
||||
|
|
@ -52,7 +51,6 @@ SamplerState sam;
|
|||
//!FILTER LINEAR
|
||||
SamplerState sam1;
|
||||
|
||||
|
||||
//!PASS 1
|
||||
//!DESC CatumllRom
|
||||
//!STYLE PS
|
||||
|
|
@ -60,58 +58,60 @@ SamplerState sam1;
|
|||
//!OUT POSTKERNEL
|
||||
|
||||
// 模拟 mpv 的内置缩放(CatmullRom)
|
||||
// Samples a texture with Catmull-Rom filtering, using 9 texture fetches instead of 16.
|
||||
// See http://vec3.ca/bicubic-filtering-in-fewer-taps/ for more details
|
||||
|
||||
float4 weight4(float x) {
|
||||
// Sharper version. May look better in some cases. B=0, C=0.75
|
||||
return float4(
|
||||
((-0.75 * x + 1.5) * x - 0.75) * x,
|
||||
(1.25 * x - 2.25) * x * x + 1.0,
|
||||
((-1.25 * x + 1.5) * x + 0.75) * x,
|
||||
(0.75 * x - 0.75) * x * x
|
||||
);
|
||||
}
|
||||
|
||||
float4 Pass1(float2 pos) {
|
||||
float2 inputSize = GetInputSize();
|
||||
float2 inputPt = GetInputPt();
|
||||
const float2 inputPt = GetInputPt();
|
||||
const float2 inputSize = GetInputSize();
|
||||
|
||||
// We're going to sample a a 4x4 grid of texels surrounding the target UV coordinate. We'll do this by rounding
|
||||
// down the sample location to get the exact center of our "starting" texel. The starting texel will be at
|
||||
// location [1, 1] in the grid, where [0, 0] is the top left corner.
|
||||
float2 samplePos = pos * inputSize;
|
||||
float2 texPos1 = floor(samplePos - 0.5f) + 0.5f;
|
||||
pos *= inputSize;
|
||||
float2 pos1 = floor(pos - 0.5) + 0.5;
|
||||
float2 f = pos - pos1;
|
||||
|
||||
// Compute the fractional offset from our starting texel to our original sample location, which we'll
|
||||
// feed into the Catmull-Rom spline function to get our filter weights.
|
||||
float2 f = samplePos - texPos1;
|
||||
float4 rowtaps = weight4(f.x);
|
||||
float4 coltaps = weight4(f.y);
|
||||
|
||||
// Compute the Catmull-Rom weights using the fractional offset that we calculated earlier.
|
||||
// These equations are pre-expanded based on our knowledge of where the texels will be located,
|
||||
// which lets us avoid having to evaluate a piece-wise function.
|
||||
float2 w0 = f * (-0.5f + f * (1.0f - 0.5f * f));
|
||||
float2 w1 = 1.0f + f * f * (-2.5f + 1.5f * f);
|
||||
float2 w2 = f * (0.5f + f * (2.0f - 1.5f * f));
|
||||
float2 w3 = f * f * (-0.5f + 0.5f * f);
|
||||
float2 uv1 = pos1 * inputPt;
|
||||
float2 uv0 = uv1 - inputPt;
|
||||
float2 uv2 = uv1 + inputPt;
|
||||
float2 uv3 = uv2 + inputPt;
|
||||
|
||||
// Work out weighting factors and sampling offsets that will let us use bilinear filtering to
|
||||
// simultaneously evaluate the middle 2 samples from the 4x4 grid.
|
||||
float2 w12 = w1 + w2;
|
||||
float2 offset12 = w2 / (w1 + w2);
|
||||
float u_weight_sum = rowtaps.y + rowtaps.z;
|
||||
float u_middle_offset = rowtaps.z * inputPt.x / u_weight_sum;
|
||||
float u_middle = uv1.x + u_middle_offset;
|
||||
|
||||
// Compute the final UV coordinates we'll use for sampling the texture
|
||||
float2 texPos0 = texPos1 - 1;
|
||||
float2 texPos3 = texPos1 + 2;
|
||||
float2 texPos12 = texPos1 + offset12;
|
||||
float v_weight_sum = coltaps.y + coltaps.z;
|
||||
float v_middle_offset = coltaps.z * inputPt.y / v_weight_sum;
|
||||
float v_middle = uv1.y + v_middle_offset;
|
||||
|
||||
texPos0 *= inputPt;
|
||||
texPos3 *= inputPt;
|
||||
texPos12 *= inputPt;
|
||||
int2 coord_top_left = int2(max(uv0 * inputSize, 0.5));
|
||||
int2 coord_bottom_right = int2(min(uv3 * inputSize, inputSize - 0.5));
|
||||
|
||||
float4 result = 0.0f;
|
||||
result += INPUT.SampleLevel(sam1, float2(texPos0.x, texPos0.y), 0) * w0.x * w0.y;
|
||||
result += INPUT.SampleLevel(sam1, float2(texPos12.x, texPos0.y), 0) * w12.x * w0.y;
|
||||
result += INPUT.SampleLevel(sam1, float2(texPos3.x, texPos0.y), 0) * w3.x * w0.y;
|
||||
float3 top = INPUT.Load(int3(coord_top_left, 0)).rgb * rowtaps.x;
|
||||
top += INPUT.SampleLevel(sam1, float2(u_middle, uv0.y), 0).rgb * u_weight_sum;
|
||||
top += INPUT.Load(int3(coord_bottom_right.x, coord_top_left.y, 0)).rgb * rowtaps.w;
|
||||
float3 total = top * coltaps.x;
|
||||
|
||||
result += INPUT.SampleLevel(sam1, float2(texPos0.x, texPos12.y), 0) * w0.x * w12.y;
|
||||
result += INPUT.SampleLevel(sam1, float2(texPos12.x, texPos12.y), 0) * w12.x * w12.y;
|
||||
result += INPUT.SampleLevel(sam1, float2(texPos3.x, texPos12.y), 0) * w3.x * w12.y;
|
||||
float3 middle = INPUT.SampleLevel(sam1, float2(uv0.x, v_middle), 0).rgb * rowtaps.x;
|
||||
middle += INPUT.SampleLevel(sam1, float2(u_middle, v_middle), 0).rgb * u_weight_sum;
|
||||
middle += INPUT.SampleLevel(sam1, float2(uv3.x, v_middle), 0).rgb * rowtaps.w;
|
||||
total += middle * v_weight_sum;
|
||||
|
||||
result += INPUT.SampleLevel(sam1, float2(texPos0.x, texPos3.y), 0) * w0.x * w3.y;
|
||||
result += INPUT.SampleLevel(sam1, float2(texPos12.x, texPos3.y), 0) * w12.x * w3.y;
|
||||
result += INPUT.SampleLevel(sam1, float2(texPos3.x, texPos3.y), 0) * w3.x * w3.y;
|
||||
float3 bottom = INPUT.Load(int3(coord_top_left.x, coord_bottom_right.y, 0)).rgb * rowtaps.x;
|
||||
bottom += INPUT.SampleLevel(sam1, float2(u_middle, uv3.y), 0).rgb * u_weight_sum;
|
||||
bottom += INPUT.Load(int3(coord_bottom_right, 0)).rgb * rowtaps.w;
|
||||
total += bottom * coltaps.w;
|
||||
|
||||
return result;
|
||||
return float4(total, 1);
|
||||
}
|
||||
|
||||
//!PASS 2
|
||||
|
|
@ -124,7 +124,6 @@ float4 Pass1(float2 pos) {
|
|||
#define Kernel(x) MN(0.0f, 0.5f, abs(x))
|
||||
#define taps 2.0f
|
||||
|
||||
|
||||
float4 Pass2(float2 pos) {
|
||||
const float inputPtY = GetInputPt().y;
|
||||
const uint inputHeight = GetInputSize().y;
|
||||
|
|
@ -152,7 +151,6 @@ float4 Pass2(float2 pos) {
|
|||
return float4(avg, 1);
|
||||
}
|
||||
|
||||
|
||||
//!PASS 3
|
||||
//!DESC L2 pass 2
|
||||
//!STYLE PS
|
||||
|
|
@ -163,7 +161,6 @@ float4 Pass2(float2 pos) {
|
|||
#define Kernel(x) MN(0.0, 0.5, abs(x))
|
||||
#define taps 2.0
|
||||
|
||||
|
||||
float4 Pass3(float2 pos) {
|
||||
const float inputPtX = GetInputPt().x;
|
||||
const uint inputWidth = GetInputSize().x;
|
||||
|
|
@ -190,7 +187,6 @@ float4 Pass3(float2 pos) {
|
|||
return float4(avg, 1);
|
||||
}
|
||||
|
||||
|
||||
//!PASS 4
|
||||
//!DESC mean & R
|
||||
//!IN L2_2, POSTKERNEL
|
||||
|
|
@ -207,7 +203,6 @@ float4 Pass3(float2 pos) {
|
|||
|
||||
#define Luma(rgb) ( dot(rgb, float3(0.2126, 0.7152, 0.0722)) )
|
||||
|
||||
|
||||
void Pass4(uint2 blockStart, uint3 threadId) {
|
||||
uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart;
|
||||
uint2 outputSize = GetOutputSize();
|
||||
|
|
@ -224,7 +219,7 @@ void Pass4(uint2 blockStart, uint3 threadId) {
|
|||
for (i = 0; i < taps; i += 2) {
|
||||
[unroll]
|
||||
for (j = 0; j < taps; j += 2) {
|
||||
const float2 tpos = (int2(gxy + uint2(i, j)) - taps / 2 + 1) * outputPt;
|
||||
float2 tpos = (int2(gxy + uint2(i, j)) - taps / 2 + 1) * outputPt;
|
||||
float4 sr = POSTKERNEL.GatherRed(sam, tpos);
|
||||
float4 sg = POSTKERNEL.GatherGreen(sam, tpos);
|
||||
float4 sb = POSTKERNEL.GatherBlue(sam, tpos);
|
||||
|
|
@ -258,13 +253,7 @@ void Pass4(uint2 blockStart, uint3 threadId) {
|
|||
[unroll]
|
||||
for (j = 0; j <= 1; ++j) {
|
||||
uint2 destPos = gxy + uint2(i, j);
|
||||
|
||||
if (i != 0 || j != 0) {
|
||||
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
float W = 0.0;
|
||||
float3x3 avg = 0;
|
||||
|
||||
|
|
@ -293,7 +282,6 @@ void Pass4(uint2 blockStart, uint3 threadId) {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
//!PASS 5
|
||||
//!DESC final pass
|
||||
//!IN MR, POSTKERNEL
|
||||
|
|
@ -307,7 +295,6 @@ void Pass4(uint2 blockStart, uint3 threadId) {
|
|||
// taps 需为奇数
|
||||
#define taps 3
|
||||
|
||||
|
||||
void Pass5(uint2 blockStart, uint3 threadId) {
|
||||
const uint2 gxy = (Rmp8x8(threadId.x) << 1) + blockStart;
|
||||
|
||||
|
|
@ -324,11 +311,11 @@ void Pass5(uint2 blockStart, uint3 threadId) {
|
|||
for (i = 0; i < taps; i += 2) {
|
||||
[unroll]
|
||||
for (j = 0; j < taps; j += 2) {
|
||||
const float2 tpos = (int2(gxy + uint2(i, j)) - taps / 2 + 1) * outputPt;
|
||||
const float4 sr = MR.GatherRed(sam, tpos);
|
||||
const float4 sg = MR.GatherGreen(sam, tpos);
|
||||
const float4 sb = MR.GatherBlue(sam, tpos);
|
||||
const float4 sa = MR.GatherAlpha(sam, tpos);
|
||||
float2 tpos = (int2(gxy + uint2(i, j)) - taps / 2 + 1) * outputPt;
|
||||
float4 sr = MR.GatherRed(sam, tpos);
|
||||
float4 sg = MR.GatherGreen(sam, tpos);
|
||||
float4 sb = MR.GatherBlue(sam, tpos);
|
||||
float4 sa = MR.GatherAlpha(sam, tpos);
|
||||
|
||||
// w z
|
||||
// x y
|
||||
|
|
@ -340,10 +327,10 @@ void Pass5(uint2 blockStart, uint3 threadId) {
|
|||
}
|
||||
|
||||
float3 src2[2][2];
|
||||
const float2 tpos = (gxy + 1) * outputPt;
|
||||
const float4 sr = POSTKERNEL.GatherRed(sam, tpos);
|
||||
const float4 sg = POSTKERNEL.GatherGreen(sam, tpos);
|
||||
const float4 sb = POSTKERNEL.GatherBlue(sam, tpos);
|
||||
float2 tpos = (gxy + 1) * outputPt;
|
||||
float4 sr = POSTKERNEL.GatherRed(sam, tpos);
|
||||
float4 sg = POSTKERNEL.GatherGreen(sam, tpos);
|
||||
float4 sb = POSTKERNEL.GatherBlue(sam, tpos);
|
||||
|
||||
// w z
|
||||
// x y
|
||||
|
|
|
|||
|
|
@ -512,6 +512,7 @@ winrt::fire_and_forget EffectsService::_CompileShaderEffectAsync(
|
|||
// 解析失败
|
||||
auto lk = _shaderEffectCacheLock.lock_exclusive();
|
||||
_shaderEffectCache.erase(cacheKey);
|
||||
co_return;
|
||||
}
|
||||
|
||||
if (saveSources) {
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ struct ShaderEffectTextureFormatProps {
|
|||
|
||||
static constexpr ShaderEffectTextureFormatProps SHADER_TEXTURE_FORMAT_PROPS[] = {
|
||||
{"UNKNOWN", DXGI_FORMAT_UNKNOWN, 4, nullptr, nullptr},
|
||||
{"COLOR_SPACE_ADAPTIVE", DXGI_FORMAT_UNKNOWN, 4, "MF", "MF"},
|
||||
{"COLOR_SPACE_ADAPTIVE", DXGI_FORMAT_UNKNOWN, 4, "MF4", "MF4"},
|
||||
{"R8_UNORM", DXGI_FORMAT_R8_UNORM, 1, "MF", "unorm float"},
|
||||
{"R8_SNORM", DXGI_FORMAT_R8_SNORM, 1, "MF", "snorm float"},
|
||||
{"R16_UNORM", DXGI_FORMAT_R16_UNORM, 1, "MF", "unorm float"},
|
||||
|
|
|
|||
|
|
@ -66,7 +66,7 @@ void ShaderEffectDrawer::Bind(SizeU inputSize, SizeU outputSize, const ColorInfo
|
|||
} else {
|
||||
_inputSize = inputSize;
|
||||
_outputSize = outputSize;
|
||||
_shouldCreateTextures = true;
|
||||
_shouldUpdateSizeDependentResources = true;
|
||||
}
|
||||
|
||||
bool wasSrgb = _colorInfo.kind != winrt::AdvancedColorKind::StandardDynamicRange;
|
||||
|
|
@ -75,11 +75,6 @@ void ShaderEffectDrawer::Bind(SizeU inputSize, SizeU outputSize, const ColorInfo
|
|||
|
||||
if (!_compilationTaskId.empty()) {
|
||||
if (wasSrgb == isSrgb) {
|
||||
// 无需重新编译
|
||||
if (_constantBuffer) {
|
||||
_UpdateConstants();
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -113,10 +108,10 @@ HRESULT ShaderEffectDrawer::Update(EffectDrawerState& state, std::string& messag
|
|||
}
|
||||
|
||||
if (_drawInfo) {
|
||||
if (_shouldCreateTextures) {
|
||||
HRESULT hr = _CreateTextures();
|
||||
if (_shouldUpdateSizeDependentResources) {
|
||||
HRESULT hr = _UpdateSizeDependentResources();
|
||||
if (FAILED(hr)) {
|
||||
Logger::Get().ComError("_CreateTextures 失败", hr);
|
||||
Logger::Get().ComError("_UpdateSizeDependentResources 失败", hr);
|
||||
return hr;
|
||||
}
|
||||
|
||||
|
|
@ -527,12 +522,6 @@ HRESULT ShaderEffectDrawer::_CreateDeviceResources() noexcept {
|
|||
}
|
||||
}
|
||||
|
||||
HRESULT hr = _CreateTextures();
|
||||
if (FAILED(hr)) {
|
||||
Logger::Get().ComError("_CreateTextures 失败", hr);
|
||||
return hr;
|
||||
}
|
||||
|
||||
// 常量缓冲区可以复用
|
||||
if (!_constantBuffer) {
|
||||
// 10 个内置常量
|
||||
|
|
@ -557,7 +546,7 @@ HRESULT ShaderEffectDrawer::_CreateDeviceResources() noexcept {
|
|||
|
||||
CD3DX12_RESOURCE_DESC bufferDesc = CD3DX12_RESOURCE_DESC::Buffer(UINT64(paramCount * 4));
|
||||
|
||||
hr = device->CreateCommittedResource(
|
||||
HRESULT hr = device->CreateCommittedResource(
|
||||
&heapProperties,
|
||||
heapFlag,
|
||||
&bufferDesc,
|
||||
|
|
@ -595,8 +584,12 @@ HRESULT ShaderEffectDrawer::_CreateDeviceResources() noexcept {
|
|||
}
|
||||
}
|
||||
|
||||
_UpdateConstants();
|
||||
|
||||
HRESULT hr = _UpdateSizeDependentResources();
|
||||
if (FAILED(hr)) {
|
||||
Logger::Get().ComError("_UpdateSizeDependentResources 失败", hr);
|
||||
return hr;
|
||||
}
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
|
|
@ -687,7 +680,9 @@ void ShaderEffectDrawer::_UpdateConstants() noexcept {
|
|||
_shouldUpdateConstantBuffer = true;
|
||||
}
|
||||
|
||||
HRESULT ShaderEffectDrawer::_CreateTextures() noexcept {
|
||||
HRESULT ShaderEffectDrawer::_UpdateSizeDependentResources() noexcept {
|
||||
_shouldUpdateSizeDependentResources = false;
|
||||
|
||||
const uint32_t textureCount = (uint32_t)_drawInfo->textures.size();
|
||||
if (textureCount != 0) {
|
||||
ID3D12Device5* device = _d3d12Context->GetDevice();
|
||||
|
|
@ -876,7 +871,7 @@ HRESULT ShaderEffectDrawer::_CreateTextures() noexcept {
|
|||
};
|
||||
}
|
||||
|
||||
_shouldCreateTextures = false;
|
||||
_UpdateConstants();
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ private:
|
|||
|
||||
void _UpdateConstants() noexcept;
|
||||
|
||||
HRESULT _CreateTextures() noexcept;
|
||||
HRESULT _UpdateSizeDependentResources() noexcept;
|
||||
|
||||
D3D12Context* _d3d12Context = nullptr;
|
||||
const EffectOption* _effectOption = nullptr;
|
||||
|
|
@ -73,7 +73,7 @@ private:
|
|||
// 的索引都加上 _textures.size() 作为区分。
|
||||
SmallVector<uint32_t> _textureDescriptorMap;
|
||||
|
||||
bool _shouldCreateTextures = false;
|
||||
bool _shouldUpdateSizeDependentResources = false;
|
||||
bool _shouldUpdateConstantBuffer = false;
|
||||
};
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue