feat: 着色器效果渲染 (p1)

This commit is contained in:
Xu 2026-04-02 16:05:23 +08:00
commit 355dfbb7e2
12 changed files with 520 additions and 56 deletions

View file

@ -2,20 +2,17 @@
// 移植自 https://github.com/bloc97/Anime4K/blob/master/glsl/Upscale/Anime4K_Upscale_CNN_x2_S.glsl
//!MAGPIE EFFECT
//!VERSION 4
//!VERSION 5
//!SORT_NAME Anime4K_Upscale_0
//!USE MulAdd
//!CAPABILITY FP16
//!SCALE_FACTOR 2
#include "../StubDefs.hlsli"
//!TEXTURE
Texture2D INPUT;
//!TEXTURE
//!WIDTH INPUT_WIDTH * 2
//!HEIGHT INPUT_HEIGHT * 2
Texture2D OUTPUT;
//!TEXTURE
@ -38,7 +35,6 @@ SamplerState sam;
//!FILTER LINEAR
SamplerState sam1;
//!PASS 1
//!DESC Conv-4x3x3x3
//!IN INPUT
@ -97,7 +93,6 @@ void Pass1(uint2 blockStart, uint3 threadId) {
tex1[gxy] = A4KS1(src, 1, 2);
}
//!PASS 2
//!DESC Conv-4x3x3x8
//!IN tex1
@ -240,7 +235,6 @@ void Pass3(uint2 blockStart, uint3 threadId) {
tex1[gxy] = A4KS3(src, 1, 2);
}
//!PASS 4
//!DESC Conv-4x3x3x8, Depth-to-Space
//!IN INPUT, tex1

View file

@ -26,9 +26,9 @@ public:
const EffectOption& effectOption
) noexcept = 0;
virtual bool Bind(SizeU inputSize, const ColorInfo& colorInfo) noexcept = 0;
virtual void Bind(SizeU inputSize, SizeU outputSize, const ColorInfo& colorInfo) noexcept = 0;
virtual EffectDrawerState GetState() noexcept = 0;
virtual HRESULT Update(EffectDrawerState& state, std::string& message) noexcept = 0;
virtual HRESULT Draw(
ComputeContext& computeContext,

View file

@ -92,12 +92,17 @@ bool EffectsDrawer::Initialize(
effectData.drawer = std::make_unique<ShaderEffectDrawer>();
effectData.effectInfo = effectData.drawer->Initialize(d3d12Context, options.effects[i]);
if (!effectData.effectInfo) {
Logger::Get().Error("ShaderEffectDrawer::Initialize 失败");
return false;
}
outputSize = CalcOutputSize(
// outputSize 是前一个效果的输出尺寸,即当前效果的输入尺寸
effectData.outputSize = CalcOutputSize(
effectData.effectInfo->scaleFactor, outputSize, rendererSize, effectOption);
effectData.outputSize = outputSize;
effectData.drawer->Bind(outputSize, effectData.outputSize, colorInfo);
outputSize = effectData.outputSize;
}
// 如果输出尺寸比渲染区域更大则使用 CatmullRom 等比缩小,更小时不放大
@ -115,12 +120,6 @@ bool EffectsDrawer::Initialize(
_outputSize = outputSize;
for (auto& effectData : _effectDatas) {
if (!effectData.drawer->Bind(outputSize, colorInfo)) {
return false;
}
}
// CatmullRomDrawer 将在渲染时按需创建 PSO,初始化无代价
_catmullRomDrawer.Initialize(d3d12Context);
@ -192,6 +191,16 @@ HRESULT EffectsDrawer::Draw(
//commandList->EndQuery(_queryHeap.get(), D3D12_QUERY_TYPE_TIMESTAMP, queryHeapIndex);
for (auto& effectData : _effectDatas) {
EffectDrawerState state;
std::string msg;
HRESULT hr = effectData.drawer->Update(state, msg);
if (FAILED(hr)) {
Logger::Get().ComError("ShaderEffectDrawer::Update 失败", hr);
return hr;
}
}
_catmullRomDrawer.Draw(
computeContext, _inputSize, _outputSize, inputSrvOffset, outputUavOffset, false);

View file

@ -5,6 +5,8 @@ namespace Magpie {
enum class ShaderEffectTextureFormat {
UNKNOWN,
// 在 sRGB 和 scRGB 下提供不同的精度
COLOR_SPACE_ADAPTIVE,
R8_UNORM,
R8_SNORM,
R16_UNORM,
@ -38,7 +40,8 @@ struct ShaderEffectTextureFormatProps {
};
static constexpr ShaderEffectTextureFormatProps SHADER_TEXTURE_FORMAT_PROPS[] = {
{"UNKNOWN", DXGI_FORMAT_UNKNOWN, 4, "float4", "float4"},
{"UNKNOWN", DXGI_FORMAT_UNKNOWN, 4, nullptr, nullptr},
{"COLOR_SPACE_ADAPTIVE", DXGI_FORMAT_UNKNOWN, 4, "MF", "MF"},
{"R8_UNORM", DXGI_FORMAT_R8_UNORM, 1, "MF", "unorm MF"},
{"R8_SNORM", DXGI_FORMAT_R8_SNORM, 1, "MF", "snorm MF"},
{"R16_UNORM", DXGI_FORMAT_R16_UNORM, 1, "MF", "unorm MF"},

View file

@ -1,15 +1,30 @@
#include "pch.h"
#include "D3D12Context.h"
#include "DescriptorHeap.h"
#include "EffectsService.h"
#include "Logger.h"
#include "ScalingOptions.h"
#include "ScalingWindow.h"
#include "ShaderEffectDrawer.h"
// Conan 的 muparser 不含 UNICODE 支持
#pragma push_macro("_UNICODE")
#undef _UNICODE
#include <muParser.h>
#pragma pop_macro("_UNICODE")
namespace Magpie {
// Releases what this drawer owns: the descriptor range (if one was allocated) and the
// shader compilation task (if one was submitted).
ShaderEffectDrawer::~ShaderEffectDrawer() noexcept {
	if (!_passDatas.empty()) {
		// All passes share one contiguous descriptor allocation; the first pass holds its
		// base offset. The sentinel value means nothing was allocated.
		const uint32_t descriptorBaseOffset = _passDatas[0].descriptorBaseOffset;
		if (descriptorBaseOffset != std::numeric_limits<uint32_t>::max()) {
			_d3d12Context->GetDescriptorHeap().Free(descriptorBaseOffset, _descriptorCount);
		}
	}

	// Release the task exactly once, and only when one was actually submitted. Calling
	// ReleaseTask unconditionally as well would release it twice and could pass an empty id.
	if (!_compilationTaskId.empty()) {
		EffectsService::Get().ReleaseTask(_compilationTaskId);
	}
}
const EffectInfo* ShaderEffectDrawer::Initialize(
@ -28,9 +43,28 @@ const EffectInfo* ShaderEffectDrawer::Initialize(
return effectInfo;
}
bool ShaderEffectDrawer::Bind(SizeU /*inputSize*/, const ColorInfo& colorInfo) noexcept {
const ScalingOptions& options = ScalingWindow::Get().Options();
void ShaderEffectDrawer::Bind(SizeU inputSize, SizeU outputSize, const ColorInfo& colorInfo) noexcept {
if (!_errorMsg.empty()) {
return;
}
_inputSize = inputSize;
_outputSize = outputSize;
bool wasSrgb = _colorInfo.kind != winrt::AdvancedColorKind::StandardDynamicRange;
bool isSrgb = colorInfo.kind != winrt::AdvancedColorKind::StandardDynamicRange;
_colorInfo = colorInfo;
if (wasSrgb == isSrgb && !_compilationTaskId.empty()) {
// TODO: 更新常量
return;
}
if (!_compilationTaskId.empty()) {
EffectsService::Get().ReleaseTask(_compilationTaskId);
_drawInfo = nullptr;
}
const ScalingOptions& options = ScalingWindow::Get().Options();
_compilationTaskId = EffectsService::Get().SubmitCompileShaderEffectTask(
_effectOption->name,
options.IsInlineParams() ? &_effectOption->parameters : nullptr,
@ -41,23 +75,47 @@ bool ShaderEffectDrawer::Bind(SizeU /*inputSize*/, const ColorInfo& colorInfo) n
options.IsWarningsAreErrors()
);
if (_compilationTaskId.empty()) {
_errorMsg = "编译失败";
Logger::Get().Error("EffectsService::SubmitCompileShaderEffectTask 失败");
return false;
}
return true;
}
EffectDrawerState ShaderEffectDrawer::GetState() noexcept {
HRESULT ShaderEffectDrawer::Update(EffectDrawerState& state, std::string& message) noexcept {
if (_drawInfo) {
return EffectDrawerState::Ready;
state = EffectDrawerState::Ready;
return S_OK;
}
if (EffectsService::Get().GetTaskResult(_compilationTaskId, &_drawInfo)) {
return _drawInfo ? EffectDrawerState::Ready : EffectDrawerState::NotReady;
} else {
return EffectDrawerState::Error;
if (!_errorMsg.empty()) {
state = EffectDrawerState::Error;
message = _errorMsg;
return S_OK;
}
if (!EffectsService::Get().GetTaskResult(_compilationTaskId, &_drawInfo)) {
state = EffectDrawerState::Error;
return S_OK;
}
if (!_drawInfo) {
state = EffectDrawerState::NotReady;
return S_OK;
}
HRESULT hr = _CreateDeviceResources();
if (FAILED(hr)) {
Logger::Get().ComError("_CreateDeviceResources 失败", hr);
return hr;
}
if (!_errorMsg.empty()) {
_drawInfo = nullptr;
state = EffectDrawerState::Error;
return S_OK;
}
state = EffectDrawerState::Ready;
return S_OK;
}
HRESULT ShaderEffectDrawer::Draw(
@ -66,7 +124,341 @@ HRESULT ShaderEffectDrawer::Draw(
uint32_t /*outputUavOffset*/
) noexcept {
assert(_drawInfo);
return E_NOTIMPL;
}
// Creates (or re-creates) the GPU objects needed to run the compiled effect in _drawInfo:
//   1. one root signature and compute PSO per pass,
//   2. a single descriptor range covering all passes (allocated once),
//   3. the intermediate textures and their SRVs/UAVs,
//   4. the dispatch size of every pass.
//
// Texture indices in a pass's inputs/outputs: index 0 is handled as INPUT, index 1 as
// OUTPUT, and _textures[i] is referenced as i + 2. INPUT and OUTPUT get their own
// descriptor tables and are not part of the per-pass descriptor range.
//
// Returns a failure HRESULT for device-level errors. Effect-level problems (a texture size
// expression that cannot be evaluated) are reported by setting _errorMsg and returning S_OK.
HRESULT ShaderEffectDrawer::_CreateDeviceResources() {
	ID3D12Device5* device = _d3d12Context->GetDevice();

	const uint32_t passCount = (uint32_t)_drawInfo->passes.size();
	_passDatas.resize(passCount);

	for (uint32_t passIdx = 0; passIdx < passCount; ++passIdx) {
		_PassData& curPassData = _passDatas[passIdx];
		const ShaderEffectPassDesc& curPassDesc = _drawInfo->passes[passIdx];

		winrt::com_ptr<ID3DBlob> signature;

		// At most: constant buffer + INPUT table + OUTPUT table + table for the rest.
		std::array<D3D12_ROOT_PARAMETER1, 4> rootParams{};
		uint32_t curRootParamIdx = 0;
		// At most: INPUT SRV + OUTPUT UAV + remaining SRVs + remaining UAVs.
		std::array<D3D12_DESCRIPTOR_RANGE1, 4> descriptorRanges{};
		uint32_t curDescriptorRangeIdx = 0;

		rootParams[curRootParamIdx++] = D3D12_ROOT_PARAMETER1{
			.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV,
			.Descriptor = {
				.ShaderRegister = 0,
				.Flags = D3D12_ROOT_DESCRIPTOR_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTE
			}
		};

		// The descriptors of INPUT and OUTPUT are independent; all others are contiguous.
		std::span<const uint32_t> otherInputs;
		std::span<const uint32_t> otherOutputs;

		if (!curPassDesc.inputs.empty()) {
			if (curPassDesc.inputs[0] == 0) {
				descriptorRanges[curDescriptorRangeIdx] = CD3DX12_DESCRIPTOR_RANGE1(
					D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 1, 0, 0,
					D3D12_DESCRIPTOR_RANGE_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTE);
				rootParams[curRootParamIdx++] = D3D12_ROOT_PARAMETER1{
					.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE,
					.DescriptorTable = {
						.NumDescriptorRanges = 1,
						.pDescriptorRanges = &descriptorRanges[curDescriptorRangeIdx]
					}
				};
				++curDescriptorRangeIdx;

				otherInputs = std::span(curPassDesc.inputs.begin() + 1, curPassDesc.inputs.end());
			} else {
				otherInputs = curPassDesc.inputs;
			}
		}

		assert(!curPassDesc.outputs.empty());
		if (curPassDesc.outputs[0] == 1) {
			descriptorRanges[curDescriptorRangeIdx] = CD3DX12_DESCRIPTOR_RANGE1(
				D3D12_DESCRIPTOR_RANGE_TYPE_UAV, 1, 0, 0, D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE);
			rootParams[curRootParamIdx++] = D3D12_ROOT_PARAMETER1{
				.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE,
				.DescriptorTable = {
					.NumDescriptorRanges = 1,
					.pDescriptorRanges = &descriptorRanges[curDescriptorRangeIdx]
				}
			};
			++curDescriptorRangeIdx;

			otherOutputs = std::span(curPassDesc.outputs.begin() + 1, curPassDesc.outputs.end());
		} else {
			otherOutputs = curPassDesc.outputs;
		}

		if (!otherInputs.empty() || !otherOutputs.empty()) {
			const uint32_t startIdx = curDescriptorRangeIdx;

			if (!otherInputs.empty()) {
				descriptorRanges[curDescriptorRangeIdx++] = CD3DX12_DESCRIPTOR_RANGE1(
					D3D12_DESCRIPTOR_RANGE_TYPE_SRV,
					(UINT)otherInputs.size(),
					// Registers start at 1 when INPUT was split off (it occupies register 0).
					UINT(otherInputs.data() != curPassDesc.inputs.data()),
					0,
					D3D12_DESCRIPTOR_RANGE_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTE
				);
			}

			if (!otherOutputs.empty()) {
				descriptorRanges[curDescriptorRangeIdx++] = CD3DX12_DESCRIPTOR_RANGE1(
					D3D12_DESCRIPTOR_RANGE_TYPE_UAV,
					(UINT)otherOutputs.size(),
					// Registers start at 1 when OUTPUT was split off (it occupies register 0).
					UINT(otherOutputs.data() != curPassDesc.outputs.data()),
					0,
					D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE
				);
			}

			rootParams[curRootParamIdx++] = D3D12_ROOT_PARAMETER1{
				.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE,
				.DescriptorTable = {
					.NumDescriptorRanges = curDescriptorRangeIdx - startIdx,
					.pDescriptorRanges = &descriptorRanges[startIdx]
				}
			};
		}

		// Every sampler of the effect becomes a static sampler of every pass.
		SmallVector<D3D12_STATIC_SAMPLER_DESC> samplerDescs(_drawInfo->samplers.size());
		for (size_t i = 0; i < samplerDescs.size(); ++i) {
			const ShaderEffectSamplerDesc& samplerDesc = _drawInfo->samplers[i];

			D3D12_TEXTURE_ADDRESS_MODE addressMode =
				samplerDesc.addressType == ShaderEffectSamplerAddressType::Clamp ?
				D3D12_TEXTURE_ADDRESS_MODE_CLAMP : D3D12_TEXTURE_ADDRESS_MODE_WRAP;

			samplerDescs[i] = D3D12_STATIC_SAMPLER_DESC{
				.Filter = samplerDesc.filterType == ShaderEffectSamplerFilterType::Point ?
					D3D12_FILTER_MIN_MAG_MIP_POINT : D3D12_FILTER_MIN_MAG_MIP_LINEAR,
				.AddressU = addressMode,
				.AddressV = addressMode,
				.AddressW = addressMode,
				.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER,
				.ShaderRegister = (UINT)i
			};
		}

		CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC rootSignatureDesc(
			(UINT)curRootParamIdx, rootParams.data(),
			(UINT)samplerDescs.size(), samplerDescs.data());
		HRESULT hr = D3DX12SerializeVersionedRootSignature(
			&rootSignatureDesc,
			_d3d12Context->GetRootSignatureVersion(),
			signature.put(),
			nullptr
		);
		if (FAILED(hr)) {
			Logger::Get().ComError("D3DX12SerializeVersionedRootSignature 失败", hr);
			return hr;
		}

		// This function runs again after a recompile. Release the previous objects first
		// instead of overwriting non-empty com_ptrs through IID_PPV_ARGS.
		// NOTE(review): assumes the GPU no longer uses the old objects at this point — confirm
		// once Draw is implemented.
		curPassData.rootSignature = nullptr;
		curPassData.pso = nullptr;

		hr = device->CreateRootSignature(
			0,
			signature->GetBufferPointer(),
			signature->GetBufferSize(),
			IID_PPV_ARGS(&curPassData.rootSignature)
		);
		if (FAILED(hr)) {
			Logger::Get().ComError("CreateRootSignature 失败", hr);
			return hr;
		}

		D3D12_COMPUTE_PIPELINE_STATE_DESC psoDesc = {
			.pRootSignature = curPassData.rootSignature.get(),
			.CS = CD3DX12_SHADER_BYTECODE(curPassDesc.byteCode.get())
		};
		hr = device->CreateComputePipelineState(&psoDesc, IID_PPV_ARGS(&curPassData.pso));
		if (FAILED(hr)) {
			Logger::Get().ComError("CreateComputePipelineState 失败", hr);
			return hr;
		}
	}

	// The number of descriptors needed never changes, so allocate only once.
	if (_descriptorCount == 0) {
		// Count into a local so that _descriptorCount only becomes non-zero once the
		// allocation has actually succeeded.
		uint32_t descriptorCount = 0;

		// TODO: merge passes whose inputs and outputs are identical
		for (uint32_t passIdx = 0; passIdx < passCount; ++passIdx) {
			_PassData& curPassData = _passDatas[passIdx];
			const ShaderEffectPassDesc& curPassDesc = _drawInfo->passes[passIdx];

			// Temporarily store the offset relative to the start of the range.
			curPassData.descriptorBaseOffset = descriptorCount;

			descriptorCount += (uint32_t)curPassDesc.inputs.size();
			descriptorCount += (uint32_t)curPassDesc.outputs.size();

			// INPUT and OUTPUT do not live in this range.
			if (!curPassDesc.inputs.empty() && curPassDesc.inputs[0] == 0) {
				--descriptorCount;
			}
			if (curPassDesc.outputs[0] == 1) {
				--descriptorCount;
			}
		}

		uint32_t descriptorBaseOffset;
		HRESULT hr = _d3d12Context->GetDescriptorHeap()
			.Alloc(descriptorCount, descriptorBaseOffset);
		if (FAILED(hr)) {
			// Restore the "nothing allocated" sentinel; otherwise the destructor would
			// free a range that was never allocated.
			for (_PassData& passData : _passDatas) {
				passData.descriptorBaseOffset = std::numeric_limits<uint32_t>::max();
			}

			Logger::Get().ComError("DescriptorHeap::Alloc 失败", hr);
			return hr;
		}

		_descriptorCount = descriptorCount;

		// Turn the relative offsets into absolute heap offsets.
		for (_PassData& passData : _passDatas) {
			passData.descriptorBaseOffset += descriptorBaseOffset;
		}
	}

	{
		_textures.resize(_drawInfo->textures.size());

		// Texture sizes are expressions over the input/output size.
		mu::Parser exprParser;
		exprParser.DefineConst("INPUT_WIDTH", _inputSize.width);
		exprParser.DefineConst("INPUT_HEIGHT", _inputSize.height);
		exprParser.DefineConst("OUTPUT_WIDTH", _outputSize.width);
		exprParser.DefineConst("OUTPUT_HEIGHT", _outputSize.height);

		for (size_t i = 0; i < _textures.size(); ++i) {
			const ShaderEffectTextureDesc& effectTexDesc = _drawInfo->textures[i];

			if (!effectTexDesc.source.empty()) {
				// TODO: load the texture from its source. Until that exists there is no
				// resource and no known format, so no views can be created for it.
				continue;
			}

			SizeU texSize{};
			try {
				exprParser.SetExpr(effectTexDesc.widthExpr);
				long width = std::lround(exprParser.Eval());
				exprParser.SetExpr(effectTexDesc.heightExpr);
				long height = std::lround(exprParser.Eval());

				if (width > 0 && height > 0) {
					texSize.width = (uint32_t)width;
					texSize.height = (uint32_t)height;
				}
			} catch (const mu::ParserError& e) {
				Logger::Get().Error(fmt::format("计算纹理 {} 尺寸失败: {}",
					effectTexDesc.name, e.GetMsg()));
			}

			// texSize stays zero if evaluation threw or produced a non-positive size.
			if (texSize.width == 0) {
				_errorMsg = fmt::format("计算纹理 {} 尺寸失败", effectTexDesc.name);
				return S_OK;
			}

			DXGI_FORMAT dxgiFormat = DXGI_FORMAT_UNKNOWN;
			if (effectTexDesc.format == ShaderEffectTextureFormat::COLOR_SPACE_ADAPTIVE) {
				// The adaptive format depends on the current color space.
				if (_colorInfo.kind == winrt::AdvancedColorKind::StandardDynamicRange) {
					dxgiFormat = DXGI_FORMAT_R10G10B10A2_UNORM;
				} else {
					dxgiFormat = DXGI_FORMAT_R16G16B16A16_FLOAT;
				}
			} else {
				dxgiFormat = SHADER_TEXTURE_FORMAT_PROPS[(uint32_t)effectTexDesc.format].dxgiFormat;
			}

			if (_textures[i]) {
				// Keep the existing texture (and its views) when nothing changed.
				D3D12_RESOURCE_DESC texDesc = _textures[i]->GetDesc();
				if (texDesc.Width == texSize.width && texDesc.Height == texSize.height &&
					texDesc.Format == dxgiFormat) {
					continue;
				}

				// Release the outdated texture before creating its replacement.
				// NOTE(review): assumes the GPU no longer uses it — confirm once Draw is implemented.
				_textures[i] = nullptr;
			}

			CD3DX12_HEAP_PROPERTIES heapProps(D3D12_HEAP_TYPE_DEFAULT);
			D3D12_HEAP_FLAGS heapFlags = _d3d12Context->IsHeapFlagCreateNotZeroedSupported() ?
				D3D12_HEAP_FLAG_CREATE_NOT_ZEROED : D3D12_HEAP_FLAG_NONE;
			CD3DX12_RESOURCE_DESC texDesc = CD3DX12_RESOURCE_DESC::Tex2D(dxgiFormat,
				texSize.width, texSize.height, 1, 1, 1, 0,
				D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS
			);
			HRESULT hr = device->CreateCommittedResource(&heapProps, heapFlags, &texDesc,
				D3D12_RESOURCE_STATE_UNORDERED_ACCESS, nullptr, IID_PPV_ARGS(&_textures[i]));
			if (FAILED(hr)) {
				Logger::Get().ComError("CreateCommittedResource 失败", hr);
				return hr;
			}

			auto& descriptorHeap = _d3d12Context->GetDescriptorHeap();
			const uint32_t descriptorSize = descriptorHeap.GetDescriptorSize();

			// Write a view of this texture into every pass that reads or writes it. Within a
			// pass's range the SRVs come first, followed by the UAVs.
			for (uint32_t passIdx = 0; passIdx < passCount; ++passIdx) {
				const ShaderEffectPassDesc& curPassDesc = _drawInfo->passes[passIdx];

				CD3DX12_CPU_DESCRIPTOR_HANDLE descriptorCpuHandle(
					descriptorHeap.GetCpuHandle(_passDatas[passIdx].descriptorBaseOffset));

				CD3DX12_SHADER_RESOURCE_VIEW_DESC srvDesc =
					CD3DX12_SHADER_RESOURCE_VIEW_DESC::Tex2D(dxgiFormat, 1);

				if (!curPassDesc.inputs.empty()) {
					const bool hasInput = curPassDesc.inputs[0] == 0;

					auto it = std::find(curPassDesc.inputs.begin(), curPassDesc.inputs.end(), i + 2);
					if (it != curPassDesc.inputs.end()) {
						size_t offset = it - curPassDesc.inputs.begin();
						if (hasInput) {
							// INPUT takes no slot in this range.
							--offset;
						}

						device->CreateShaderResourceView(_textures[i].get(), &srvDesc,
							CD3DX12_CPU_DESCRIPTOR_HANDLE(descriptorCpuHandle, (INT)offset, descriptorSize));
					}

					// Skip past the SRVs to reach the first UAV slot.
					descriptorCpuHandle.Offset(
						(INT)(hasInput ? curPassDesc.inputs.size() - 1 : curPassDesc.inputs.size()),
						descriptorSize
					);
				}

				CD3DX12_UNORDERED_ACCESS_VIEW_DESC uavDesc =
					CD3DX12_UNORDERED_ACCESS_VIEW_DESC::Tex2D(dxgiFormat);

				auto it = std::find(curPassDesc.outputs.begin(), curPassDesc.outputs.end(), i + 2);
				if (it != curPassDesc.outputs.end()) {
					size_t offset = it - curPassDesc.outputs.begin();
					if (curPassDesc.outputs[0] == 1) {
						// OUTPUT takes no slot in this range.
						--offset;
					}

					device->CreateUnorderedAccessView(_textures[i].get(), nullptr, &uavDesc,
						CD3DX12_CPU_DESCRIPTOR_HANDLE(descriptorCpuHandle, (INT)offset, descriptorSize));
				}
			}
		}
	}

	// The dispatch size of a pass follows from the size of its first output.
	for (uint32_t passIdx = 0; passIdx < passCount; ++passIdx) {
		const ShaderEffectPassDesc& curPassDesc = _drawInfo->passes[passIdx];

		SizeU outputSize;
		if (curPassDesc.outputs[0] == 1) {
			outputSize = _outputSize;
		} else {
			D3D12_RESOURCE_DESC texDesc = _textures[size_t(curPassDesc.outputs[0] - 2)]->GetDesc();
			outputSize = { (uint32_t)texDesc.Width,(uint32_t)texDesc.Height };
		}

		// Round up so partially covered blocks are dispatched too.
		_passDatas[passIdx].dispatchCount = {
			(outputSize.width + curPassDesc.blockSize.width - 1) / curPassDesc.blockSize.width,
			(outputSize.height + curPassDesc.blockSize.height - 1) / curPassDesc.blockSize.height
		};
	}

	return S_OK;
}
}

View file

@ -1,5 +1,6 @@
#pragma once
#include "EffectDrawerBase.h"
#include "SmallVector.h"
namespace Magpie {
@ -14,9 +15,9 @@ public:
const EffectOption& effectOption
) noexcept override;
bool Bind(SizeU inputSize, const ColorInfo& colorInfo) noexcept override;
void Bind(SizeU inputSize, SizeU outputSize, const ColorInfo& colorInfo) noexcept override;
EffectDrawerState GetState() noexcept override;
HRESULT Update(EffectDrawerState& state, std::string& message) noexcept override;
HRESULT Draw(
ComputeContext& computeContext,
@ -25,11 +26,27 @@ public:
) noexcept override;
private:
HRESULT _CreateDeviceResources();
D3D12Context* _d3d12Context = nullptr;
const EffectOption* _effectOption = nullptr;
SizeU _inputSize{};
SizeU _outputSize{};
ColorInfo _colorInfo;
std::string _compilationTaskId;
const ShaderEffectDrawInfo* _drawInfo;
const ShaderEffectDrawInfo* _drawInfo = nullptr;
std::string _errorMsg;
// GPU state kept for one pass of the effect.
struct _PassData {
	winrt::com_ptr<ID3D12RootSignature> rootSignature;
	winrt::com_ptr<ID3D12PipelineState> pso;
	// Offset of the pass's first descriptor; the maximum value means "not allocated".
	uint32_t descriptorBaseOffset = std::numeric_limits<uint32_t>::max();
	// Number of thread groups to dispatch in x and y. Value-initialized like the other
	// SizeU members of this class so it is never read uninitialized.
	SizeU dispatchCount{};
};
SmallVector<_PassData> _passDatas;
SmallVector<winrt::com_ptr<ID3D12Resource>> _textures;
uint32_t _descriptorCount = 0;
};
}

View file

@ -938,6 +938,12 @@ static bool ResolveTextureBlock(
state.errorMsg = "SOURCE 和 WIDTH/HEIGHT 冲突";
return false;
}
// SOURCE 和 COLOR_SPACE_ADAPTIVE 格式冲突
if (desc.format == ShaderEffectTextureFormat::COLOR_SPACE_ADAPTIVE) {
state.errorMsg = "SOURCE 和 COLOR_SPACE_ADAPTIVE 格式冲突";
return false;
}
}
}
@ -1546,6 +1552,10 @@ static void GenerateShaderSources(
SmallVectorImpl<ShaderEffectSource>& effectSources
) noexcept {
const uint32_t passCount = (uint32_t)drawInfo.passes.size();
const bool isFP16Enabled = bool(effectInfo.flags & EffectFlags::SupportFP16) &&
bool(options.flags & ShaderEffectParserFlags::EnableFP16);
const bool isAdvancedColorEnabled = bool(effectInfo.flags & EffectFlags::SupportAdvancedColor) &&
bool(options.flags & ShaderEffectParserFlags::EnableAdvancedColor);
// 所有通道共用的常量缓冲区
std::string headerCode = R"(cbuffer __CB1 : register(b0) {
@ -1563,6 +1573,11 @@ static void GenerateShaderSources(
}
}
// WCG/HDR needs extra constants in the shared constant buffer. Every member declaration
// must end with ';', otherwise the generated HLSL does not compile.
if (isAdvancedColorEnabled) {
	headerCode.append("\tfloat __maxLuminance;\n\tfloat __sdrWhiteLevel;\n");
}
constexpr const char* PARAM_TYPE_STRS[] = { "float ","int ","uint " };
if (!options.inlineParams) {
@ -1604,7 +1619,10 @@ static void GenerateShaderSources(
const BlockData& curPassCodeBlock = passCodeBlocks[passIdx];
// 内置宏
macros.reserve(32);
macros.reserve(48);
#ifdef _DEBUG
macros.emplace_back("MP_DEBUG", "");
#endif
macros.emplace_back("MP_BLOCK_WIDTH", StrHelper::ToString(curPassDesc.blockSize.width));
macros.emplace_back("MP_BLOCK_HEIGHT", StrHelper::ToString(curPassDesc.blockSize.height));
macros.emplace_back("MP_NUM_THREADS_X", StrHelper::ToString(curPassDesc.numThreads[0]));
@ -1619,14 +1637,40 @@ static void GenerateShaderSources(
macros.emplace_back("MP_INLINE_PARAMS", "");
}
#ifdef _DEBUG
macros.emplace_back("MP_DEBUG", "");
#endif
if (isAdvancedColorEnabled) {
macros.emplace_back("MP_CS_SCRGB", "");
} else {
macros.emplace_back("MP_CS_LINEAR_SRGB", "");
}
// MP_SM 宏
{
// 不包含 SM 5.1,因为 D3D12 始终支持
constexpr std::array allModelVersions = {
std::make_pair(D3D_SHADER_MODEL_6_9, "MP_SM_6_9"),
std::make_pair(D3D_SHADER_MODEL_6_8, "MP_SM_6_8"),
std::make_pair(D3D_SHADER_MODEL_6_7, "MP_SM_6_7"),
std::make_pair(D3D_SHADER_MODEL_6_6, "MP_SM_6_6"),
std::make_pair(D3D_SHADER_MODEL_6_5, "MP_SM_6_5"),
std::make_pair(D3D_SHADER_MODEL_6_4, "MP_SM_6_4"),
std::make_pair(D3D_SHADER_MODEL_6_3, "MP_SM_6_3"),
std::make_pair(D3D_SHADER_MODEL_6_2, "MP_SM_6_2"),
std::make_pair(D3D_SHADER_MODEL_6_1, "MP_SM_6_1"),
std::make_pair(D3D_SHADER_MODEL_6_0, "MP_SM_6_0")
};
auto it = std::find_if(
allModelVersions.begin(),
allModelVersions.end(),
[&](const auto& pair) { return pair.first == options.shaderModel; }
);
for (; it != allModelVersions.end(); ++it) {
macros.emplace_back(it->second, "");
}
}
// MF 宏
static const char* NUMBER_STRS[] = { "1","2","3","4" };
if (bool(effectInfo.flags & EffectFlags::SupportFP16) &&
bool(options.flags & ShaderEffectParserFlags::EnableFP16)) {
if (isFP16Enabled) {
macros.emplace_back("MP_FP16", "");
macros.emplace_back("MF", "min16float");
@ -1779,9 +1823,14 @@ MF4 MulAdd(MF4 x, MF4x4 y, MF4 a) {
result = mad(x.w, y._m30_m31_m32_m33, result);
return result;
}
)");
if (isAdvancedColorEnabled) {
source.append(R"(float GetMaxLuminance() { return __maxLuminance; }
float GetSdrWhiteLevel() { return __sdrWhiteLevel; }
)");
}
source.push_back('\n');
source.append(commonCodeBlock.source);
source.push_back('\n');
source.append(curPassCodeBlock.source)
@ -1804,7 +1853,7 @@ MF4 MulAdd(MF4 x, MF4x4 y, MF4 a) {
source.append(fmt::format(R"([numthreads(64, 1, 1)]
void __M(uint3 tid : SV_GroupThreadID, uint3 gid : SV_GroupID) {{
uint2 gxy = (gid.xy << 4u) + Rmp8x8(tid.x);
float2 pos = (gxy + 0.5f) * {1};
float2 pos = (gxy + 0.5) * {1};
float2 step = 8 * {1};
{2}[gxy] = Pass{0}(pos);
@ -1827,7 +1876,7 @@ void __M(uint3 tid : SV_GroupThreadID, uint3 gid : SV_GroupID) {{
source.append(fmt::format(R"([numthreads(64, 1, 1)]
void __M(uint3 tid : SV_GroupThreadID, uint3 gid : SV_GroupID) {{
uint2 gxy = (gid.xy << 4u) + Rmp8x8(tid.x);
float2 pos = (gxy + 0.5f) * __pass{0}OutputPt;
float2 pos = (gxy + 0.5) * __pass{0}OutputPt;
float2 step = 8 * __pass{0}OutputPt;
)", passIdxBase1));
for (uint32_t i = 0; i < outputCount; ++i) {

View file

@ -25,7 +25,7 @@ float4 weight4(float x) {
float4 CatmullRom(float2 pos) {
pos *= inputSize;
float2 pos1 = floor(pos - 0.5f) + 0.5f;
float2 pos1 = floor(pos - 0.5) + 0.5;
float2 f = pos - pos1;
float4 rowtaps = weight4(f.x);
@ -44,8 +44,8 @@ float4 CatmullRom(float2 pos) {
float v_middle_offset = coltaps.z * inputPt.y / v_weight_sum;
float v_middle = uv1.y + v_middle_offset;
int2 coord_top_left = int2(max(uv0 * inputSize, 0.5f));
int2 coord_bottom_right = int2(min(uv3 * inputSize, inputSize - 0.5f));
int2 coord_top_left = int2(max(uv0 * inputSize, 0.5));
int2 coord_bottom_right = int2(min(uv3 * inputSize, inputSize - 0.5));
float3 top = inputTex.Load(int3(coord_top_left, 0)).rgb * rowtaps.x;
top += inputTex.SampleLevel(linearSampler, float2(u_middle, uv0.y), 0).rgb * u_weight_sum;
@ -71,7 +71,7 @@ float4 CatmullRom(float2 pos) {
[numthreads(64, 1, 1)]
void main(uint3 tid : SV_GroupThreadID, uint3 gid : SV_GroupID) {
uint2 gxy = (gid.xy << 4) + Rmp8x8(tid.x);
float2 pos = (gxy + 0.5f) * outputPt;
float2 pos = (gxy + 0.5) * outputPt;
const float2 step = 8 * outputPt;
outputTex[gxy] = CatmullRom(pos);

View file

@ -18,7 +18,7 @@ float4 weight4(float x) {
float4 main(noperspective float2 uv : TEXCOORD) : SV_Target {
float2 pos = uv * inputSize;
float2 pos1 = floor(pos - 0.5f) + 0.5f;
float2 pos1 = floor(pos - 0.5) + 0.5;
float2 f = pos - pos1;
float4 rowtaps = weight4(f.x);

View file

@ -46,12 +46,12 @@ void main(uint3 tid : SV_GroupThreadID, uint3 gid : SV_GroupID) {
#else
// w z
// x y
float4 mask = 1.0f;
float4 mask = 1.0;
if (gxy.x + 1 >= dirtyRect.z) {
mask.yz = 0.0f;
mask.yz = 0.0;
}
if (gxy.y + 1 >= dirtyRect.w) {
mask.xy = 0.0f;
mask.xy = 0.0;
}
float4 c1 = tex1.GatherRed(sam, pos);

View file

@ -40,7 +40,7 @@ float4 main(noperspective float2 uv : TEXCOORD) : SV_TARGET {
// 255.001953 来自
// https://stackoverflow.com/questions/52103720/why-does-d3dcolortoubyte4-multiplies-components-by-255-001953f
origin = (uint3(origin * 255.001953f) ^ uint3(mask.rgb * 255.001953f)) / 255.0f;
origin = (uint3(origin * 255.001953) ^ uint3(mask.rgb * 255.001953)) / 255.0;
#ifndef MP_SRGB
origin *= white;

View file

@ -25,9 +25,9 @@ float4 main(noperspective float2 uv : TEXCOORD) : SV_TARGET {
#ifdef MP_SRGB
float c = xorMask ? 1.0f : 0.0f;
#else
float c = xorMask ? sdrWhiteLevel : 0.0f;
float c = xorMask ? sdrWhiteLevel : 0.0;
#endif
return float4(c, c, c, 1.0f);
return float4(c, c, c, 1.0);
}
float3 origin = originTex[uint2(uv * cursorSize) + originOffset].rgb;
@ -42,7 +42,7 @@ float4 main(noperspective float2 uv : TEXCOORD) : SV_TARGET {
if (xorMask) {
// 反色
origin = 1.0f - origin;
origin = 1.0 - origin;
}
#else
if (xorMask) {
@ -53,5 +53,5 @@ float4 main(noperspective float2 uv : TEXCOORD) : SV_TARGET {
}
#endif
return float4(origin, 1.0f);
return float4(origin, 1.0);
}