[FX] 添加 CAPABILITY 指令 (#1164)

* feat: FP16 归类到 CAPABILITY

* feat: 效果本身的属性和编译赋予的属性分开

* feat: USE 和 CAPABILITY 禁止重复的标志

* docs: 更新文档
This commit is contained in:
Xu 2025-05-26 22:12:15 +08:00 committed by GitHub
commit a32ea649dc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
65 changed files with 369 additions and 140 deletions

View file

@ -4,11 +4,13 @@ MagpieFX is based on DirectX 11 compute shader
//!MAGPIE EFFECT
//!VERSION 4
// Use the "USE" directive to declare the features being utilized. The following values can be combined:
// FP16: Declares support for FP16. However, this does not guarantee FP16 will be used. If the GPU
// does not support FP16 or the user has disabled it, this declaration has no effect.
// MulAdd: Enables the "MulAdd" function.
// Dynamic: Enables the "GetFrameCount" function.
//!USE FP16, MulAdd, Dynamic
//!USE MulAdd, Dynamic
// Use the "CAPABILITY" directive to declare the capabilities supported by this effect. Whether they
// are enabled depends on user configuration. The following values can be combined:
// FP16: Declares support for FP16.
//!CAPABILITY FP16
// Use "SORT_NAME" to specify the name used for sorting, otherwise the files will be sorted by their file
// names.
//!SORT_NAME test1
@ -172,7 +174,7 @@ void Pass2(uint2 blockStart, uint3 threadId) {
**MP_DEBUG**: Whether the shader is being compiled in debug mode (when compiling shaders in debug mode, they are not optimized and contain debug information).
**MP_FP16**: Whether to use half-precision floating-point numbers (specified by user).
**MP_FP16**: Whether to use half-precision floating-point numbers.
**MF, MF1, MF2, ..., MF4x4**: Floating-point data types that conform to MP_FP16. When half-precision is not specified, they are aliases for float..., otherwise they are aliases for min16float...

View file

@ -4,10 +4,12 @@ MagpieFX 基于 DirectX 11 计算着色器
//!MAGPIE EFFECT
//!VERSION 4
// 使用 USE 指令声明使用的功能,支持以下值的组合:
// FP16:声明对 FP16 的支持。注意这不能保证一定使用 FP16,如果 GPU 不支持 FP16 或者用户禁用了 FP16,这个声明没有效果
// MulAdd:使 MulAdd 函数可用
// Dynamic:使 GetFrameCount 函数可用
//!USE FP16, MulAdd, Dynamic
//!USE MulAdd, Dynamic
// 使用 CAPABILITY 指令声明效果所支持的技术,但是否使用这些技术取决于用户配置。支持以下值的组合:
// FP16:声明对 FP16 的支持
//!CAPABILITY FP16
// 使用 SORT_NAME 指定排序时使用的名字,否则按照文件名排序
//!SORT_NAME test1
@ -165,7 +167,7 @@ void Pass2(uint2 blockStart, uint3 threadId) {
**MP_DEBUG**:当前是否为调试模式(调试模式下编译的着色器不进行优化且含有调试信息)
**MP_FP16**:当前是否使用半精度浮点数(由用户指定)
**MP_FP16**:当前是否使用半精度浮点数
**MF、MF1、MF2、...、MF4x4**:遵守 fp16 参数的浮点数类型。当未指定 fp16 时,它们为 float... 的别名,否则为 min16float... 的别名

View file

@ -3,7 +3,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "StubDefs.hlsli"

View file

@ -4,7 +4,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME Anime4K_3D_Upscale_1
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -4,7 +4,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME Anime4K_3D_Upscale_0
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -3,7 +3,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME Anime4K_Restore_2
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -4,7 +4,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME Anime4K_Restore_1
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -4,7 +4,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME Anime4K_Restore_0
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -3,7 +3,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME Anime4K_Restore_Soft_2
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -4,7 +4,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME Anime4K_Restore_Soft_1
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -4,7 +4,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME Anime4K_Restore_Soft_0
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -4,7 +4,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME Anime4K_Restore_Soft_4
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -4,7 +4,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME Anime4K_Restore_Soft_3
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -4,7 +4,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME Anime4K_Restore_4
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -4,7 +4,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME Anime4K_Restore_3
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -4,7 +4,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME Anime4K_Upscale_Denoise_1
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -4,7 +4,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME Anime4K_Upscale_Denoise_0
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -4,7 +4,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME Anime4K_Upscale_Denoise_3
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -4,7 +4,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME Anime4K_Upscale_Denoise_2
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -4,7 +4,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME Anime4K_Upscale_GAN_x2_2
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -4,7 +4,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME Anime4K_Upscale_GAN_x2_1
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -3,7 +3,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -4,7 +4,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME Anime4K_Upscale_1
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -4,7 +4,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME Anime4K_Upscale_0
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -4,7 +4,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME Anime4K_Upscale_3
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -4,7 +4,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME Anime4K_Upscale_2
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -2,7 +2,7 @@
//!MAGPIE EFFECT
//!VERSION 4
//!USE FP16
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -17,7 +17,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-DN-D16N16
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -17,7 +17,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-D16N16
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -17,7 +17,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-DN-D04N02
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -17,7 +17,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-D04N02
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -17,7 +17,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-DN-D04N03
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -17,7 +17,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-D04N03
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -17,7 +17,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-DN-D16N04
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -17,7 +17,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-D16N04
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -17,7 +17,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-DN-D04N04
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -17,7 +17,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-D04N04
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -17,7 +17,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-DN-D08N04
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -17,7 +17,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-D08N04
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -17,7 +17,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-DN-D08N06
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -17,7 +17,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-D08N06
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -17,7 +17,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-DN-D16N08
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -17,7 +17,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-D16N08
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -17,7 +17,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-DN-D04N08
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -17,7 +17,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-D04N08
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -17,7 +17,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-DN-D08N08
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -17,7 +17,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-D08N08
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -18,7 +18,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-03x12
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"
@ -85,9 +86,11 @@ Texture2D T5;
//!NUM_THREADS 64
//!IN INPUT
//!OUT T0, T1, T2
#define L0(x, y) V3(O(INPUT, x, y).rgb)
#define V3 MF3
#define M3x4 MF3x4
void Pass1(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -137,15 +140,18 @@ void Pass1(uint2 blockStart, uint3 tid) {
r2 = max(r2, 0.0);
T2[gxy] = r2;
}
//!PASS 2
//!DESC conv1 (12x12)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T0, T1, T2
//!OUT T3, T4, T5
#define L0(x, y) V4(O(T0, x, y))
#define L1(x, y) V4(O(T1, x, y))
#define L2(x, y) V4(O(T2, x, y))
void Pass2(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -252,15 +258,18 @@ void Pass2(uint2 blockStart, uint3 tid) {
r2 = max(r2, 0.0);
T5[gxy] = r2;
}
//!PASS 3
//!DESC conv2 (12x12)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T3, T4, T5
//!OUT T0, T1, T2
#define L0(x, y) V4(O(T3, x, y))
#define L1(x, y) V4(O(T4, x, y))
#define L2(x, y) V4(O(T5, x, y))
void Pass3(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -367,15 +376,18 @@ void Pass3(uint2 blockStart, uint3 tid) {
r2 = max(r2, 0.0);
T2[gxy] = r2;
}
//!PASS 4
//!DESC conv3 (12x12)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T0, T1, T2
//!OUT T3, T4, T5
#define L0(x, y) V4(O(T0, x, y))
#define L1(x, y) V4(O(T1, x, y))
#define L2(x, y) V4(O(T2, x, y))
void Pass4(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -482,15 +494,18 @@ void Pass4(uint2 blockStart, uint3 tid) {
r2 = max(r2, 0.0);
T5[gxy] = r2;
}
//!PASS 5
//!DESC out-shuffle (12x12)
//!BLOCK_SIZE 16
//!NUM_THREADS 64
//!IN INPUT, T3, T4, T5
//!OUT OUTPUT
#define L0(x, y) V4(O(T3, x, y))
#define L1(x, y) V4(O(T4, x, y))
#define L2(x, y) V4(O(T5, x, y))
void Pass5(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart;

View file

@ -18,7 +18,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-04x12
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"
@ -85,9 +86,11 @@ Texture2D T5;
//!NUM_THREADS 64
//!IN INPUT
//!OUT T0, T1, T2
#define L0(x, y) V3(O(INPUT, x, y).rgb)
#define V3 MF3
#define M3x4 MF3x4
void Pass1(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -137,15 +140,18 @@ void Pass1(uint2 blockStart, uint3 tid) {
r2 = max(r2, 0.0);
T2[gxy] = r2;
}
//!PASS 2
//!DESC conv1 (12x12)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T0, T1, T2
//!OUT T3, T4, T5
#define L0(x, y) V4(O(T0, x, y))
#define L1(x, y) V4(O(T1, x, y))
#define L2(x, y) V4(O(T2, x, y))
void Pass2(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -252,15 +258,18 @@ void Pass2(uint2 blockStart, uint3 tid) {
r2 = max(r2, 0.0);
T5[gxy] = r2;
}
//!PASS 3
//!DESC conv2 (12x12)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T3, T4, T5
//!OUT T0, T1, T2
#define L0(x, y) V4(O(T3, x, y))
#define L1(x, y) V4(O(T4, x, y))
#define L2(x, y) V4(O(T5, x, y))
void Pass3(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -367,15 +376,18 @@ void Pass3(uint2 blockStart, uint3 tid) {
r2 = max(r2, 0.0);
T2[gxy] = r2;
}
//!PASS 4
//!DESC conv3 (12x12)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T0, T1, T2
//!OUT T3, T4, T5
#define L0(x, y) V4(O(T0, x, y))
#define L1(x, y) V4(O(T1, x, y))
#define L2(x, y) V4(O(T2, x, y))
void Pass4(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -482,15 +494,18 @@ void Pass4(uint2 blockStart, uint3 tid) {
r2 = max(r2, 0.0);
T5[gxy] = r2;
}
//!PASS 5
//!DESC conv4 (12x12)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T3, T4, T5
//!OUT T0, T1, T2
#define L0(x, y) V4(O(T3, x, y))
#define L1(x, y) V4(O(T4, x, y))
#define L2(x, y) V4(O(T5, x, y))
void Pass5(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -597,15 +612,18 @@ void Pass5(uint2 blockStart, uint3 tid) {
r2 = max(r2, 0.0);
T2[gxy] = r2;
}
//!PASS 6
//!DESC out-shuffle (12x12)
//!BLOCK_SIZE 16
//!NUM_THREADS 64
//!IN INPUT, T0, T1, T2
//!OUT OUTPUT
#define L0(x, y) V4(O(T0, x, y))
#define L1(x, y) V4(O(T1, x, y))
#define L2(x, y) V4(O(T2, x, y))
void Pass6(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart;

View file

@ -18,7 +18,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-04x16
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"
@ -97,9 +98,11 @@ Texture2D T7;
//!NUM_THREADS 64
//!IN INPUT
//!OUT T0, T1, T2, T3
#define L0(x, y) V3(O(INPUT, x, y).rgb)
#define V3 MF3
#define M3x4 MF3x4
void Pass1(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -161,16 +164,19 @@ void Pass1(uint2 blockStart, uint3 tid) {
r3 = max(r3, 0.0);
T3[gxy] = r3;
}
//!PASS 2
//!DESC conv1 (16x16)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T0, T1, T2, T3
//!OUT T4, T5, T6, T7
#define L0(x, y) V4(O(T0, x, y))
#define L1(x, y) V4(O(T1, x, y))
#define L2(x, y) V4(O(T2, x, y))
#define L3(x, y) V4(O(T3, x, y))
void Pass2(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -345,16 +351,19 @@ void Pass2(uint2 blockStart, uint3 tid) {
r3 = max(r3, 0.0);
T7[gxy] = r3;
}
//!PASS 3
//!DESC conv2 (16x16)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T4, T5, T6, T7
//!OUT T0, T1, T2, T3
#define L0(x, y) V4(O(T4, x, y))
#define L1(x, y) V4(O(T5, x, y))
#define L2(x, y) V4(O(T6, x, y))
#define L3(x, y) V4(O(T7, x, y))
void Pass3(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -529,16 +538,19 @@ void Pass3(uint2 blockStart, uint3 tid) {
r3 = max(r3, 0.0);
T3[gxy] = r3;
}
//!PASS 4
//!DESC conv3 (16x16)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T0, T1, T2, T3
//!OUT T4, T5, T6, T7
#define L0(x, y) V4(O(T0, x, y))
#define L1(x, y) V4(O(T1, x, y))
#define L2(x, y) V4(O(T2, x, y))
#define L3(x, y) V4(O(T3, x, y))
void Pass4(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -713,16 +725,19 @@ void Pass4(uint2 blockStart, uint3 tid) {
r3 = max(r3, 0.0);
T7[gxy] = r3;
}
//!PASS 5
//!DESC conv4 (16x16)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T4, T5, T6, T7
//!OUT T0, T1, T2, T3
#define L0(x, y) V4(O(T4, x, y))
#define L1(x, y) V4(O(T5, x, y))
#define L2(x, y) V4(O(T6, x, y))
#define L3(x, y) V4(O(T7, x, y))
void Pass5(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -897,16 +912,19 @@ void Pass5(uint2 blockStart, uint3 tid) {
r3 = max(r3, 0.0);
T3[gxy] = r3;
}
//!PASS 6
//!DESC out-shuffle (16x12)
//!BLOCK_SIZE 16
//!NUM_THREADS 64
//!IN INPUT, T0, T1, T2, T3
//!OUT OUTPUT
#define L0(x, y) V4(O(T0, x, y))
#define L1(x, y) V4(O(T1, x, y))
#define L2(x, y) V4(O(T2, x, y))
#define L3(x, y) V4(O(T3, x, y))
void Pass6(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart;

View file

@ -18,7 +18,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-04x24
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"
@ -121,9 +122,11 @@ Texture2D T11;
//!NUM_THREADS 64
//!IN INPUT
//!OUT T0, T1, T2, T3, T4, T5
#define L0(x, y) V3(O(INPUT, x, y).rgb)
#define V3 MF3
#define M3x4 MF3x4
void Pass1(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -209,18 +212,21 @@ void Pass1(uint2 blockStart, uint3 tid) {
r5 = max(r5, 0.0);
T5[gxy] = r5;
}
//!PASS 2
//!DESC conv1 (24x24)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T0, T1, T2, T3, T4, T5
//!OUT T6, T7, T8, T9, T10, T11
#define L0(x, y) V4(O(T0, x, y))
#define L1(x, y) V4(O(T1, x, y))
#define L2(x, y) V4(O(T2, x, y))
#define L3(x, y) V4(O(T3, x, y))
#define L4(x, y) V4(O(T4, x, y))
#define L5(x, y) V4(O(T5, x, y))
void Pass2(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -585,18 +591,21 @@ void Pass2(uint2 blockStart, uint3 tid) {
r5 = max(r5, 0.0);
T11[gxy] = r5;
}
//!PASS 3
//!DESC conv2 (24x24)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T6, T7, T8, T9, T10, T11
//!OUT T0, T1, T2, T3, T4, T5
#define L0(x, y) V4(O(T6, x, y))
#define L1(x, y) V4(O(T7, x, y))
#define L2(x, y) V4(O(T8, x, y))
#define L3(x, y) V4(O(T9, x, y))
#define L4(x, y) V4(O(T10, x, y))
#define L5(x, y) V4(O(T11, x, y))
void Pass3(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -961,18 +970,21 @@ void Pass3(uint2 blockStart, uint3 tid) {
r5 = max(r5, 0.0);
T5[gxy] = r5;
}
//!PASS 4
//!DESC conv3 (24x24)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T0, T1, T2, T3, T4, T5
//!OUT T6, T7, T8, T9, T10, T11
#define L0(x, y) V4(O(T0, x, y))
#define L1(x, y) V4(O(T1, x, y))
#define L2(x, y) V4(O(T2, x, y))
#define L3(x, y) V4(O(T3, x, y))
#define L4(x, y) V4(O(T4, x, y))
#define L5(x, y) V4(O(T5, x, y))
void Pass4(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -1337,18 +1349,21 @@ void Pass4(uint2 blockStart, uint3 tid) {
r5 = max(r5, 0.0);
T11[gxy] = r5;
}
//!PASS 5
//!DESC conv4 (24x24)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T6, T7, T8, T9, T10, T11
//!OUT T0, T1, T2, T3, T4, T5
#define L0(x, y) V4(O(T6, x, y))
#define L1(x, y) V4(O(T7, x, y))
#define L2(x, y) V4(O(T8, x, y))
#define L3(x, y) V4(O(T9, x, y))
#define L4(x, y) V4(O(T10, x, y))
#define L5(x, y) V4(O(T11, x, y))
void Pass5(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -1713,18 +1728,21 @@ void Pass5(uint2 blockStart, uint3 tid) {
r5 = max(r5, 0.0);
T5[gxy] = r5;
}
//!PASS 6
//!DESC out-shuffle (24x12)
//!BLOCK_SIZE 16
//!NUM_THREADS 64
//!IN INPUT, T0, T1, T2, T3, T4, T5
//!OUT OUTPUT
#define L0(x, y) V4(O(T0, x, y))
#define L1(x, y) V4(O(T1, x, y))
#define L2(x, y) V4(O(T2, x, y))
#define L3(x, y) V4(O(T3, x, y))
#define L4(x, y) V4(O(T4, x, y))
#define L5(x, y) V4(O(T5, x, y))
void Pass6(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart;

View file

@ -18,7 +18,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-04x32
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"
@ -145,9 +146,11 @@ Texture2D T15;
//!NUM_THREADS 64
//!IN INPUT
//!OUT T0, T1, T2, T3, T4, T5, T6, T7
#define L0(x, y) V3(O(INPUT, x, y).rgb)
#define V3 MF3
#define M3x4 MF3x4
void Pass1(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -257,12 +260,14 @@ void Pass1(uint2 blockStart, uint3 tid) {
r7 = max(r7, 0.0);
T7[gxy] = r7;
}
//!PASS 2
//!DESC conv1 (32x32)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T0, T1, T2, T3, T4, T5, T6, T7
//!OUT T8, T9, T10, T11, T12, T13, T14, T15
#define L0(x, y) V4(O(T0, x, y))
#define L1(x, y) V4(O(T1, x, y))
#define L2(x, y) V4(O(T2, x, y))
@ -271,6 +276,7 @@ void Pass1(uint2 blockStart, uint3 tid) {
#define L5(x, y) V4(O(T5, x, y))
#define L6(x, y) V4(O(T6, x, y))
#define L7(x, y) V4(O(T7, x, y))
void Pass2(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -897,12 +903,14 @@ void Pass2(uint2 blockStart, uint3 tid) {
r7 = max(r7, 0.0);
T15[gxy] = r7;
}
//!PASS 3
//!DESC conv2 (32x32)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T8, T9, T10, T11, T12, T13, T14, T15
//!OUT T0, T1, T2, T3, T4, T5, T6, T7
#define L0(x, y) V4(O(T8, x, y))
#define L1(x, y) V4(O(T9, x, y))
#define L2(x, y) V4(O(T10, x, y))
@ -911,6 +919,7 @@ void Pass2(uint2 blockStart, uint3 tid) {
#define L5(x, y) V4(O(T13, x, y))
#define L6(x, y) V4(O(T14, x, y))
#define L7(x, y) V4(O(T15, x, y))
void Pass3(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -1537,12 +1546,14 @@ void Pass3(uint2 blockStart, uint3 tid) {
r7 = max(r7, 0.0);
T7[gxy] = r7;
}
//!PASS 4
//!DESC conv3 (32x32)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T0, T1, T2, T3, T4, T5, T6, T7
//!OUT T8, T9, T10, T11, T12, T13, T14, T15
#define L0(x, y) V4(O(T0, x, y))
#define L1(x, y) V4(O(T1, x, y))
#define L2(x, y) V4(O(T2, x, y))
@ -1551,6 +1562,7 @@ void Pass3(uint2 blockStart, uint3 tid) {
#define L5(x, y) V4(O(T5, x, y))
#define L6(x, y) V4(O(T6, x, y))
#define L7(x, y) V4(O(T7, x, y))
void Pass4(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -2177,12 +2189,14 @@ void Pass4(uint2 blockStart, uint3 tid) {
r7 = max(r7, 0.0);
T15[gxy] = r7;
}
//!PASS 5
//!DESC conv4 (32x32)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T8, T9, T10, T11, T12, T13, T14, T15
//!OUT T0, T1, T2, T3, T4, T5, T6, T7
#define L0(x, y) V4(O(T8, x, y))
#define L1(x, y) V4(O(T9, x, y))
#define L2(x, y) V4(O(T10, x, y))
@ -2191,6 +2205,7 @@ void Pass4(uint2 blockStart, uint3 tid) {
#define L5(x, y) V4(O(T13, x, y))
#define L6(x, y) V4(O(T14, x, y))
#define L7(x, y) V4(O(T15, x, y))
void Pass5(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -2817,12 +2832,14 @@ void Pass5(uint2 blockStart, uint3 tid) {
r7 = max(r7, 0.0);
T7[gxy] = r7;
}
//!PASS 6
//!DESC out-shuffle (32x12)
//!BLOCK_SIZE 16
//!NUM_THREADS 64
//!IN INPUT, T0, T1, T2, T3, T4, T5, T6, T7
//!OUT OUTPUT
#define L0(x, y) V4(O(T0, x, y))
#define L1(x, y) V4(O(T1, x, y))
#define L2(x, y) V4(O(T2, x, y))
@ -2831,6 +2848,7 @@ void Pass5(uint2 blockStart, uint3 tid) {
#define L5(x, y) V4(O(T5, x, y))
#define L6(x, y) V4(O(T6, x, y))
#define L7(x, y) V4(O(T7, x, y))
void Pass6(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart;

View file

@ -18,7 +18,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-08x32
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"
@ -145,9 +146,11 @@ Texture2D T15;
//!NUM_THREADS 64
//!IN INPUT
//!OUT T0, T1, T2, T3, T4, T5, T6, T7
#define L0(x, y) V3(O(INPUT, x, y).rgb)
#define V3 MF3
#define M3x4 MF3x4
void Pass1(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -257,12 +260,14 @@ void Pass1(uint2 blockStart, uint3 tid) {
r7 = max(r7, 0.0);
T7[gxy] = r7;
}
//!PASS 2
//!DESC conv1 (32x32)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T0, T1, T2, T3, T4, T5, T6, T7
//!OUT T8, T9, T10, T11, T12, T13, T14, T15
#define L0(x, y) V4(O(T0, x, y))
#define L1(x, y) V4(O(T1, x, y))
#define L2(x, y) V4(O(T2, x, y))
@ -271,6 +276,7 @@ void Pass1(uint2 blockStart, uint3 tid) {
#define L5(x, y) V4(O(T5, x, y))
#define L6(x, y) V4(O(T6, x, y))
#define L7(x, y) V4(O(T7, x, y))
void Pass2(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -897,12 +903,14 @@ void Pass2(uint2 blockStart, uint3 tid) {
r7 = max(r7, 0.0);
T15[gxy] = r7;
}
//!PASS 3
//!DESC conv2 (32x32)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T8, T9, T10, T11, T12, T13, T14, T15
//!OUT T0, T1, T2, T3, T4, T5, T6, T7
#define L0(x, y) V4(O(T8, x, y))
#define L1(x, y) V4(O(T9, x, y))
#define L2(x, y) V4(O(T10, x, y))
@ -911,6 +919,7 @@ void Pass2(uint2 blockStart, uint3 tid) {
#define L5(x, y) V4(O(T13, x, y))
#define L6(x, y) V4(O(T14, x, y))
#define L7(x, y) V4(O(T15, x, y))
void Pass3(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -1537,12 +1546,14 @@ void Pass3(uint2 blockStart, uint3 tid) {
r7 = max(r7, 0.0);
T7[gxy] = r7;
}
//!PASS 4
//!DESC conv3 (32x32)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T0, T1, T2, T3, T4, T5, T6, T7
//!OUT T8, T9, T10, T11, T12, T13, T14, T15
#define L0(x, y) V4(O(T0, x, y))
#define L1(x, y) V4(O(T1, x, y))
#define L2(x, y) V4(O(T2, x, y))
@ -1551,6 +1562,7 @@ void Pass3(uint2 blockStart, uint3 tid) {
#define L5(x, y) V4(O(T5, x, y))
#define L6(x, y) V4(O(T6, x, y))
#define L7(x, y) V4(O(T7, x, y))
void Pass4(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -2177,12 +2189,14 @@ void Pass4(uint2 blockStart, uint3 tid) {
r7 = max(r7, 0.0);
T15[gxy] = r7;
}
//!PASS 5
//!DESC conv4 (32x32)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T8, T9, T10, T11, T12, T13, T14, T15
//!OUT T0, T1, T2, T3, T4, T5, T6, T7
#define L0(x, y) V4(O(T8, x, y))
#define L1(x, y) V4(O(T9, x, y))
#define L2(x, y) V4(O(T10, x, y))
@ -2191,6 +2205,7 @@ void Pass4(uint2 blockStart, uint3 tid) {
#define L5(x, y) V4(O(T13, x, y))
#define L6(x, y) V4(O(T14, x, y))
#define L7(x, y) V4(O(T15, x, y))
void Pass5(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -2817,12 +2832,14 @@ void Pass5(uint2 blockStart, uint3 tid) {
r7 = max(r7, 0.0);
T7[gxy] = r7;
}
//!PASS 6
//!DESC conv5 (32x32)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T0, T1, T2, T3, T4, T5, T6, T7
//!OUT T8, T9, T10, T11, T12, T13, T14, T15
#define L0(x, y) V4(O(T0, x, y))
#define L1(x, y) V4(O(T1, x, y))
#define L2(x, y) V4(O(T2, x, y))
@ -2831,6 +2848,7 @@ void Pass5(uint2 blockStart, uint3 tid) {
#define L5(x, y) V4(O(T5, x, y))
#define L6(x, y) V4(O(T6, x, y))
#define L7(x, y) V4(O(T7, x, y))
void Pass6(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -3457,12 +3475,14 @@ void Pass6(uint2 blockStart, uint3 tid) {
r7 = max(r7, 0.0);
T15[gxy] = r7;
}
//!PASS 7
//!DESC conv6 (32x32)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T8, T9, T10, T11, T12, T13, T14, T15
//!OUT T0, T1, T2, T3, T4, T5, T6, T7
#define L0(x, y) V4(O(T8, x, y))
#define L1(x, y) V4(O(T9, x, y))
#define L2(x, y) V4(O(T10, x, y))
@ -3471,6 +3491,7 @@ void Pass6(uint2 blockStart, uint3 tid) {
#define L5(x, y) V4(O(T13, x, y))
#define L6(x, y) V4(O(T14, x, y))
#define L7(x, y) V4(O(T15, x, y))
void Pass7(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -4097,12 +4118,14 @@ void Pass7(uint2 blockStart, uint3 tid) {
r7 = max(r7, 0.0);
T7[gxy] = r7;
}
//!PASS 8
//!DESC conv7 (32x32)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T0, T1, T2, T3, T4, T5, T6, T7
//!OUT T8, T9, T10, T11, T12, T13, T14, T15
#define L0(x, y) V4(O(T0, x, y))
#define L1(x, y) V4(O(T1, x, y))
#define L2(x, y) V4(O(T2, x, y))
@ -4111,6 +4134,7 @@ void Pass7(uint2 blockStart, uint3 tid) {
#define L5(x, y) V4(O(T5, x, y))
#define L6(x, y) V4(O(T6, x, y))
#define L7(x, y) V4(O(T7, x, y))
void Pass8(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -4737,12 +4761,14 @@ void Pass8(uint2 blockStart, uint3 tid) {
r7 = max(r7, 0.0);
T15[gxy] = r7;
}
//!PASS 9
//!DESC conv8 (32x32)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T8, T9, T10, T11, T12, T13, T14, T15
//!OUT T0, T1, T2, T3, T4, T5, T6, T7
#define L0(x, y) V4(O(T8, x, y))
#define L1(x, y) V4(O(T9, x, y))
#define L2(x, y) V4(O(T10, x, y))
@ -4751,6 +4777,7 @@ void Pass8(uint2 blockStart, uint3 tid) {
#define L5(x, y) V4(O(T13, x, y))
#define L6(x, y) V4(O(T14, x, y))
#define L7(x, y) V4(O(T15, x, y))
void Pass9(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -5377,12 +5404,14 @@ void Pass9(uint2 blockStart, uint3 tid) {
r7 = max(r7, 0.0);
T7[gxy] = r7;
}
//!PASS 10
//!DESC out-shuffle (32x12)
//!BLOCK_SIZE 16
//!NUM_THREADS 64
//!IN INPUT, T0, T1, T2, T3, T4, T5, T6, T7
//!OUT OUTPUT
#define L0(x, y) V4(O(T0, x, y))
#define L1(x, y) V4(O(T1, x, y))
#define L2(x, y) V4(O(T2, x, y))
@ -5391,6 +5420,7 @@ void Pass9(uint2 blockStart, uint3 tid) {
#define L5(x, y) V4(O(T5, x, y))
#define L6(x, y) V4(O(T6, x, y))
#define L7(x, y) V4(O(T7, x, y))
void Pass10(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart;

View file

@ -18,7 +18,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-0003
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"
@ -85,7 +86,9 @@ Texture2D T5;
//!NUM_THREADS 64
//!IN INPUT
//!OUT T0, T1, T2
#define L0(x, y) MF(dot(MF3(0.299, 0.587, 0.114), O(INPUT, x, y).rgb))
void Pass1(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -135,15 +138,18 @@ void Pass1(uint2 blockStart, uint3 tid) {
r2 = max(r2, 0.0);
T2[gxy] = r2;
}
//!PASS 2
//!DESC conv1 (12x12)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T0, T1, T2
//!OUT T3, T4, T5
#define L0(x, y) V4(O(T0, x, y))
#define L1(x, y) V4(O(T1, x, y))
#define L2(x, y) V4(O(T2, x, y))
void Pass2(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -250,15 +256,18 @@ void Pass2(uint2 blockStart, uint3 tid) {
r2 = max(r2, 0.0);
T5[gxy] = r2;
}
//!PASS 3
//!DESC conv2 (12x8)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T3, T4, T5
//!OUT T0, T1
#define L0(x, y) V4(O(T3, x, y))
#define L1(x, y) V4(O(T4, x, y))
#define L2(x, y) V4(O(T5, x, y))
void Pass3(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -336,14 +345,17 @@ void Pass3(uint2 blockStart, uint3 tid) {
r1 = max(r1, 0.0);
T1[gxy] = r1;
}
//!PASS 4
//!DESC out-shuffle (8x4)
//!BLOCK_SIZE 16
//!NUM_THREADS 64
//!IN INPUT, T0, T1
//!OUT OUTPUT
#define L0(x, y) V4(O(T0, x, y))
#define L1(x, y) V4(O(T1, x, y))
void Pass4(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart;

View file

@ -18,7 +18,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-0002
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"
@ -73,7 +74,9 @@ Texture2D T3;
//!NUM_THREADS 64
//!IN INPUT
//!OUT T0, T1
#define L0(x, y) MF(dot(MF3(0.299, 0.587, 0.114), O(INPUT, x, y).rgb))
void Pass1(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -111,14 +114,17 @@ void Pass1(uint2 blockStart, uint3 tid) {
r1 = max(r1, 0.0);
T1[gxy] = r1;
}
//!PASS 2
//!DESC conv1 (8x8)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T0, T1
//!OUT T2, T3
#define L0(x, y) V4(O(T0, x, y))
#define L1(x, y) V4(O(T1, x, y))
void Pass2(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -175,14 +181,17 @@ void Pass2(uint2 blockStart, uint3 tid) {
r1 = max(r1, 0.0);
T3[gxy] = r1;
}
//!PASS 3
//!DESC conv2 (8x8)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T2, T3
//!OUT T0, T1
#define L0(x, y) V4(O(T2, x, y))
#define L1(x, y) V4(O(T3, x, y))
void Pass3(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -239,14 +248,17 @@ void Pass3(uint2 blockStart, uint3 tid) {
r1 = max(r1, 0.0);
T1[gxy] = r1;
}
//!PASS 4
//!DESC out-shuffle (8x4)
//!BLOCK_SIZE 16
//!NUM_THREADS 64
//!IN INPUT, T0, T1
//!OUT OUTPUT
#define L0(x, y) V4(O(T0, x, y))
#define L1(x, y) V4(O(T1, x, y))
void Pass4(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart;

View file

@ -18,7 +18,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME CuNNy-0001
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"
@ -73,7 +74,9 @@ Texture2D T3;
//!NUM_THREADS 64
//!IN INPUT
//!OUT T0, T1
#define L0(x, y) MF(dot(MF3(0.299, 0.587, 0.114), O(INPUT, x, y).rgb))
void Pass1(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -111,14 +114,17 @@ void Pass1(uint2 blockStart, uint3 tid) {
r1 = max(r1, 0.0);
T1[gxy] = r1;
}
//!PASS 2
//!DESC conv1 (8x8)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T0, T1
//!OUT T2, T3
#define L0(x, y) V4(O(T0, x, y))
#define L1(x, y) V4(O(T1, x, y))
void Pass2(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -175,14 +181,17 @@ void Pass2(uint2 blockStart, uint3 tid) {
r1 = max(r1, 0.0);
T3[gxy] = r1;
}
//!PASS 3
//!DESC conv2 (8x4)
//!BLOCK_SIZE 8
//!NUM_THREADS 64
//!IN T2, T3
//!OUT T0
#define L0(x, y) V4(O(T2, x, y))
#define L1(x, y) V4(O(T3, x, y))
void Pass3(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = Rmp8x8(tid.x) + blockStart;
@ -219,13 +228,16 @@ void Pass3(uint2 blockStart, uint3 tid) {
r0 = max(r0, 0.0);
T0[gxy] = r0;
}
//!PASS 4
//!DESC out-shuffle (4x4)
//!BLOCK_SIZE 16
//!NUM_THREADS 64
//!IN INPUT, T0
//!OUT OUTPUT
#define L0(x, y) V4(O(T0, x, y))
void Pass4(uint2 blockStart, uint3 tid) {
float2 pt = float2(GetInputPt());
uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart;

View file

@ -3,7 +3,7 @@
//!MAGPIE EFFECT
//!VERSION 4
//!USE FP16
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -3,7 +3,7 @@
//!MAGPIE EFFECT
//!VERSION 4
//!USE FP16
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -4,7 +4,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -3,7 +3,8 @@
//!MAGPIE EFFECT
//!VERSION 4
//!USE FP16, MulAdd
//!USE MulAdd
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -1,6 +1,6 @@
//!MAGPIE EFFECT
//!VERSION 4
//!USE FP16
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -2,7 +2,7 @@
//!MAGPIE EFFECT
//!VERSION 4
//!USE FP16
//!CAPABILITY FP16
#include "../StubDefs.hlsli"

View file

@ -266,34 +266,79 @@ static uint32_t GetNextExpr(std::string_view& source, std::string& expr) noexcep
return 0;
}
static uint32_t ResolvePassFlags(std::string_view& block, uint32_t& passFlags) noexcept {
std::string_view features;
if (GetNextString(block, features)) {
static uint32_t ResolveUseFlags(std::string_view& block, uint32_t& effectFlags) noexcept {
std::string_view flags;
if (GetNextString(block, flags)) {
return 1;
}
for (std::string_view& feature : StrHelper::Split(features, ',')) {
StrHelper::Trim(feature);
std::bitset<2> processed;
if (feature == "FP16") {
passFlags |= EffectPassFlags::UseFP16;
} else if (feature == "MulAdd") {
passFlags |= EffectPassFlags::UseMulAdd;
} else if (feature == "Dynamic") {
passFlags |= EffectPassFlags::UseDynamic;
for (std::string_view& flag : StrHelper::Split(flags, ',')) {
StrHelper::Trim(flag);
if (flag == "MulAdd") {
if (processed[0]) {
return 1;
}
processed[0] = true;
effectFlags |= EffectFlags::UseMulAdd;
} else if (flag == "Dynamic") {
if (processed[1]) {
return 1;
}
processed[1] = true;
effectFlags |= EffectFlags::UseDynamic;
} else {
Logger::Get().Warn(StrHelper::Concat("使用了未知功能: ", feature));
Logger::Get().Warn(StrHelper::Concat("使用了未知 USE 标志: ", flag));
}
}
return 0;
}
static uint32_t ResolveHeader(std::string_view block, EffectDesc& desc, uint32_t& commonPassFlags, bool noCompile) noexcept {
// Parses the value list of a CAPABILITY directive and records the declared
// capabilities in effectFlags.
//
// block:       remaining directive text; handed to GetNextString to obtain the
//              comma-separated capability list.
// effectFlags: receives EffectFlags::SupportFP16 when "FP16" is listed, and
//              additionally EffectFlags::FP16 unless noFP16 is set.
// noFP16:      when true, FP16 support is still recorded but FP16 is not enabled.
//
// Returns 0 on success. Returns 1 when GetNextString reports failure or when
// "FP16" is listed more than once. Unrecognized entries only produce a warning.
static uint32_t ResolveCapabilityFlags(std::string_view& block, uint32_t& effectFlags, bool noFP16) noexcept {
	std::string_view capabilityList;
	if (GetNextString(block, capabilityList)) {
		return 1;
	}

	// Tracks whether FP16 has already been seen so duplicates can be rejected.
	bool seenFP16 = false;

	for (std::string_view& capability : StrHelper::Split(capabilityList, ',')) {
		StrHelper::Trim(capability);

		if (capability != "FP16") {
			// Unknown capabilities are tolerated: warn and keep parsing.
			Logger::Get().Warn(StrHelper::Concat("使用了未知 CAPABILITY 标志: ", capability));
			continue;
		}

		if (seenFP16) {
			return 1;
		}
		seenFP16 = true;

		// The effect always advertises FP16 support; whether it is actually
		// enabled depends on the caller-supplied noFP16 switch.
		uint32_t granted = EffectFlags::SupportFP16;
		if (!noFP16) {
			granted |= EffectFlags::FP16;
		}
		effectFlags |= granted;
	}

	return 0;
}
static uint32_t ResolveHeader(
std::string_view block,
EffectDesc& desc,
uint32_t& effectFlags,
bool noCompile,
bool noFP16
) noexcept {
// Required directive: VERSION
// Optional directives: SORT_NAME, USE, CAPABILITY
std::bitset<3> processed;
std::bitset<4> processed;
std::string_view token;
@ -345,7 +390,16 @@ static uint32_t ResolveHeader(std::string_view block, EffectDesc& desc, uint32_t
}
processed[2] = true;
if (ResolvePassFlags(block, commonPassFlags)) {
if (ResolveUseFlags(block, effectFlags)) {
return 1;
}
} else if (t == "CAPABILITY") {
if (processed[3]) {
return 1;
}
processed[3] = true;
if (ResolveCapabilityFlags(block, effectFlags, noFP16)) {
return 1;
}
} else {
@ -792,12 +846,7 @@ static uint32_t ResolveCommon(std::string_view& block) noexcept {
return 0;
}
static uint32_t ResolvePasses(
SmallVector<std::string_view>& blocks,
EffectDesc& desc,
uint32_t commonPassFlags,
bool noFP16
) noexcept {
static uint32_t ResolvePasses(SmallVector<std::string_view>& blocks, EffectDesc& desc) noexcept {
// 必选项: IN, OUT
// 可选项: BLOCK_SIZE, NUM_THREADS, STYLE, USE
// STYLE 为 PS 时不能有 BLOCK_SIZE 或 NUM_THREADS
@ -857,9 +906,6 @@ static uint32_t ResolvePasses(
std::string_view& block = blocks[i];
auto& passDesc = desc.passes[i];
// 应用头中的标志
passDesc.flags |= commonPassFlags;
// 用于检查输入和输出中重复的纹理
phmap::flat_hash_map<std::string_view, uint32_t> texNames;
texNames.reserve(desc.textures.size());
@ -1047,15 +1093,6 @@ static uint32_t ResolvePasses(
StrHelper::Trim(val);
passDesc.desc = val;
} else if (t == "USE") {
if (processed[6]) {
return 1;
}
processed[6] = true;
if (ResolvePassFlags(block, passDesc.flags)) {
return 1;
}
} else {
Logger::Get().Warn(fmt::format("解析通道 {} 时遇到未知指令: {}", i + 1, t));
}
@ -1079,10 +1116,6 @@ static uint32_t ResolvePasses(
if (passDesc.desc.empty()) {
passDesc.desc = fmt::format("Pass {}", i + 1);
}
if (noFP16) {
passDesc.flags &= ~EffectPassFlags::UseFP16;
}
}
return 0;
@ -1097,8 +1130,6 @@ static uint32_t GeneratePassSource(
std::string& result,
std::vector<std::pair<std::string, std::string>>& macros
) noexcept {
bool isInlineParams = desc.flags & EffectFlags::InlineParams;
const EffectPassDesc& passDesc = desc.passes[(size_t)passIdx - 1];
{
@ -1114,7 +1145,7 @@ static uint32_t GeneratePassSource(
// 常量缓冲区
result.append(cbHlsl);
if (passDesc.flags & EffectPassFlags::UseDynamic) {
if (desc.flags & EffectFlags::UseDynamic) {
result.append("cbuffer __CB2 : register(b1) { uint __frameCount; };\n\n");
}
@ -1162,7 +1193,7 @@ static uint32_t GeneratePassSource(
macros.emplace_back("MP_PS_STYLE", "");
}
if (isInlineParams) {
if (desc.flags & EffectFlags::InlineParams) {
macros.emplace_back("MP_INLINE_PARAMS", "");
}
@ -1172,7 +1203,7 @@ static uint32_t GeneratePassSource(
// 用于在 FP32 和 FP16 间切换的宏
static const char* numbers[] = { "1","2","3","4" };
if (passDesc.flags & EffectPassFlags::UseFP16) {
if (desc.flags & EffectFlags::FP16) {
macros.emplace_back("MP_FP16", "");
macros.emplace_back("MF", "min16float");
@ -1210,7 +1241,7 @@ float2 GetOutputPt() { return __outputPt; }
float2 GetScale() { return __scale; }
)");
if (passDesc.flags & EffectPassFlags::UseMulAdd) {
if (desc.flags & EffectFlags::UseMulAdd) {
// 使用 mad 而不是 mul经测试这可以大幅提高性能且和 FP16 的兼容性更好。
// 见 GH#1049
// result.append(R"(MF2 MulAdd(MF2 x, MF2x2 y, MF2 a) { return mul(x, y) + a; }
@ -1289,7 +1320,7 @@ MF4 MulAdd(MF4 x, MF4x4 y, MF4 a) {
)");
}
if (passDesc.flags & EffectPassFlags::UseDynamic) {
if (desc.flags & EffectFlags::UseDynamic) {
result.append(R"(uint GetFrameCount() { return __frameCount; }
)");
@ -1717,9 +1748,7 @@ uint32_t EffectCompiler::Compile(
return 1;
}
// 头中的标志将应用到所有通道
uint32_t commonPassFlags = 0;
if (ResolveHeader(headerBlock, desc, commonPassFlags, noCompile)) {
if (ResolveHeader(headerBlock, desc, desc.flags, noCompile, flags & EffectCompilerFlags::NoFP16)) {
Logger::Get().Error("解析 Header 块失败");
return 1;
}
@ -1800,7 +1829,7 @@ uint32_t EffectCompiler::Compile(
}
desc.passes.clear();
if (ResolvePasses(passBlocks, desc, commonPassFlags, flags & EffectCompilerFlags::NoFP16)) {
if (ResolvePasses(passBlocks, desc)) {
Logger::Get().Error("解析 Pass 块失败");
return 1;
}

View file

@ -561,7 +561,7 @@ ID3D11Texture2D* Renderer::_BuildEffects() noexcept {
info.passNames.emplace_back(std::move(passDesc.desc));
}
info.isFP16 = desc.passes[0].flags & EffectPassFlags::UseFP16;
info.isFP16 = desc.flags & EffectFlags::FP16;
}
// 输出尺寸大于缩放窗口尺寸则需要降采样
@ -610,26 +610,20 @@ ID3D11Texture2D* Renderer::_BuildEffects() noexcept {
// 初始化所有效果共用的动态常量缓冲区
for (const EffectDesc& effectDesc : effectDescs) {
for (const EffectPassDesc& passDesc : effectDesc.passes) {
if (passDesc.flags & EffectPassFlags::UseDynamic) {
D3D11_BUFFER_DESC bd{
.ByteWidth = 16, // 只用 4 个字节
.Usage = D3D11_USAGE_DYNAMIC,
.BindFlags = D3D11_BIND_CONSTANT_BUFFER,
.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE
};
if (effectDesc.flags & EffectFlags::UseDynamic) {
D3D11_BUFFER_DESC bd{
.ByteWidth = 16, // 只用 4 个字节
.Usage = D3D11_USAGE_DYNAMIC,
.BindFlags = D3D11_BIND_CONSTANT_BUFFER,
.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE
};
HRESULT hr = _backendResources.GetD3DDevice()->CreateBuffer(&bd, nullptr, _dynamicCB.put());
if (FAILED(hr)) {
Logger::Get().ComError("CreateBuffer 失败", hr);
return nullptr;
}
break;
HRESULT hr = _backendResources.GetD3DDevice()->CreateBuffer(&bd, nullptr, _dynamicCB.put());
if (FAILED(hr)) {
Logger::Get().ComError("CreateBuffer 失败", hr);
return nullptr;
}
}
if (_dynamicCB) {
break;
}
}

View file

@ -70,9 +70,6 @@ struct EffectParameterDesc {
struct EffectPassFlags {
static constexpr uint32_t PSStyle = 1;
static constexpr uint32_t UseFP16 = 1 << 1;
static constexpr uint32_t UseMulAdd = 1 << 2;
static constexpr uint32_t UseDynamic = 1 << 3;
};
struct EffectPassDesc {
@ -86,17 +83,23 @@ struct EffectPassDesc {
};
struct EffectFlags {
static constexpr uint32_t InlineParams = 1;
// 效果本身的属性
static constexpr uint32_t UseDynamic = 1;
static constexpr uint32_t UseMulAdd = 1 << 1;
static constexpr uint32_t SupportFP16 = 1 << 2;
// 编译赋予的属性
static constexpr uint32_t InlineParams = 1 << 16;
static constexpr uint32_t FP16 = 1 << 17;
};
struct EffectDesc {
std::string name;
std::string sortName; // 仅供 UI 使用
const std::pair<std::string, std::string>& GetOutputSizeExpr() const noexcept {
return textures[1].sizeExpr;
}
std::string name;
std::string sortName; // 仅供 UI 使用
std::vector<EffectParameterDesc> params;
// 0: INPUT
// 1: OUTPUT