mirror of
https://github.com/Blinue/Magpie.git
synced 2026-06-24 02:04:10 +00:00
[FX] 添加 CAPABILITY 指令 (#1164)
* feat: FP16 归类到 CAPABILITY * feat: 效果本身的属性和编译赋予的属性分开 * feat: USE 和 CAPABILITY 禁止重复的标志 * docs: 更新文档
This commit is contained in:
parent
fb6ae46734
commit
a32ea649dc
65 changed files with 369 additions and 140 deletions
|
|
@ -4,11 +4,13 @@ MagpieFX is based on DirectX 11 compute shader
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
// Use the "USE" directive to declare the features being utilized. The following values can be combined:
|
||||
// FP16: Declares support for FP16. However, this does not guarantee FP16 will be used. If the GPU
|
||||
// does not support FP16 or the user has disabled it, this declaration has no effect.
|
||||
// MulAdd: Enables the "MulAdd" function.
|
||||
// Dynamic: Enables the "GetFrameCount" function.
|
||||
//!USE FP16, MulAdd, Dynamic
|
||||
//!USE MulAdd, Dynamic
|
||||
// Use the "CAPABILITY" directive to declare the capabilities supported by this effect. Whether they
|
||||
// are enabled depends on user configuration. The following values can be combined:
|
||||
// FP16: Declares support for FP16.
|
||||
//!CAPABILITY FP16
|
||||
// Use "SORT_NAME" to specify the name used for sorting, otherwise the files will be sorted by their file
|
||||
// names.
|
||||
//!SORT_NAME test1
|
||||
|
|
@ -172,7 +174,7 @@ void Pass2(uint2 blockStart, uint3 threadId) {
|
|||
|
||||
**MP_DEBUG**: Whether the shader is being compiled in debug mode (when compiling shaders in debug mode, they are not optimized and contain debug information).
|
||||
|
||||
**MP_FP16**: Whether to use half-precision floating-point numbers (specified by user).
|
||||
**MP_FP16**: Whether to use half-precision floating-point numbers.
|
||||
|
||||
**MF, MF1, MF2, ..., MF4x4**: Floating-point data types that conform to MP_FP16. When half-precision is not specified, they are aliases for float..., otherwise they are aliases for min16float...
|
||||
|
||||
|
|
|
|||
|
|
@ -4,10 +4,12 @@ MagpieFX 基于 DirectX 11 计算着色器
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
// 使用 USE 指令声明使用的功能,支持以下值的组合:
|
||||
// FP16:声明对 FP16 的支持。注意这不能保证一定使用 FP16,如果 GPU 不支持 FP16 或者用户禁用了 FP16,这个声明没有效果
|
||||
// MulAdd:使 MulAdd 函数可用
|
||||
// Dynamic:使 GetFrameCount 函数可用
|
||||
//!USE FP16, MulAdd, Dynamic
|
||||
//!USE MulAdd, Dynamic
|
||||
// 使用 CAPABILITY 指令声明效果所支持的技术,但是否使用这些技术取决于用户配置。支持以下值的组合:
|
||||
// FP16:声明对 FP16 的支持
|
||||
//!CAPABILITY FP16
|
||||
// 使用 SORT_NAME 指定排序时使用的名字,否则按照文件名排序
|
||||
//!SORT_NAME test1
|
||||
|
||||
|
|
@ -165,7 +167,7 @@ void Pass2(uint2 blockStart, uint3 threadId) {
|
|||
|
||||
**MP_DEBUG**:当前是否为调试模式(调试模式下编译的着色器不进行优化且含有调试信息)
|
||||
|
||||
**MP_FP16**:当前是否使用半精度浮点数(由用户指定)
|
||||
**MP_FP16**:当前是否使用半精度浮点数
|
||||
|
||||
**MF、MF1、MF2、...、MF4x4**:遵守 fp16 参数的浮点数类型。当未指定 fp16,它们为 float... 的别名,否则为 min16float... 的别名
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,8 @@
|
|||
|
||||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME Anime4K_3D_Upscale_1
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME Anime4K_3D_Upscale_0
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME Anime4K_Restore_2
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME Anime4K_Restore_1
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME Anime4K_Restore_0
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME Anime4K_Restore_Soft_2
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME Anime4K_Restore_Soft_1
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME Anime4K_Restore_Soft_0
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME Anime4K_Restore_Soft_4
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME Anime4K_Restore_Soft_3
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME Anime4K_Restore_4
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME Anime4K_Restore_3
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME Anime4K_Upscale_Denoise_1
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME Anime4K_Upscale_Denoise_0
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME Anime4K_Upscale_Denoise_3
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME Anime4K_Upscale_Denoise_2
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME Anime4K_Upscale_GAN_x2_2
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME Anime4K_Upscale_GAN_x2_1
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,8 @@
|
|||
|
||||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME Anime4K_Upscale_1
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME Anime4K_Upscale_0
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME Anime4K_Upscale_3
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME Anime4K_Upscale_2
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!USE FP16
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME CuNNy-DN-D16N16
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME CuNNy-D16N16
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME CuNNy-DN-D04N02
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME CuNNy-D04N02
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME CuNNy-DN-D04N03
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME CuNNy-D04N03
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME CuNNy-DN-D16N04
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME CuNNy-D16N04
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME CuNNy-DN-D04N04
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME CuNNy-D04N04
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME CuNNy-DN-D08N04
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME CuNNy-D08N04
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME CuNNy-DN-D08N06
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME CuNNy-D08N06
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME CuNNy-DN-D16N08
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME CuNNy-D16N08
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME CuNNy-DN-D04N08
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME CuNNy-D04N08
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME CuNNy-DN-D08N08
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME CuNNy-D08N08
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -18,7 +18,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME CuNNy-03x12
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
@ -85,9 +86,11 @@ Texture2D T5;
|
|||
//!NUM_THREADS 64
|
||||
//!IN INPUT
|
||||
//!OUT T0, T1, T2
|
||||
|
||||
#define L0(x, y) V3(O(INPUT, x, y).rgb)
|
||||
#define V3 MF3
|
||||
#define M3x4 MF3x4
|
||||
|
||||
void Pass1(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -137,15 +140,18 @@ void Pass1(uint2 blockStart, uint3 tid) {
|
|||
r2 = max(r2, 0.0);
|
||||
T2[gxy] = r2;
|
||||
}
|
||||
|
||||
//!PASS 2
|
||||
//!DESC conv1 (12x12)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T0, T1, T2
|
||||
//!OUT T3, T4, T5
|
||||
|
||||
#define L0(x, y) V4(O(T0, x, y))
|
||||
#define L1(x, y) V4(O(T1, x, y))
|
||||
#define L2(x, y) V4(O(T2, x, y))
|
||||
|
||||
void Pass2(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -252,15 +258,18 @@ void Pass2(uint2 blockStart, uint3 tid) {
|
|||
r2 = max(r2, 0.0);
|
||||
T5[gxy] = r2;
|
||||
}
|
||||
|
||||
//!PASS 3
|
||||
//!DESC conv2 (12x12)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T3, T4, T5
|
||||
//!OUT T0, T1, T2
|
||||
|
||||
#define L0(x, y) V4(O(T3, x, y))
|
||||
#define L1(x, y) V4(O(T4, x, y))
|
||||
#define L2(x, y) V4(O(T5, x, y))
|
||||
|
||||
void Pass3(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -367,15 +376,18 @@ void Pass3(uint2 blockStart, uint3 tid) {
|
|||
r2 = max(r2, 0.0);
|
||||
T2[gxy] = r2;
|
||||
}
|
||||
|
||||
//!PASS 4
|
||||
//!DESC conv3 (12x12)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T0, T1, T2
|
||||
//!OUT T3, T4, T5
|
||||
|
||||
#define L0(x, y) V4(O(T0, x, y))
|
||||
#define L1(x, y) V4(O(T1, x, y))
|
||||
#define L2(x, y) V4(O(T2, x, y))
|
||||
|
||||
void Pass4(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -482,15 +494,18 @@ void Pass4(uint2 blockStart, uint3 tid) {
|
|||
r2 = max(r2, 0.0);
|
||||
T5[gxy] = r2;
|
||||
}
|
||||
|
||||
//!PASS 5
|
||||
//!DESC out-shuffle (12x12)
|
||||
//!BLOCK_SIZE 16
|
||||
//!NUM_THREADS 64
|
||||
//!IN INPUT, T3, T4, T5
|
||||
//!OUT OUTPUT
|
||||
|
||||
#define L0(x, y) V4(O(T3, x, y))
|
||||
#define L1(x, y) V4(O(T4, x, y))
|
||||
#define L2(x, y) V4(O(T5, x, y))
|
||||
|
||||
void Pass5(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart;
|
||||
|
|
|
|||
|
|
@ -18,7 +18,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME CuNNy-04x12
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
@ -85,9 +86,11 @@ Texture2D T5;
|
|||
//!NUM_THREADS 64
|
||||
//!IN INPUT
|
||||
//!OUT T0, T1, T2
|
||||
|
||||
#define L0(x, y) V3(O(INPUT, x, y).rgb)
|
||||
#define V3 MF3
|
||||
#define M3x4 MF3x4
|
||||
|
||||
void Pass1(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -137,15 +140,18 @@ void Pass1(uint2 blockStart, uint3 tid) {
|
|||
r2 = max(r2, 0.0);
|
||||
T2[gxy] = r2;
|
||||
}
|
||||
|
||||
//!PASS 2
|
||||
//!DESC conv1 (12x12)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T0, T1, T2
|
||||
//!OUT T3, T4, T5
|
||||
|
||||
#define L0(x, y) V4(O(T0, x, y))
|
||||
#define L1(x, y) V4(O(T1, x, y))
|
||||
#define L2(x, y) V4(O(T2, x, y))
|
||||
|
||||
void Pass2(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -252,15 +258,18 @@ void Pass2(uint2 blockStart, uint3 tid) {
|
|||
r2 = max(r2, 0.0);
|
||||
T5[gxy] = r2;
|
||||
}
|
||||
|
||||
//!PASS 3
|
||||
//!DESC conv2 (12x12)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T3, T4, T5
|
||||
//!OUT T0, T1, T2
|
||||
|
||||
#define L0(x, y) V4(O(T3, x, y))
|
||||
#define L1(x, y) V4(O(T4, x, y))
|
||||
#define L2(x, y) V4(O(T5, x, y))
|
||||
|
||||
void Pass3(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -367,15 +376,18 @@ void Pass3(uint2 blockStart, uint3 tid) {
|
|||
r2 = max(r2, 0.0);
|
||||
T2[gxy] = r2;
|
||||
}
|
||||
|
||||
//!PASS 4
|
||||
//!DESC conv3 (12x12)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T0, T1, T2
|
||||
//!OUT T3, T4, T5
|
||||
|
||||
#define L0(x, y) V4(O(T0, x, y))
|
||||
#define L1(x, y) V4(O(T1, x, y))
|
||||
#define L2(x, y) V4(O(T2, x, y))
|
||||
|
||||
void Pass4(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -482,15 +494,18 @@ void Pass4(uint2 blockStart, uint3 tid) {
|
|||
r2 = max(r2, 0.0);
|
||||
T5[gxy] = r2;
|
||||
}
|
||||
|
||||
//!PASS 5
|
||||
//!DESC conv4 (12x12)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T3, T4, T5
|
||||
//!OUT T0, T1, T2
|
||||
|
||||
#define L0(x, y) V4(O(T3, x, y))
|
||||
#define L1(x, y) V4(O(T4, x, y))
|
||||
#define L2(x, y) V4(O(T5, x, y))
|
||||
|
||||
void Pass5(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -597,15 +612,18 @@ void Pass5(uint2 blockStart, uint3 tid) {
|
|||
r2 = max(r2, 0.0);
|
||||
T2[gxy] = r2;
|
||||
}
|
||||
|
||||
//!PASS 6
|
||||
//!DESC out-shuffle (12x12)
|
||||
//!BLOCK_SIZE 16
|
||||
//!NUM_THREADS 64
|
||||
//!IN INPUT, T0, T1, T2
|
||||
//!OUT OUTPUT
|
||||
|
||||
#define L0(x, y) V4(O(T0, x, y))
|
||||
#define L1(x, y) V4(O(T1, x, y))
|
||||
#define L2(x, y) V4(O(T2, x, y))
|
||||
|
||||
void Pass6(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart;
|
||||
|
|
|
|||
|
|
@ -18,7 +18,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME CuNNy-04x16
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
@ -97,9 +98,11 @@ Texture2D T7;
|
|||
//!NUM_THREADS 64
|
||||
//!IN INPUT
|
||||
//!OUT T0, T1, T2, T3
|
||||
|
||||
#define L0(x, y) V3(O(INPUT, x, y).rgb)
|
||||
#define V3 MF3
|
||||
#define M3x4 MF3x4
|
||||
|
||||
void Pass1(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -161,16 +164,19 @@ void Pass1(uint2 blockStart, uint3 tid) {
|
|||
r3 = max(r3, 0.0);
|
||||
T3[gxy] = r3;
|
||||
}
|
||||
|
||||
//!PASS 2
|
||||
//!DESC conv1 (16x16)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T0, T1, T2, T3
|
||||
//!OUT T4, T5, T6, T7
|
||||
|
||||
#define L0(x, y) V4(O(T0, x, y))
|
||||
#define L1(x, y) V4(O(T1, x, y))
|
||||
#define L2(x, y) V4(O(T2, x, y))
|
||||
#define L3(x, y) V4(O(T3, x, y))
|
||||
|
||||
void Pass2(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -345,16 +351,19 @@ void Pass2(uint2 blockStart, uint3 tid) {
|
|||
r3 = max(r3, 0.0);
|
||||
T7[gxy] = r3;
|
||||
}
|
||||
|
||||
//!PASS 3
|
||||
//!DESC conv2 (16x16)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T4, T5, T6, T7
|
||||
//!OUT T0, T1, T2, T3
|
||||
|
||||
#define L0(x, y) V4(O(T4, x, y))
|
||||
#define L1(x, y) V4(O(T5, x, y))
|
||||
#define L2(x, y) V4(O(T6, x, y))
|
||||
#define L3(x, y) V4(O(T7, x, y))
|
||||
|
||||
void Pass3(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -529,16 +538,19 @@ void Pass3(uint2 blockStart, uint3 tid) {
|
|||
r3 = max(r3, 0.0);
|
||||
T3[gxy] = r3;
|
||||
}
|
||||
|
||||
//!PASS 4
|
||||
//!DESC conv3 (16x16)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T0, T1, T2, T3
|
||||
//!OUT T4, T5, T6, T7
|
||||
|
||||
#define L0(x, y) V4(O(T0, x, y))
|
||||
#define L1(x, y) V4(O(T1, x, y))
|
||||
#define L2(x, y) V4(O(T2, x, y))
|
||||
#define L3(x, y) V4(O(T3, x, y))
|
||||
|
||||
void Pass4(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -713,16 +725,19 @@ void Pass4(uint2 blockStart, uint3 tid) {
|
|||
r3 = max(r3, 0.0);
|
||||
T7[gxy] = r3;
|
||||
}
|
||||
|
||||
//!PASS 5
|
||||
//!DESC conv4 (16x16)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T4, T5, T6, T7
|
||||
//!OUT T0, T1, T2, T3
|
||||
|
||||
#define L0(x, y) V4(O(T4, x, y))
|
||||
#define L1(x, y) V4(O(T5, x, y))
|
||||
#define L2(x, y) V4(O(T6, x, y))
|
||||
#define L3(x, y) V4(O(T7, x, y))
|
||||
|
||||
void Pass5(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -897,16 +912,19 @@ void Pass5(uint2 blockStart, uint3 tid) {
|
|||
r3 = max(r3, 0.0);
|
||||
T3[gxy] = r3;
|
||||
}
|
||||
|
||||
//!PASS 6
|
||||
//!DESC out-shuffle (16x12)
|
||||
//!BLOCK_SIZE 16
|
||||
//!NUM_THREADS 64
|
||||
//!IN INPUT, T0, T1, T2, T3
|
||||
//!OUT OUTPUT
|
||||
|
||||
#define L0(x, y) V4(O(T0, x, y))
|
||||
#define L1(x, y) V4(O(T1, x, y))
|
||||
#define L2(x, y) V4(O(T2, x, y))
|
||||
#define L3(x, y) V4(O(T3, x, y))
|
||||
|
||||
void Pass6(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart;
|
||||
|
|
|
|||
|
|
@ -18,7 +18,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME CuNNy-04x24
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
@ -121,9 +122,11 @@ Texture2D T11;
|
|||
//!NUM_THREADS 64
|
||||
//!IN INPUT
|
||||
//!OUT T0, T1, T2, T3, T4, T5
|
||||
|
||||
#define L0(x, y) V3(O(INPUT, x, y).rgb)
|
||||
#define V3 MF3
|
||||
#define M3x4 MF3x4
|
||||
|
||||
void Pass1(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -209,18 +212,21 @@ void Pass1(uint2 blockStart, uint3 tid) {
|
|||
r5 = max(r5, 0.0);
|
||||
T5[gxy] = r5;
|
||||
}
|
||||
|
||||
//!PASS 2
|
||||
//!DESC conv1 (24x24)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T0, T1, T2, T3, T4, T5
|
||||
//!OUT T6, T7, T8, T9, T10, T11
|
||||
|
||||
#define L0(x, y) V4(O(T0, x, y))
|
||||
#define L1(x, y) V4(O(T1, x, y))
|
||||
#define L2(x, y) V4(O(T2, x, y))
|
||||
#define L3(x, y) V4(O(T3, x, y))
|
||||
#define L4(x, y) V4(O(T4, x, y))
|
||||
#define L5(x, y) V4(O(T5, x, y))
|
||||
|
||||
void Pass2(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -585,18 +591,21 @@ void Pass2(uint2 blockStart, uint3 tid) {
|
|||
r5 = max(r5, 0.0);
|
||||
T11[gxy] = r5;
|
||||
}
|
||||
|
||||
//!PASS 3
|
||||
//!DESC conv2 (24x24)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T6, T7, T8, T9, T10, T11
|
||||
//!OUT T0, T1, T2, T3, T4, T5
|
||||
|
||||
#define L0(x, y) V4(O(T6, x, y))
|
||||
#define L1(x, y) V4(O(T7, x, y))
|
||||
#define L2(x, y) V4(O(T8, x, y))
|
||||
#define L3(x, y) V4(O(T9, x, y))
|
||||
#define L4(x, y) V4(O(T10, x, y))
|
||||
#define L5(x, y) V4(O(T11, x, y))
|
||||
|
||||
void Pass3(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -961,18 +970,21 @@ void Pass3(uint2 blockStart, uint3 tid) {
|
|||
r5 = max(r5, 0.0);
|
||||
T5[gxy] = r5;
|
||||
}
|
||||
|
||||
//!PASS 4
|
||||
//!DESC conv3 (24x24)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T0, T1, T2, T3, T4, T5
|
||||
//!OUT T6, T7, T8, T9, T10, T11
|
||||
|
||||
#define L0(x, y) V4(O(T0, x, y))
|
||||
#define L1(x, y) V4(O(T1, x, y))
|
||||
#define L2(x, y) V4(O(T2, x, y))
|
||||
#define L3(x, y) V4(O(T3, x, y))
|
||||
#define L4(x, y) V4(O(T4, x, y))
|
||||
#define L5(x, y) V4(O(T5, x, y))
|
||||
|
||||
void Pass4(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -1337,18 +1349,21 @@ void Pass4(uint2 blockStart, uint3 tid) {
|
|||
r5 = max(r5, 0.0);
|
||||
T11[gxy] = r5;
|
||||
}
|
||||
|
||||
//!PASS 5
|
||||
//!DESC conv4 (24x24)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T6, T7, T8, T9, T10, T11
|
||||
//!OUT T0, T1, T2, T3, T4, T5
|
||||
|
||||
#define L0(x, y) V4(O(T6, x, y))
|
||||
#define L1(x, y) V4(O(T7, x, y))
|
||||
#define L2(x, y) V4(O(T8, x, y))
|
||||
#define L3(x, y) V4(O(T9, x, y))
|
||||
#define L4(x, y) V4(O(T10, x, y))
|
||||
#define L5(x, y) V4(O(T11, x, y))
|
||||
|
||||
void Pass5(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -1713,18 +1728,21 @@ void Pass5(uint2 blockStart, uint3 tid) {
|
|||
r5 = max(r5, 0.0);
|
||||
T5[gxy] = r5;
|
||||
}
|
||||
|
||||
//!PASS 6
|
||||
//!DESC out-shuffle (24x12)
|
||||
//!BLOCK_SIZE 16
|
||||
//!NUM_THREADS 64
|
||||
//!IN INPUT, T0, T1, T2, T3, T4, T5
|
||||
//!OUT OUTPUT
|
||||
|
||||
#define L0(x, y) V4(O(T0, x, y))
|
||||
#define L1(x, y) V4(O(T1, x, y))
|
||||
#define L2(x, y) V4(O(T2, x, y))
|
||||
#define L3(x, y) V4(O(T3, x, y))
|
||||
#define L4(x, y) V4(O(T4, x, y))
|
||||
#define L5(x, y) V4(O(T5, x, y))
|
||||
|
||||
void Pass6(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart;
|
||||
|
|
|
|||
|
|
@ -18,7 +18,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME CuNNy-04x32
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
@ -145,9 +146,11 @@ Texture2D T15;
|
|||
//!NUM_THREADS 64
|
||||
//!IN INPUT
|
||||
//!OUT T0, T1, T2, T3, T4, T5, T6, T7
|
||||
|
||||
#define L0(x, y) V3(O(INPUT, x, y).rgb)
|
||||
#define V3 MF3
|
||||
#define M3x4 MF3x4
|
||||
|
||||
void Pass1(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -257,12 +260,14 @@ void Pass1(uint2 blockStart, uint3 tid) {
|
|||
r7 = max(r7, 0.0);
|
||||
T7[gxy] = r7;
|
||||
}
|
||||
|
||||
//!PASS 2
|
||||
//!DESC conv1 (32x32)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T0, T1, T2, T3, T4, T5, T6, T7
|
||||
//!OUT T8, T9, T10, T11, T12, T13, T14, T15
|
||||
|
||||
#define L0(x, y) V4(O(T0, x, y))
|
||||
#define L1(x, y) V4(O(T1, x, y))
|
||||
#define L2(x, y) V4(O(T2, x, y))
|
||||
|
|
@ -271,6 +276,7 @@ void Pass1(uint2 blockStart, uint3 tid) {
|
|||
#define L5(x, y) V4(O(T5, x, y))
|
||||
#define L6(x, y) V4(O(T6, x, y))
|
||||
#define L7(x, y) V4(O(T7, x, y))
|
||||
|
||||
void Pass2(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -897,12 +903,14 @@ void Pass2(uint2 blockStart, uint3 tid) {
|
|||
r7 = max(r7, 0.0);
|
||||
T15[gxy] = r7;
|
||||
}
|
||||
|
||||
//!PASS 3
|
||||
//!DESC conv2 (32x32)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T8, T9, T10, T11, T12, T13, T14, T15
|
||||
//!OUT T0, T1, T2, T3, T4, T5, T6, T7
|
||||
|
||||
#define L0(x, y) V4(O(T8, x, y))
|
||||
#define L1(x, y) V4(O(T9, x, y))
|
||||
#define L2(x, y) V4(O(T10, x, y))
|
||||
|
|
@ -911,6 +919,7 @@ void Pass2(uint2 blockStart, uint3 tid) {
|
|||
#define L5(x, y) V4(O(T13, x, y))
|
||||
#define L6(x, y) V4(O(T14, x, y))
|
||||
#define L7(x, y) V4(O(T15, x, y))
|
||||
|
||||
void Pass3(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -1537,12 +1546,14 @@ void Pass3(uint2 blockStart, uint3 tid) {
|
|||
r7 = max(r7, 0.0);
|
||||
T7[gxy] = r7;
|
||||
}
|
||||
|
||||
//!PASS 4
|
||||
//!DESC conv3 (32x32)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T0, T1, T2, T3, T4, T5, T6, T7
|
||||
//!OUT T8, T9, T10, T11, T12, T13, T14, T15
|
||||
|
||||
#define L0(x, y) V4(O(T0, x, y))
|
||||
#define L1(x, y) V4(O(T1, x, y))
|
||||
#define L2(x, y) V4(O(T2, x, y))
|
||||
|
|
@ -1551,6 +1562,7 @@ void Pass3(uint2 blockStart, uint3 tid) {
|
|||
#define L5(x, y) V4(O(T5, x, y))
|
||||
#define L6(x, y) V4(O(T6, x, y))
|
||||
#define L7(x, y) V4(O(T7, x, y))
|
||||
|
||||
void Pass4(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -2177,12 +2189,14 @@ void Pass4(uint2 blockStart, uint3 tid) {
|
|||
r7 = max(r7, 0.0);
|
||||
T15[gxy] = r7;
|
||||
}
|
||||
|
||||
//!PASS 5
|
||||
//!DESC conv4 (32x32)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T8, T9, T10, T11, T12, T13, T14, T15
|
||||
//!OUT T0, T1, T2, T3, T4, T5, T6, T7
|
||||
|
||||
#define L0(x, y) V4(O(T8, x, y))
|
||||
#define L1(x, y) V4(O(T9, x, y))
|
||||
#define L2(x, y) V4(O(T10, x, y))
|
||||
|
|
@ -2191,6 +2205,7 @@ void Pass4(uint2 blockStart, uint3 tid) {
|
|||
#define L5(x, y) V4(O(T13, x, y))
|
||||
#define L6(x, y) V4(O(T14, x, y))
|
||||
#define L7(x, y) V4(O(T15, x, y))
|
||||
|
||||
void Pass5(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -2817,12 +2832,14 @@ void Pass5(uint2 blockStart, uint3 tid) {
|
|||
r7 = max(r7, 0.0);
|
||||
T7[gxy] = r7;
|
||||
}
|
||||
|
||||
//!PASS 6
|
||||
//!DESC out-shuffle (32x12)
|
||||
//!BLOCK_SIZE 16
|
||||
//!NUM_THREADS 64
|
||||
//!IN INPUT, T0, T1, T2, T3, T4, T5, T6, T7
|
||||
//!OUT OUTPUT
|
||||
|
||||
#define L0(x, y) V4(O(T0, x, y))
|
||||
#define L1(x, y) V4(O(T1, x, y))
|
||||
#define L2(x, y) V4(O(T2, x, y))
|
||||
|
|
@ -2831,6 +2848,7 @@ void Pass5(uint2 blockStart, uint3 tid) {
|
|||
#define L5(x, y) V4(O(T5, x, y))
|
||||
#define L6(x, y) V4(O(T6, x, y))
|
||||
#define L7(x, y) V4(O(T7, x, y))
|
||||
|
||||
void Pass6(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart;
|
||||
|
|
|
|||
|
|
@ -18,7 +18,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME CuNNy-08x32
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
@ -145,9 +146,11 @@ Texture2D T15;
|
|||
//!NUM_THREADS 64
|
||||
//!IN INPUT
|
||||
//!OUT T0, T1, T2, T3, T4, T5, T6, T7
|
||||
|
||||
#define L0(x, y) V3(O(INPUT, x, y).rgb)
|
||||
#define V3 MF3
|
||||
#define M3x4 MF3x4
|
||||
|
||||
void Pass1(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -257,12 +260,14 @@ void Pass1(uint2 blockStart, uint3 tid) {
|
|||
r7 = max(r7, 0.0);
|
||||
T7[gxy] = r7;
|
||||
}
|
||||
|
||||
//!PASS 2
|
||||
//!DESC conv1 (32x32)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T0, T1, T2, T3, T4, T5, T6, T7
|
||||
//!OUT T8, T9, T10, T11, T12, T13, T14, T15
|
||||
|
||||
#define L0(x, y) V4(O(T0, x, y))
|
||||
#define L1(x, y) V4(O(T1, x, y))
|
||||
#define L2(x, y) V4(O(T2, x, y))
|
||||
|
|
@ -271,6 +276,7 @@ void Pass1(uint2 blockStart, uint3 tid) {
|
|||
#define L5(x, y) V4(O(T5, x, y))
|
||||
#define L6(x, y) V4(O(T6, x, y))
|
||||
#define L7(x, y) V4(O(T7, x, y))
|
||||
|
||||
void Pass2(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -897,12 +903,14 @@ void Pass2(uint2 blockStart, uint3 tid) {
|
|||
r7 = max(r7, 0.0);
|
||||
T15[gxy] = r7;
|
||||
}
|
||||
|
||||
//!PASS 3
|
||||
//!DESC conv2 (32x32)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T8, T9, T10, T11, T12, T13, T14, T15
|
||||
//!OUT T0, T1, T2, T3, T4, T5, T6, T7
|
||||
|
||||
#define L0(x, y) V4(O(T8, x, y))
|
||||
#define L1(x, y) V4(O(T9, x, y))
|
||||
#define L2(x, y) V4(O(T10, x, y))
|
||||
|
|
@ -911,6 +919,7 @@ void Pass2(uint2 blockStart, uint3 tid) {
|
|||
#define L5(x, y) V4(O(T13, x, y))
|
||||
#define L6(x, y) V4(O(T14, x, y))
|
||||
#define L7(x, y) V4(O(T15, x, y))
|
||||
|
||||
void Pass3(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -1537,12 +1546,14 @@ void Pass3(uint2 blockStart, uint3 tid) {
|
|||
r7 = max(r7, 0.0);
|
||||
T7[gxy] = r7;
|
||||
}
|
||||
|
||||
//!PASS 4
|
||||
//!DESC conv3 (32x32)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T0, T1, T2, T3, T4, T5, T6, T7
|
||||
//!OUT T8, T9, T10, T11, T12, T13, T14, T15
|
||||
|
||||
#define L0(x, y) V4(O(T0, x, y))
|
||||
#define L1(x, y) V4(O(T1, x, y))
|
||||
#define L2(x, y) V4(O(T2, x, y))
|
||||
|
|
@ -1551,6 +1562,7 @@ void Pass3(uint2 blockStart, uint3 tid) {
|
|||
#define L5(x, y) V4(O(T5, x, y))
|
||||
#define L6(x, y) V4(O(T6, x, y))
|
||||
#define L7(x, y) V4(O(T7, x, y))
|
||||
|
||||
void Pass4(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -2177,12 +2189,14 @@ void Pass4(uint2 blockStart, uint3 tid) {
|
|||
r7 = max(r7, 0.0);
|
||||
T15[gxy] = r7;
|
||||
}
|
||||
|
||||
//!PASS 5
|
||||
//!DESC conv4 (32x32)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T8, T9, T10, T11, T12, T13, T14, T15
|
||||
//!OUT T0, T1, T2, T3, T4, T5, T6, T7
|
||||
|
||||
#define L0(x, y) V4(O(T8, x, y))
|
||||
#define L1(x, y) V4(O(T9, x, y))
|
||||
#define L2(x, y) V4(O(T10, x, y))
|
||||
|
|
@ -2191,6 +2205,7 @@ void Pass4(uint2 blockStart, uint3 tid) {
|
|||
#define L5(x, y) V4(O(T13, x, y))
|
||||
#define L6(x, y) V4(O(T14, x, y))
|
||||
#define L7(x, y) V4(O(T15, x, y))
|
||||
|
||||
void Pass5(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -2817,12 +2832,14 @@ void Pass5(uint2 blockStart, uint3 tid) {
|
|||
r7 = max(r7, 0.0);
|
||||
T7[gxy] = r7;
|
||||
}
|
||||
|
||||
//!PASS 6
|
||||
//!DESC conv5 (32x32)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T0, T1, T2, T3, T4, T5, T6, T7
|
||||
//!OUT T8, T9, T10, T11, T12, T13, T14, T15
|
||||
|
||||
#define L0(x, y) V4(O(T0, x, y))
|
||||
#define L1(x, y) V4(O(T1, x, y))
|
||||
#define L2(x, y) V4(O(T2, x, y))
|
||||
|
|
@ -2831,6 +2848,7 @@ void Pass5(uint2 blockStart, uint3 tid) {
|
|||
#define L5(x, y) V4(O(T5, x, y))
|
||||
#define L6(x, y) V4(O(T6, x, y))
|
||||
#define L7(x, y) V4(O(T7, x, y))
|
||||
|
||||
void Pass6(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -3457,12 +3475,14 @@ void Pass6(uint2 blockStart, uint3 tid) {
|
|||
r7 = max(r7, 0.0);
|
||||
T15[gxy] = r7;
|
||||
}
|
||||
|
||||
//!PASS 7
|
||||
//!DESC conv6 (32x32)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T8, T9, T10, T11, T12, T13, T14, T15
|
||||
//!OUT T0, T1, T2, T3, T4, T5, T6, T7
|
||||
|
||||
#define L0(x, y) V4(O(T8, x, y))
|
||||
#define L1(x, y) V4(O(T9, x, y))
|
||||
#define L2(x, y) V4(O(T10, x, y))
|
||||
|
|
@ -3471,6 +3491,7 @@ void Pass6(uint2 blockStart, uint3 tid) {
|
|||
#define L5(x, y) V4(O(T13, x, y))
|
||||
#define L6(x, y) V4(O(T14, x, y))
|
||||
#define L7(x, y) V4(O(T15, x, y))
|
||||
|
||||
void Pass7(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -4097,12 +4118,14 @@ void Pass7(uint2 blockStart, uint3 tid) {
|
|||
r7 = max(r7, 0.0);
|
||||
T7[gxy] = r7;
|
||||
}
|
||||
|
||||
//!PASS 8
|
||||
//!DESC conv7 (32x32)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T0, T1, T2, T3, T4, T5, T6, T7
|
||||
//!OUT T8, T9, T10, T11, T12, T13, T14, T15
|
||||
|
||||
#define L0(x, y) V4(O(T0, x, y))
|
||||
#define L1(x, y) V4(O(T1, x, y))
|
||||
#define L2(x, y) V4(O(T2, x, y))
|
||||
|
|
@ -4111,6 +4134,7 @@ void Pass7(uint2 blockStart, uint3 tid) {
|
|||
#define L5(x, y) V4(O(T5, x, y))
|
||||
#define L6(x, y) V4(O(T6, x, y))
|
||||
#define L7(x, y) V4(O(T7, x, y))
|
||||
|
||||
void Pass8(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -4737,12 +4761,14 @@ void Pass8(uint2 blockStart, uint3 tid) {
|
|||
r7 = max(r7, 0.0);
|
||||
T15[gxy] = r7;
|
||||
}
|
||||
|
||||
//!PASS 9
|
||||
//!DESC conv8 (32x32)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T8, T9, T10, T11, T12, T13, T14, T15
|
||||
//!OUT T0, T1, T2, T3, T4, T5, T6, T7
|
||||
|
||||
#define L0(x, y) V4(O(T8, x, y))
|
||||
#define L1(x, y) V4(O(T9, x, y))
|
||||
#define L2(x, y) V4(O(T10, x, y))
|
||||
|
|
@ -4751,6 +4777,7 @@ void Pass8(uint2 blockStart, uint3 tid) {
|
|||
#define L5(x, y) V4(O(T13, x, y))
|
||||
#define L6(x, y) V4(O(T14, x, y))
|
||||
#define L7(x, y) V4(O(T15, x, y))
|
||||
|
||||
void Pass9(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -5377,12 +5404,14 @@ void Pass9(uint2 blockStart, uint3 tid) {
|
|||
r7 = max(r7, 0.0);
|
||||
T7[gxy] = r7;
|
||||
}
|
||||
|
||||
//!PASS 10
|
||||
//!DESC out-shuffle (32x12)
|
||||
//!BLOCK_SIZE 16
|
||||
//!NUM_THREADS 64
|
||||
//!IN INPUT, T0, T1, T2, T3, T4, T5, T6, T7
|
||||
//!OUT OUTPUT
|
||||
|
||||
#define L0(x, y) V4(O(T0, x, y))
|
||||
#define L1(x, y) V4(O(T1, x, y))
|
||||
#define L2(x, y) V4(O(T2, x, y))
|
||||
|
|
@ -5391,6 +5420,7 @@ void Pass9(uint2 blockStart, uint3 tid) {
|
|||
#define L5(x, y) V4(O(T5, x, y))
|
||||
#define L6(x, y) V4(O(T6, x, y))
|
||||
#define L7(x, y) V4(O(T7, x, y))
|
||||
|
||||
void Pass10(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart;
|
||||
|
|
|
|||
|
|
@ -18,7 +18,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME CuNNy-0003
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
@ -85,7 +86,9 @@ Texture2D T5;
|
|||
//!NUM_THREADS 64
|
||||
//!IN INPUT
|
||||
//!OUT T0, T1, T2
|
||||
|
||||
#define L0(x, y) MF(dot(MF3(0.299, 0.587, 0.114), O(INPUT, x, y).rgb))
|
||||
|
||||
void Pass1(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -135,15 +138,18 @@ void Pass1(uint2 blockStart, uint3 tid) {
|
|||
r2 = max(r2, 0.0);
|
||||
T2[gxy] = r2;
|
||||
}
|
||||
|
||||
//!PASS 2
|
||||
//!DESC conv1 (12x12)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T0, T1, T2
|
||||
//!OUT T3, T4, T5
|
||||
|
||||
#define L0(x, y) V4(O(T0, x, y))
|
||||
#define L1(x, y) V4(O(T1, x, y))
|
||||
#define L2(x, y) V4(O(T2, x, y))
|
||||
|
||||
void Pass2(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -250,15 +256,18 @@ void Pass2(uint2 blockStart, uint3 tid) {
|
|||
r2 = max(r2, 0.0);
|
||||
T5[gxy] = r2;
|
||||
}
|
||||
|
||||
//!PASS 3
|
||||
//!DESC conv2 (12x8)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T3, T4, T5
|
||||
//!OUT T0, T1
|
||||
|
||||
#define L0(x, y) V4(O(T3, x, y))
|
||||
#define L1(x, y) V4(O(T4, x, y))
|
||||
#define L2(x, y) V4(O(T5, x, y))
|
||||
|
||||
void Pass3(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -336,14 +345,17 @@ void Pass3(uint2 blockStart, uint3 tid) {
|
|||
r1 = max(r1, 0.0);
|
||||
T1[gxy] = r1;
|
||||
}
|
||||
|
||||
//!PASS 4
|
||||
//!DESC out-shuffle (8x4)
|
||||
//!BLOCK_SIZE 16
|
||||
//!NUM_THREADS 64
|
||||
//!IN INPUT, T0, T1
|
||||
//!OUT OUTPUT
|
||||
|
||||
#define L0(x, y) V4(O(T0, x, y))
|
||||
#define L1(x, y) V4(O(T1, x, y))
|
||||
|
||||
void Pass4(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart;
|
||||
|
|
|
|||
|
|
@ -18,7 +18,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME CuNNy-0002
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
@ -73,7 +74,9 @@ Texture2D T3;
|
|||
//!NUM_THREADS 64
|
||||
//!IN INPUT
|
||||
//!OUT T0, T1
|
||||
|
||||
#define L0(x, y) MF(dot(MF3(0.299, 0.587, 0.114), O(INPUT, x, y).rgb))
|
||||
|
||||
void Pass1(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -111,14 +114,17 @@ void Pass1(uint2 blockStart, uint3 tid) {
|
|||
r1 = max(r1, 0.0);
|
||||
T1[gxy] = r1;
|
||||
}
|
||||
|
||||
//!PASS 2
|
||||
//!DESC conv1 (8x8)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T0, T1
|
||||
//!OUT T2, T3
|
||||
|
||||
#define L0(x, y) V4(O(T0, x, y))
|
||||
#define L1(x, y) V4(O(T1, x, y))
|
||||
|
||||
void Pass2(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -175,14 +181,17 @@ void Pass2(uint2 blockStart, uint3 tid) {
|
|||
r1 = max(r1, 0.0);
|
||||
T3[gxy] = r1;
|
||||
}
|
||||
|
||||
//!PASS 3
|
||||
//!DESC conv2 (8x8)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T2, T3
|
||||
//!OUT T0, T1
|
||||
|
||||
#define L0(x, y) V4(O(T2, x, y))
|
||||
#define L1(x, y) V4(O(T3, x, y))
|
||||
|
||||
void Pass3(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -239,14 +248,17 @@ void Pass3(uint2 blockStart, uint3 tid) {
|
|||
r1 = max(r1, 0.0);
|
||||
T1[gxy] = r1;
|
||||
}
|
||||
|
||||
//!PASS 4
|
||||
//!DESC out-shuffle (8x4)
|
||||
//!BLOCK_SIZE 16
|
||||
//!NUM_THREADS 64
|
||||
//!IN INPUT, T0, T1
|
||||
//!OUT OUTPUT
|
||||
|
||||
#define L0(x, y) V4(O(T0, x, y))
|
||||
#define L1(x, y) V4(O(T1, x, y))
|
||||
|
||||
void Pass4(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart;
|
||||
|
|
|
|||
|
|
@ -18,7 +18,8 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!SORT_NAME CuNNy-0001
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
@ -73,7 +74,9 @@ Texture2D T3;
|
|||
//!NUM_THREADS 64
|
||||
//!IN INPUT
|
||||
//!OUT T0, T1
|
||||
|
||||
#define L0(x, y) MF(dot(MF3(0.299, 0.587, 0.114), O(INPUT, x, y).rgb))
|
||||
|
||||
void Pass1(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -111,14 +114,17 @@ void Pass1(uint2 blockStart, uint3 tid) {
|
|||
r1 = max(r1, 0.0);
|
||||
T1[gxy] = r1;
|
||||
}
|
||||
|
||||
//!PASS 2
|
||||
//!DESC conv1 (8x8)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T0, T1
|
||||
//!OUT T2, T3
|
||||
|
||||
#define L0(x, y) V4(O(T0, x, y))
|
||||
#define L1(x, y) V4(O(T1, x, y))
|
||||
|
||||
void Pass2(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -175,14 +181,17 @@ void Pass2(uint2 blockStart, uint3 tid) {
|
|||
r1 = max(r1, 0.0);
|
||||
T3[gxy] = r1;
|
||||
}
|
||||
|
||||
//!PASS 3
|
||||
//!DESC conv2 (8x4)
|
||||
//!BLOCK_SIZE 8
|
||||
//!NUM_THREADS 64
|
||||
//!IN T2, T3
|
||||
//!OUT T0
|
||||
|
||||
#define L0(x, y) V4(O(T2, x, y))
|
||||
#define L1(x, y) V4(O(T3, x, y))
|
||||
|
||||
void Pass3(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = Rmp8x8(tid.x) + blockStart;
|
||||
|
|
@ -219,13 +228,16 @@ void Pass3(uint2 blockStart, uint3 tid) {
|
|||
r0 = max(r0, 0.0);
|
||||
T0[gxy] = r0;
|
||||
}
|
||||
|
||||
//!PASS 4
|
||||
//!DESC out-shuffle (4x4)
|
||||
//!BLOCK_SIZE 16
|
||||
//!NUM_THREADS 64
|
||||
//!IN INPUT, T0
|
||||
//!OUT OUTPUT
|
||||
|
||||
#define L0(x, y) V4(O(T0, x, y))
|
||||
|
||||
void Pass4(uint2 blockStart, uint3 tid) {
|
||||
float2 pt = float2(GetInputPt());
|
||||
uint2 gxy = (Rmp8x8(tid.x) << 1) + blockStart;
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!USE FP16
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!USE FP16
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,8 @@
|
|||
|
||||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,8 @@
|
|||
|
||||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!USE FP16, MulAdd
|
||||
//!USE MulAdd
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!USE FP16
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
//!MAGPIE EFFECT
|
||||
//!VERSION 4
|
||||
//!USE FP16
|
||||
//!CAPABILITY FP16
|
||||
|
||||
#include "../StubDefs.hlsli"
|
||||
|
||||
|
|
|
|||
|
|
@ -266,34 +266,79 @@ static uint32_t GetNextExpr(std::string_view& source, std::string& expr) noexcep
|
|||
return 0;
|
||||
}
|
||||
|
||||
static uint32_t ResolvePassFlags(std::string_view& block, uint32_t& passFlags) noexcept {
|
||||
std::string_view features;
|
||||
if (GetNextString(block, features)) {
|
||||
static uint32_t ResolveUseFlags(std::string_view& block, uint32_t& effectFlags) noexcept {
|
||||
std::string_view flags;
|
||||
if (GetNextString(block, flags)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
for (std::string_view& feature : StrHelper::Split(features, ',')) {
|
||||
StrHelper::Trim(feature);
|
||||
std::bitset<2> processed;
|
||||
|
||||
if (feature == "FP16") {
|
||||
passFlags |= EffectPassFlags::UseFP16;
|
||||
} else if (feature == "MulAdd") {
|
||||
passFlags |= EffectPassFlags::UseMulAdd;
|
||||
} else if (feature == "Dynamic") {
|
||||
passFlags |= EffectPassFlags::UseDynamic;
|
||||
for (std::string_view& flag : StrHelper::Split(flags, ',')) {
|
||||
StrHelper::Trim(flag);
|
||||
|
||||
if (flag == "MulAdd") {
|
||||
if (processed[0]) {
|
||||
return 1;
|
||||
}
|
||||
processed[0] = true;
|
||||
|
||||
effectFlags |= EffectFlags::UseMulAdd;
|
||||
} else if (flag == "Dynamic") {
|
||||
if (processed[1]) {
|
||||
return 1;
|
||||
}
|
||||
processed[1] = true;
|
||||
|
||||
effectFlags |= EffectFlags::UseDynamic;
|
||||
} else {
|
||||
Logger::Get().Warn(StrHelper::Concat("使用了未知功能: ", feature));
|
||||
Logger::Get().Warn(StrHelper::Concat("使用了未知 USE 标志: ", flag));
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static uint32_t ResolveHeader(std::string_view block, EffectDesc& desc, uint32_t& commonPassFlags, bool noCompile) noexcept {
|
||||
// Parses the flag list of a header "CAPABILITY" directive and records the
// result in effectFlags.
//
// block:       directive text; the flag list is extracted from it via
//              GetNextString.
// effectFlags: receives EffectFlags::SupportFP16 when "FP16" is listed, plus
//              EffectFlags::FP16 unless noFP16 is set.
// noFP16:      when true the effect is still marked as supporting FP16, but
//              the FP16 bit itself is left unset.
//
// Returns 0 on success. Returns 1 when GetNextString reports failure or when
// "FP16" is listed more than once. Unrecognized flags only produce a warning.
static uint32_t ResolveCapabilityFlags(std::string_view& block, uint32_t& effectFlags, bool noFP16) noexcept {
|
||||
std::string_view flags;
|
||||
// A non-zero result from GetNextString is treated as a parse failure.
if (GetNextString(block, flags)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// One bit per known capability; used to reject a flag that appears twice.
std::bitset<1> processed;
|
||||
|
||||
// The flag list is comma separated; each entry is trimmed before comparison.
for (std::string_view& flag : StrHelper::Split(flags, ',')) {
|
||||
StrHelper::Trim(flag);
|
||||
|
||||
if (flag == "FP16") {
|
||||
// Listing the same capability twice is an error.
if (processed[0]) {
|
||||
return 1;
|
||||
}
|
||||
processed[0] = true;
|
||||
|
||||
// SupportFP16 is set whenever the effect declares the capability.
effectFlags |= EffectFlags::SupportFP16;
|
||||
// FP16 is set only when the caller has not disabled FP16 via noFP16.
if (!noFP16) {
|
||||
effectFlags |= EffectFlags::FP16;
|
||||
}
|
||||
} else {
|
||||
// Unknown flag: log a warning ("unknown CAPABILITY flag used") and keep going.
Logger::Get().Warn(StrHelper::Concat("使用了未知 CAPABILITY 标志: ", flag));
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static uint32_t ResolveHeader(
|
||||
std::string_view block,
|
||||
EffectDesc& desc,
|
||||
uint32_t& effectFlags,
|
||||
bool noCompile,
|
||||
bool noFP16
|
||||
) noexcept {
|
||||
// 必需的选项: VERSION
|
||||
// 可选的选项: SORT_NAME, USE
|
||||
|
||||
std::bitset<3> processed;
|
||||
std::bitset<4> processed;
|
||||
|
||||
std::string_view token;
|
||||
|
||||
|
|
@ -345,7 +390,16 @@ static uint32_t ResolveHeader(std::string_view block, EffectDesc& desc, uint32_t
|
|||
}
|
||||
processed[2] = true;
|
||||
|
||||
if (ResolvePassFlags(block, commonPassFlags)) {
|
||||
if (ResolveUseFlags(block, effectFlags)) {
|
||||
return 1;
|
||||
}
|
||||
} else if (t == "CAPABILITY") {
|
||||
if (processed[3]) {
|
||||
return 1;
|
||||
}
|
||||
processed[3] = true;
|
||||
|
||||
if (ResolveCapabilityFlags(block, effectFlags, noFP16)) {
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
|
|
@ -792,12 +846,7 @@ static uint32_t ResolveCommon(std::string_view& block) noexcept {
|
|||
return 0;
|
||||
}
|
||||
|
||||
static uint32_t ResolvePasses(
|
||||
SmallVector<std::string_view>& blocks,
|
||||
EffectDesc& desc,
|
||||
uint32_t commonPassFlags,
|
||||
bool noFP16
|
||||
) noexcept {
|
||||
static uint32_t ResolvePasses(SmallVector<std::string_view>& blocks, EffectDesc& desc) noexcept {
|
||||
// 必选项: IN, OUT
|
||||
// 可选项: BLOCK_SIZE, NUM_THREADS, STYLE, USE
|
||||
// STYLE 为 PS 时不能有 BLOCK_SIZE 或 NUM_THREADS
|
||||
|
|
@ -857,9 +906,6 @@ static uint32_t ResolvePasses(
|
|||
std::string_view& block = blocks[i];
|
||||
auto& passDesc = desc.passes[i];
|
||||
|
||||
// 应用头中的标志
|
||||
passDesc.flags |= commonPassFlags;
|
||||
|
||||
// 用于检查输入和输出中重复的纹理
|
||||
phmap::flat_hash_map<std::string_view, uint32_t> texNames;
|
||||
texNames.reserve(desc.textures.size());
|
||||
|
|
@ -1047,15 +1093,6 @@ static uint32_t ResolvePasses(
|
|||
|
||||
StrHelper::Trim(val);
|
||||
passDesc.desc = val;
|
||||
} else if (t == "USE") {
|
||||
if (processed[6]) {
|
||||
return 1;
|
||||
}
|
||||
processed[6] = true;
|
||||
|
||||
if (ResolvePassFlags(block, passDesc.flags)) {
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
Logger::Get().Warn(fmt::format("解析通道 {} 时遇到未知指令: {}", i + 1, t));
|
||||
}
|
||||
|
|
@ -1079,10 +1116,6 @@ static uint32_t ResolvePasses(
|
|||
if (passDesc.desc.empty()) {
|
||||
passDesc.desc = fmt::format("Pass {}", i + 1);
|
||||
}
|
||||
|
||||
if (noFP16) {
|
||||
passDesc.flags &= ~EffectPassFlags::UseFP16;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
@ -1097,8 +1130,6 @@ static uint32_t GeneratePassSource(
|
|||
std::string& result,
|
||||
std::vector<std::pair<std::string, std::string>>& macros
|
||||
) noexcept {
|
||||
bool isInlineParams = desc.flags & EffectFlags::InlineParams;
|
||||
|
||||
const EffectPassDesc& passDesc = desc.passes[(size_t)passIdx - 1];
|
||||
|
||||
{
|
||||
|
|
@ -1114,7 +1145,7 @@ static uint32_t GeneratePassSource(
|
|||
// 常量缓冲区
|
||||
result.append(cbHlsl);
|
||||
|
||||
if (passDesc.flags & EffectPassFlags::UseDynamic) {
|
||||
if (desc.flags & EffectFlags::UseDynamic) {
|
||||
result.append("cbuffer __CB2 : register(b1) { uint __frameCount; };\n\n");
|
||||
}
|
||||
|
||||
|
|
@ -1162,7 +1193,7 @@ static uint32_t GeneratePassSource(
|
|||
macros.emplace_back("MP_PS_STYLE", "");
|
||||
}
|
||||
|
||||
if (isInlineParams) {
|
||||
if (desc.flags & EffectFlags::InlineParams) {
|
||||
macros.emplace_back("MP_INLINE_PARAMS", "");
|
||||
}
|
||||
|
||||
|
|
@ -1172,7 +1203,7 @@ static uint32_t GeneratePassSource(
|
|||
|
||||
// 用于在 FP32 和 FP16 间切换的宏
|
||||
static const char* numbers[] = { "1","2","3","4" };
|
||||
if (passDesc.flags & EffectPassFlags::UseFP16) {
|
||||
if (desc.flags & EffectFlags::FP16) {
|
||||
macros.emplace_back("MP_FP16", "");
|
||||
macros.emplace_back("MF", "min16float");
|
||||
|
||||
|
|
@ -1210,7 +1241,7 @@ float2 GetOutputPt() { return __outputPt; }
|
|||
float2 GetScale() { return __scale; }
|
||||
)");
|
||||
|
||||
if (passDesc.flags & EffectPassFlags::UseMulAdd) {
|
||||
if (desc.flags & EffectFlags::UseMulAdd) {
|
||||
// 使用 mad 而不是 mul,经测试这可以大幅提高性能,且和 FP16 的兼容性更好。
|
||||
// 见 GH#1049
|
||||
// result.append(R"(MF2 MulAdd(MF2 x, MF2x2 y, MF2 a) { return mul(x, y) + a; }
|
||||
|
|
@ -1289,7 +1320,7 @@ MF4 MulAdd(MF4 x, MF4x4 y, MF4 a) {
|
|||
)");
|
||||
}
|
||||
|
||||
if (passDesc.flags & EffectPassFlags::UseDynamic) {
|
||||
if (desc.flags & EffectFlags::UseDynamic) {
|
||||
result.append(R"(uint GetFrameCount() { return __frameCount; }
|
||||
|
||||
)");
|
||||
|
|
@ -1717,9 +1748,7 @@ uint32_t EffectCompiler::Compile(
|
|||
return 1;
|
||||
}
|
||||
|
||||
// 头中的标志将应用到所有通道
|
||||
uint32_t commonPassFlags = 0;
|
||||
if (ResolveHeader(headerBlock, desc, commonPassFlags, noCompile)) {
|
||||
if (ResolveHeader(headerBlock, desc, desc.flags, noCompile, flags & EffectCompilerFlags::NoFP16)) {
|
||||
Logger::Get().Error("解析 Header 块失败");
|
||||
return 1;
|
||||
}
|
||||
|
|
@ -1800,7 +1829,7 @@ uint32_t EffectCompiler::Compile(
|
|||
}
|
||||
|
||||
desc.passes.clear();
|
||||
if (ResolvePasses(passBlocks, desc, commonPassFlags, flags & EffectCompilerFlags::NoFP16)) {
|
||||
if (ResolvePasses(passBlocks, desc)) {
|
||||
Logger::Get().Error("解析 Pass 块失败");
|
||||
return 1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -561,7 +561,7 @@ ID3D11Texture2D* Renderer::_BuildEffects() noexcept {
|
|||
info.passNames.emplace_back(std::move(passDesc.desc));
|
||||
}
|
||||
|
||||
info.isFP16 = desc.passes[0].flags & EffectPassFlags::UseFP16;
|
||||
info.isFP16 = desc.flags & EffectFlags::FP16;
|
||||
}
|
||||
|
||||
// 输出尺寸大于缩放窗口尺寸则需要降采样
|
||||
|
|
@ -610,26 +610,20 @@ ID3D11Texture2D* Renderer::_BuildEffects() noexcept {
|
|||
|
||||
// 初始化所有效果共用的动态常量缓冲区
|
||||
for (const EffectDesc& effectDesc : effectDescs) {
|
||||
for (const EffectPassDesc& passDesc : effectDesc.passes) {
|
||||
if (passDesc.flags & EffectPassFlags::UseDynamic) {
|
||||
D3D11_BUFFER_DESC bd{
|
||||
.ByteWidth = 16, // 只用 4 个字节
|
||||
.Usage = D3D11_USAGE_DYNAMIC,
|
||||
.BindFlags = D3D11_BIND_CONSTANT_BUFFER,
|
||||
.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE
|
||||
};
|
||||
if (effectDesc.flags & EffectFlags::UseDynamic) {
|
||||
D3D11_BUFFER_DESC bd{
|
||||
.ByteWidth = 16, // 只用 4 个字节
|
||||
.Usage = D3D11_USAGE_DYNAMIC,
|
||||
.BindFlags = D3D11_BIND_CONSTANT_BUFFER,
|
||||
.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE
|
||||
};
|
||||
|
||||
HRESULT hr = _backendResources.GetD3DDevice()->CreateBuffer(&bd, nullptr, _dynamicCB.put());
|
||||
if (FAILED(hr)) {
|
||||
Logger::Get().ComError("CreateBuffer 失败", hr);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
break;
|
||||
HRESULT hr = _backendResources.GetD3DDevice()->CreateBuffer(&bd, nullptr, _dynamicCB.put());
|
||||
if (FAILED(hr)) {
|
||||
Logger::Get().ComError("CreateBuffer 失败", hr);
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
if (_dynamicCB) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -70,9 +70,6 @@ struct EffectParameterDesc {
|
|||
|
||||
struct EffectPassFlags {
|
||||
static constexpr uint32_t PSStyle = 1;
|
||||
static constexpr uint32_t UseFP16 = 1 << 1;
|
||||
static constexpr uint32_t UseMulAdd = 1 << 2;
|
||||
static constexpr uint32_t UseDynamic = 1 << 3;
|
||||
};
|
||||
|
||||
struct EffectPassDesc {
|
||||
|
|
@ -86,17 +83,23 @@ struct EffectPassDesc {
|
|||
};
|
||||
|
||||
struct EffectFlags {
|
||||
static constexpr uint32_t InlineParams = 1;
|
||||
// 效果本身的属性
|
||||
static constexpr uint32_t UseDynamic = 1;
|
||||
static constexpr uint32_t UseMulAdd = 1 << 1;
|
||||
static constexpr uint32_t SupportFP16 = 1 << 2;
|
||||
// 编译赋予的属性
|
||||
static constexpr uint32_t InlineParams = 1 << 16;
|
||||
static constexpr uint32_t FP16 = 1 << 17;
|
||||
};
|
||||
|
||||
struct EffectDesc {
|
||||
std::string name;
|
||||
std::string sortName; // 仅供 UI 使用
|
||||
|
||||
const std::pair<std::string, std::string>& GetOutputSizeExpr() const noexcept {
|
||||
return textures[1].sizeExpr;
|
||||
}
|
||||
|
||||
std::string name;
|
||||
std::string sortName; // 仅供 UI 使用
|
||||
|
||||
std::vector<EffectParameterDesc> params;
|
||||
// 0: INPUT
|
||||
// 1: OUTPUT
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue