Merge branch 'render-system' into fix-cursor-on-overlay

This commit is contained in:
Xu 2024-03-14 20:01:17 +08:00
commit dc367a72b2
132 changed files with 52825 additions and 1253 deletions

View file

@ -247,6 +247,24 @@
"contributions": [
"translation"
]
},
{
"login": "hauuau",
"name": "hauuau",
"avatar_url": "https://avatars.githubusercontent.com/u/52239673?v=4",
"profile": "https://github.com/hauuau",
"contributions": [
"code"
]
},
{
"login": "nellydocs",
"name": "nellydocs",
"avatar_url": "https://avatars.githubusercontent.com/u/71311423?v=4",
"profile": "https://github.com/nellydocs",
"contributions": [
"translation"
]
}
],
"contributorsPerLine": 7,

View file

@ -24,7 +24,7 @@ jobs:
run: pip install conan
- name: Load Conan cache
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ~/.conan2/p
key: Conan-${{ hashFiles('src/**/conanfile.txt') }}-${{ matrix.platform }}

View file

@ -42,7 +42,7 @@ jobs:
run: pip install conan
- name: Load Conan cache
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ~/.conan2/p
key: ${{ runner.os }}-conan-${{ hashFiles('src/**/conanfile.txt') }}

View file

@ -24,7 +24,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
src\Common.Pre.props = src\Common.Pre.props
Directory.Build.props = Directory.Build.props
src\extract_winui_runtime.py = src\extract_winui_runtime.py
src\fix_resfiles.py = src\fix_resfiles.py
src\HybridCRT.props = src\HybridCRT.props
src\WinUI.props = src\WinUI.props
EndProjectSection

View file

@ -114,6 +114,8 @@ Thanks go to these wonderful people:
<td align="center" valign="top" width="14.28%"><a href="https://github.com/IsaiasYang"><img src="https://avatars.githubusercontent.com/u/20205571?v=4?s=100" width="100px;" alt="攸羚"/><br /><sub><b>攸羚</b></sub></a><br /><a href="https://github.com/Blinue/Magpie/commits?author=IsaiasYang" title="Code">💻</a></td>
<td align="center" valign="top" width="14.28%"><a href="http://ohaiibuzzle.dev"><img src="https://avatars.githubusercontent.com/u/23693150?v=4?s=100" width="100px;" alt="OHaiiBuzzle"/><br /><sub><b>OHaiiBuzzle</b></sub></a><br /><a href="#translation-ohaiibuzzle" title="Translation">🌍</a></td>
<td align="center" valign="top" width="14.28%"><a href="https://github.com/Rastadu23"><img src="https://avatars.githubusercontent.com/u/52637051?v=4?s=100" width="100px;" alt="Rastadu23"/><br /><sub><b>Rastadu23</b></sub></a><br /><a href="#translation-Rastadu23" title="Translation">🌍</a></td>
<td align="center" valign="top" width="14.28%"><a href="https://github.com/hauuau"><img src="https://avatars.githubusercontent.com/u/52239673?v=4?s=100" width="100px;" alt="hauuau"/><br /><sub><b>hauuau</b></sub></a><br /><a href="https://github.com/Blinue/Magpie/commits?author=hauuau" title="Code">💻</a></td>
<td align="center" valign="top" width="14.28%"><a href="https://github.com/nellydocs"><img src="https://avatars.githubusercontent.com/u/71311423?v=4?s=100" width="100px;" alt="nellydocs"/><br /><sub><b>nellydocs</b></sub></a><br /><a href="#translation-nellydocs" title="Translation">🌍</a></td>
</tr>
</tbody>
</table>

View file

@ -113,6 +113,8 @@ Magpie 是一个轻量级的窗口缩放工具,内置了多种高效的缩放
<td align="center" valign="top" width="14.28%"><a href="https://github.com/IsaiasYang"><img src="https://avatars.githubusercontent.com/u/20205571?v=4?s=100" width="100px;" alt="攸羚"/><br /><sub><b>攸羚</b></sub></a><br /><a href="https://github.com/Blinue/Magpie/commits?author=IsaiasYang" title="Code">💻</a></td>
<td align="center" valign="top" width="14.28%"><a href="http://ohaiibuzzle.dev"><img src="https://avatars.githubusercontent.com/u/23693150?v=4?s=100" width="100px;" alt="OHaiiBuzzle"/><br /><sub><b>OHaiiBuzzle</b></sub></a><br /><a href="#translation-ohaiibuzzle" title="Translation">🌍</a></td>
<td align="center" valign="top" width="14.28%"><a href="https://github.com/Rastadu23"><img src="https://avatars.githubusercontent.com/u/52637051?v=4?s=100" width="100px;" alt="Rastadu23"/><br /><sub><b>Rastadu23</b></sub></a><br /><a href="#translation-Rastadu23" title="Translation">🌍</a></td>
<td align="center" valign="top" width="14.28%"><a href="https://github.com/hauuau"><img src="https://avatars.githubusercontent.com/u/52239673?v=4?s=100" width="100px;" alt="hauuau"/><br /><sub><b>hauuau</b></sub></a><br /><a href="https://github.com/Blinue/Magpie/commits?author=hauuau" title="Code">💻</a></td>
<td align="center" valign="top" width="14.28%"><a href="https://github.com/nellydocs"><img src="https://avatars.githubusercontent.com/u/71311423?v=4?s=100" width="100px;" alt="nellydocs"/><br /><sub><b>nellydocs</b></sub></a><br /><a href="#translation-nellydocs" title="Translation">🌍</a></td>
</tr>
</tbody>
</table>

View file

@ -120,10 +120,9 @@ def remove_file(file):
pass
for folder in ["Microsoft.UI.Xaml", "Magpie.App"]:
shutil.rmtree(folder, ignore_errors=True)
shutil.rmtree("Microsoft.UI.Xaml", ignore_errors=True)
for pattern in ["*.pdb", "*.lib", "*.exp", "*.winmd", "*.xml", "*.xbf", "dummy.*"]:
for pattern in ["*.pdb", "*.lib", "*.exp", "*.winmd", "*.xml", "*.xbf"]:
for file in glob.glob(pattern):
remove_file(file)

View file

@ -2,6 +2,7 @@
<Project xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<PropertyGroup>
<DefaultLanguage>en-US</DefaultLanguage>
<CppWinRTFastAbi>true</CppWinRTFastAbi>
<CppWinRTOptimized>true</CppWinRTOptimized>
<CppWinRTRootNamespaceAutoMerge>true</CppWinRTRootNamespaceAutoMerge>
<CppWinRTVerbosity>low</CppWinRTVerbosity>
@ -49,10 +50,7 @@
<WholeProgramOptimization>true</WholeProgramOptimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<!-- WINRT_NO_SOURCE_LOCATION: VS 2022 17.6 开始 <source_location> 返回完全限定函数名,使 cppwinrt 中大量的模板函数无法 COMDAT 折叠, -->
<!-- 从而大幅增加了二进制文件的体积。定义 WINRT_NO_SOURCE_LOCATION 以禁止 cppwinrt 使用 <source_location>。这是一个临时解决方案,在未来 -->
<!-- 我们应定义 _USE_DETAILED_FUNCTION_NAME_IN_SOURCE_LOCATION=0 恢复 <source_location> 的旧行为,因为它也让日志文件多了很多废话。 -->
<PreprocessorDefinitions>NDEBUG;WINRT_NO_SOURCE_LOCATION;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>

File diff suppressed because it is too large Load diff

View file

@ -1,8 +1,9 @@
// Anime4K_Upscale_GAN_x2_S
// 移植自 https://github.com/bloc97/Anime4K/blob/master/glsl/Upscale/Anime4K_Upscale_GAN_x2_S.glsl
// 移植自 https://github.com/bloc97/Anime4K/blob/8e39551ce96ed172605c89b7dd8be855b5502cc9/glsl/Upscale/Anime4K_Upscale_GAN_x2_S.glsl
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME Anime4K_Upscale_GAN_x2_1
//!TEXTURE
Texture2D INPUT;

File diff suppressed because it is too large Load diff

View file

@ -206,24 +206,137 @@
</CopyFileToFolders>
</ItemGroup>
<ItemGroup>
<CopyFileToFolders Include="RAVU\prescalers.hlsli">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_3x_R2.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_3x_R2_RGB.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_3x_R3.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_3x_R3_RGB.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_3x_R4.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_3x_R4_RGB.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_Lite_AR_R2.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_Lite_AR_R3.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_Lite_AR_R4.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_Lite_R2.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_Lite_R3.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_Lite_R4.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_R2.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_R2_RGB.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_R3.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_R3_RGB.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_R4.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_R4_RGB.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_Zoom_AR_R2.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_Zoom_AR_R2_RGB.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_Zoom_AR_R3.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_Zoom_AR_R3_RGB.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_Zoom_R2.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_Zoom_R2_RGB.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_Zoom_R3.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_Zoom_R3_RGB.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
</ItemGroup>
<ItemGroup>
<CopyFileToFolders Include="RAVU\RAVU_Lite_R3_Weights.dds" />
<CopyFileToFolders Include="RAVU\RAVU_Zoom_R3_Weights.dds" />
<CopyFileToFolders Include="RAVU\ravu_3x_lut2_f16.dds" />
<CopyFileToFolders Include="RAVU\ravu_3x_lut3_f16.dds" />
<CopyFileToFolders Include="RAVU\ravu_3x_lut4_f16.dds" />
<CopyFileToFolders Include="RAVU\ravu_lite_lut2_f16.dds" />
<CopyFileToFolders Include="RAVU\ravu_lite_lut3_f16.dds" />
<CopyFileToFolders Include="RAVU\ravu_lite_lut4_f16.dds" />
<CopyFileToFolders Include="RAVU\ravu_lut2_f16.dds" />
<CopyFileToFolders Include="RAVU\ravu_lut3_f16.dds" />
<CopyFileToFolders Include="RAVU\ravu_lut4_f16.dds" />
<CopyFileToFolders Include="RAVU\ravu_zoom_lut2_ar_f16.dds" />
<CopyFileToFolders Include="RAVU\ravu_zoom_lut2_f16.dds" />
<CopyFileToFolders Include="RAVU\ravu_zoom_lut3_ar_f16.dds" />
<CopyFileToFolders Include="RAVU\ravu_zoom_lut3_f16.dds" />
</ItemGroup>
<ItemGroup>
<CopyFileToFolders Include="NNEDI3\NNEDI3_nns128_win8x4.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="NNEDI3\NNEDI3_nns128_win8x6.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="NNEDI3\NNEDI3_nns16_win8x4.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="NNEDI3\NNEDI3_nns16_win8x6.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="NNEDI3\NNEDI3_nns256_win8x4.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="NNEDI3\NNEDI3_nns256_win8x6.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="NNEDI3\NNEDI3_nns32_win8x4.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="NNEDI3\NNEDI3_nns32_win8x6.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="NNEDI3\NNEDI3_nns64_win8x4.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="NNEDI3\NNEDI3_nns64_win8x6.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
<CopyFileToFolders Include="NNEDI3\prescalers.hlsli">
<FileType>Document</FileType>
</CopyFileToFolders>
</ItemGroup>
<ItemGroup>
<CopyFileToFolders Include="NIS\NIS.hlsl">
@ -291,5 +404,15 @@
<FileType>Document</FileType>
</CopyFileToFolders>
</ItemGroup>
<ItemGroup>
<CopyFileToFolders Include="Anime4K\Anime4K_Upscale_GAN_x3_L.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
</ItemGroup>
<ItemGroup>
<CopyFileToFolders Include="Anime4K\Anime4K_Upscale_GAN_x2_M.hlsl">
<FileType>Document</FileType>
</CopyFileToFolders>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
</Project>

View file

@ -153,24 +153,159 @@
<CopyFileToFolders Include="FXAA\FXAA.hlsli">
<Filter>FXAA</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\prescalers.hlsli">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\ravu_3x_lut2_f16.dds">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\ravu_3x_lut3_f16.dds">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\ravu_3x_lut4_f16.dds">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_3x_R2_RGB.hlsl">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_3x_R2.hlsl">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_3x_R3_RGB.hlsl">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_3x_R3.hlsl">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_3x_R4_RGB.hlsl">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_3x_R4.hlsl">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_Lite_AR_R2.hlsl">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_Lite_AR_R3.hlsl">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_Lite_AR_R4.hlsl">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\ravu_lite_lut2_f16.dds">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\ravu_lite_lut3_f16.dds">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\ravu_lite_lut4_f16.dds">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_Lite_R2.hlsl">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_Lite_R3.hlsl">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_Lite_R3_Weights.dds">
<CopyFileToFolders Include="RAVU\RAVU_Lite_R4.hlsl">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\ravu_lut2_f16.dds">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\ravu_lut3_f16.dds">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\ravu_lut4_f16.dds">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_R2_RGB.hlsl">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_R2.hlsl">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_R3_RGB.hlsl">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_R3.hlsl">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_R4_RGB.hlsl">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_R4.hlsl">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_Zoom_AR_R2_RGB.hlsl">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_Zoom_AR_R2.hlsl">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_Zoom_AR_R3_RGB.hlsl">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_Zoom_AR_R3.hlsl">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\ravu_zoom_lut2_ar_f16.dds">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\ravu_zoom_lut2_f16.dds">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\ravu_zoom_lut3_ar_f16.dds">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\ravu_zoom_lut3_f16.dds">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_Zoom_R2_RGB.hlsl">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_Zoom_R2.hlsl">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_Zoom_R3.hlsl">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="RAVU\RAVU_Zoom_R3_Weights.dds">
<CopyFileToFolders Include="RAVU\RAVU_Zoom_R3_RGB.hlsl">
<Filter>RAVU</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="NNEDI3\NNEDI3_nns128_win8x4.hlsl">
<Filter>NNEDI3</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="NNEDI3\NNEDI3_nns128_win8x6.hlsl">
<Filter>NNEDI3</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="NNEDI3\NNEDI3_nns16_win8x4.hlsl">
<Filter>NNEDI3</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="NNEDI3\NNEDI3_nns16_win8x6.hlsl">
<Filter>NNEDI3</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="NNEDI3\NNEDI3_nns256_win8x4.hlsl">
<Filter>NNEDI3</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="NNEDI3\NNEDI3_nns256_win8x6.hlsl">
<Filter>NNEDI3</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="NNEDI3\NNEDI3_nns32_win8x4.hlsl">
<Filter>NNEDI3</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="NNEDI3\NNEDI3_nns32_win8x6.hlsl">
<Filter>NNEDI3</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="NNEDI3\NNEDI3_nns64_win8x4.hlsl">
<Filter>NNEDI3</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="NNEDI3\NNEDI3_nns64_win8x6.hlsl">
<Filter>NNEDI3</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="NNEDI3\prescalers.hlsli">
<Filter>NNEDI3</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="NIS\NIS.hlsl">
<Filter>NIS</Filter>
</CopyFileToFolders>
@ -219,6 +354,12 @@
<CopyFileToFolders Include="Deband.hlsl" />
<CopyFileToFolders Include="Nearest.hlsl" />
<CopyFileToFolders Include="Bilinear.hlsl" />
<CopyFileToFolders Include="Anime4K\Anime4K_Upscale_GAN_x3_L.hlsl">
<Filter>Anime4K</Filter>
</CopyFileToFolders>
<CopyFileToFolders Include="Anime4K\Anime4K_Upscale_GAN_x2_M.hlsl">
<Filter>Anime4K</Filter>
</CopyFileToFolders>
</ItemGroup>
<ItemGroup>
<Filter Include="Anime4K">

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,206 +1,732 @@
// nnedi3-nns16-win8x4
// 移植自 https://github.com/bjin/mpv-prescalers/blob/cc02ed95c1fe05b72bc21d41257c4c085e6e409b/compute/nnedi3-nns16-win8x4.hook
// 有半像素的偏移
// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers
// Please don't edit this file directly.
// Generated by: nnedi3.py --nns 16 --win 8x4 --use-compute-shader --use-magpie
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//!MAGPIE EFFECT
//!VERSION 3
//!OUTPUT_WIDTH INPUT_WIDTH * 2
//!OUTPUT_HEIGHT INPUT_HEIGHT * 2
//!VERSION 4
//!SORT_NAME NNEDI3_016_4
//!TEXTURE
Texture2D INPUT;
//!SAMPLER
//!FILTER POINT
SamplerState sam;
SamplerState sam_INPUT;
//!TEXTURE
//!WIDTH INPUT_WIDTH * 1 * 2
//!HEIGHT INPUT_HEIGHT * 2 * 1
Texture2D OUTPUT;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam1;
SamplerState sam_INPUT_LINEAR;
//!TEXTURE
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT * 2
//!FORMAT R16_FLOAT
Texture2D tex1;
//!WIDTH INPUT_WIDTH * 1
//!HEIGHT INPUT_HEIGHT * 2
Texture2D temp;
//!SAMPLER
//!FILTER POINT
SamplerState sam_temp;
//!COMMON
#include "prescalers.hlsli"
#define T(x) asfloat(x)
#define W(i,w0,w1,w2,w3) dot(samples[i],float4(T(w0),T(w1),T(w2),T(w3)))
#define WS(w0,w1) sum1 = exp(sum1 * mstd2 + T(w0)); sum2 = sum2 * mstd2 + T(w1); wsum += sum1; vsum += sum1*(sum2/(1.0+abs(sum2)))
#define LAST_PASS 2
//!PASS 1
//!DESC double_y
//!DESC NNEDI3 (double_y, nns16, win8x4)
//!IN INPUT
//!OUT tex1
//!BLOCK_SIZE 32,16
//!NUM_THREADS 32,8
float nnedi3(float4 samples[8]) {
//!OUT temp
//!BLOCK_SIZE 32, 16
//!NUM_THREADS 32, 8
#pragma optionNV(inline none)
float nnedi3(vec4 samples[8]) {
float sum = 0.0, sumsq = 0.0;
[unroll]
for (int i = 0; i < 8; i++) {
sum += dot(samples[i], 1.0f);
[unroll] for (int i = 0; i < 8; i++) {
sum += dot(samples[i], vec4(1.0, 1.0, 1.0, 1.0));
sumsq += dot(samples[i], samples[i]);
}
float mstd0 = sum / 32.0;
float mstd1 = sumsq / 32.0 - mstd0 * mstd0;
// 不能使用 lerp否则结果可能为 nan
float mstd2 = mstd1 >= 1.192092896e-7 ? rsqrt(mstd1) : 0.0;
float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= 1.192092896e-7);
mstd1 *= mstd2;
float vsum = 0.0, wsum = 0.0, sum1, sum2;
#define T(x) intBitsToFloat(x)
#define W(i, w0, w1, w2, w3) dot(samples[i], vec4(T(w0), T(w1), T(w2), T(w3)))
#define WS(w0, w1) \
sum1 = exp(sum1 * mstd2 + T(w0)); \
sum2 = sum2 * mstd2 + T(w1); \
wsum += sum1; \
vsum += sum1 * (sum2 / (1.0 + abs(sum2)));
sum1 = W(0, -1123354974, -1112248839, 1046299686, -1143613552)
+ W(1, -1118620174, 1024662558, 1028038478, -1129268360)
+ W(2, 1016130204, -1087068557, 1063313277, -1103342192)
+ W(3, -1103968288, 1048182784, 1047279381, -1115088511)
+ W(4, -1101453425, 1059583965, -1088182320, 1003350800)
+ W(5, -1117908518, -1119323982, 1034186247, -1134684248)
+ W(6, -1122284590, 1027638054, -1124394588, -1111377363)
+ W(7, -1122818124, -1137723992, 978245507, 1028117438);
sum2 = W(0, -1162931039, -1131063526, 1029801649, -1117642655)
+ W(1, -1136248556, -1131086728, 1031011705, -1128864654)
+ W(2, -1115594515, -1128443230, 1042762789, -1107118398)
+ W(3, -1119907402, 1044675527, 1050674207, -1113986381)
+ W(4, 1022791334, -1107588397, 1009001220, -1186206458)
+ W(5, 1017500018, -1111169922, -1112569685, 1017255694)
+ W(6, -1156766128, -1125594766, -1148613464, 993928432)
+ W(7, 1014782692, -1135599628, -1114139175, 1007622876);
WS(1038828992, 1041685264);
sum1 = W(0, -1114329248, 1049950910, -1097681183, 1028668144) + W(1, 995958527, 1027336960, -1107326552, 1025858258)
+ W(2, -1117673776, 1060640651, -1085831405, 1033402064)
+ W(3, 1034401008, 1045782072, -1105157973, -1122828000)
+ W(4, 1038612842, -1098159517, 1053136924, -1110558370)
+ W(5, 1035088196, -1106507532, 1032016120, -1113173980)
+ W(6, 1008781376, -1124000392, 1023707152, 1012109856)
+ W(7, 1029875310, -1105439902, 1034119968, -1114749520);
sum2 =
W(0, 1031315360, -1099468189, -1112139926, 1036663822) + W(1, -1131767489, -1140834082, 1024287080, -1122285462)
+ W(2, 1023637252, -1100127579, -1117241706, 1038018354)
+ W(3, -1107869385, 1052854494, 1052996200, -1112496415)
+ W(4, -1107666272, 1034036134, 1027811452, -1110479054)
+ W(5, -1117110288, 1024451620, 1027157968, -1112615559) + W(6, -1124350185, 1003450083, -1131082337, 998992195)
+ W(7, -1110538107, 1041131277, 1035032776, -1106762474);
WS(-1086074680, 1053637716);
sum1 = W(0, -1121345387, 1042002951, -1113042450, -1121398619)
+ W(1, -1148805338, -1165378922, -1115297518, 991217235)
+ W(2, -1136570733, 1052460699, -1107443934, -1117268427)
+ W(3, 1049266593, -1094571489, -1098765182, 1036113926)
+ W(4, 1027081787, -1124281856, 1043313411, -1136658365)
+ W(5, -1133439181, 1040734807, 1006695533, -1112513138)
+ W(6, -1158465386, -1121708851, 1016359031, 1021173351)
+ W(7, -1120818857, 1035650578, 1027853163, -1106476275);
sum2 = W(0, 1026517575, -1170492850, -1138816415, -1143472678)
+ W(1, 1017334370, 1003954710, -1132363566, 998846550)
+ W(2, 1051558711, -1096673587, -1136175651, -1124275402)
+ W(3, 1071692777, -1077357700, -1098960792, 1018703670)
+ W(4, 1049822619, -1098179385, -1116986501, 1007812651) + W(5, 1020207734, 996694924, 1003290486, 1007766851)
+ W(6, 1022251878, -1122577241, -1141894102, 1009415395)
+ W(7, 1019995718, 1015494226, -1126828734, -1163222937);
WS(1051521136, 1027207116);
sum1 =
W(0, -1122694020, 1010830545, -1124291704, 1018062184) + W(1, -1121133108, -1124202632, 1037913146, -1116091286)
+ W(2, -1102175837, 1057246783, -1093542759, 1041281977) + W(3, -1116351908, 1026322980, 982577970, -1125394504)
+ W(4, 1045518980, -1089509425, 1055793637, 1008755233) + W(5, 1009393969, 1025178484, -1118947636, -1127575032)
+ W(6, 1008379217, -1117338572, 1001093793, 1015898776) + W(7, 1015772516, 1009646833, 1001810977, -1121163492);
sum2 = W(0, -1137495011, -1135527491, 1027730022, -1118108263)
+ W(1, 1013616911, -1123650952, 1024465134, -1128775579)
+ W(2, -1135578111, 1013443151, 1049128967, -1098008683)
+ W(3, 1029346938, -1114797945, 1068130737, -1080443718)
+ W(4, 1017473747, -1122100892, 1046423571, -1101482344)
+ W(5, 1012413655, -1128721387, -1143058109, -1137148015)
+ W(6, -1133405571, -1166794345, 1020545683, -1128178767)
+ W(7, 1008139351, -1156685818, -1126785325, 991435034);
WS(1057767608, -1132080751);
sum1 = W(0, 1026028453, 1025766741, 1035118319, 1012106581) + W(1, 1026017621, -1135552917, 1040474693, -1138611630)
+ W(2, -1117947285, 1051769667, -1111744027, 1030333189)
+ W(3, 1048679017, -1083959172, -1084413328, 1045191121)
+ W(4, 1025261389, -1120826122, 1049618505, -1122181545)
+ W(5, 1011196341, 1045191525, -1110336171, 1030480605) + W(6, 1015828970, 1028389741, 1028257397, 1027514349)
+ W(7, 1025013027, 1039505775, -1123719333, 1020294666);
sum2 = W(0, 1017587161, -1101123140, 1040188371, 988296658) + W(1, 1028118553, -1103020887, 1022642341, 1010063898)
+ W(2, 1008167722, -1099714612, 1039093756, 1026403646) + W(3, 1005112948, 1049070164, 1046164698, 1033545355)
+ W(4, -1125344655, 1032013714, -1111525569, 1002132020)
+ W(5, 1015776789, 1022049457, -1098832696, 1037334715)
+ W(6, -1148301500, 1009340114, -1115917000, -1139728254)
+ W(7, -1138850406, -1167693540, -1103378287, 1035581889);
WS(-1099372256, -1088618788);
sum1 = W(0, -1112538182, 1048693927, -1112344546, -1109099742)
+ W(1, -1113349022, 1033711782, -1129092599, -1110127398)
+ W(2, -1103996671, 1064716592, -1086749016, 1032699126)
+ W(3, 1024020908, -1143605597, 1044926535, -1121424940)
+ W(4, 1046614908, -1085173359, 1062252083, -1130166943)
+ W(5, -1111225386, 1004694493, 1040479887, -1106709441)
+ W(6, -1110537326, -1108087402, 1034104622, -1120726228)
+ W(7, -1114146165, -1138402062, 1042110371, -1106064827);
sum2 = W(0, 987083788, 1013472954, -1120418118, 979955865)
+ W(1, -1144106823, -1131186779, -1122269098, -1163904780)
+ W(2, -1120467381, -1139561796, 1038342084, -1115615181)
+ W(3, -1121977305, 1044091298, 1042996066, -1127292875)
+ W(4, -1118651341, 1038343490, -1118476220, -1123141745)
+ W(5, -1162389292, -1115306287, -1128689408, 1014320394)
+ W(6, -1152635694, -1155962630, -1132569906, -1135582470)
+ W(7, 964510307, -1117365756, -1141833923, 1008840046);
WS(1041282784, 1044242623);
sum1 = W(0, -1119885764, -1171512555, 1003864029, 1025494836)
+ W(1, -1119816052, -1121861252, 1040963149, -1113504879)
+ W(2, -1100880653, 1057266723, -1094412795, 1043843337)
+ W(3, -1113812594, 1010135439, -1118004569, -1125989575)
+ W(4, 1046531310, -1089952515, 1056310444, -1156936827)
+ W(5, 1015358999, 1031135156, -1114099002, -1122714492)
+ W(6, 1005085853, -1115226950, 1015234855, 1003362397) + W(7, 1021011107, 1003139037, 992693307, -1120612644);
sum2 =
W(0, 1005317381, -1142619324, -1126266146, 1026462555) + W(1, -1143827754, 1012902153, -1128784654, 1020893616)
+ W(2, 1019060164, -1114788024, -1094218173, 1054132458)
+ W(3, 1009279342, -1098688460, -1078812823, 1070492026)
+ W(4, 1014092605, -1120377499, -1099532818, 1048935725) + W(5, -1131000233, 1017453102, 1007638067, 1011358224)
+ W(6, 1012779564, -1139793504, -1130333980, 1015734963)
+ W(7, -1137528453, -1147729078, 1018177647, 987943782);
WS(1046635232, 1024078131);
sum1 = W(0, 1002735212, 1035063871, -1097977761, 1040314319) + W(1, 1025138813, 1034039879, -1105608655, 1035664624)
+ W(2, 1017042555, 1044122447, -1094991056, 1038536855)
+ W(3, -1132524982, -1110416695, 1051547730, -1114843703)
+ W(4, 1031803657, -1092481954, 1050188814, 1003107468) + W(5, 1033606155, -1094320024, 1047410847, 1019470987)
+ W(6, 1021596219, -1107502027, 1031346589, 1021345835)
+ W(7, 1015508823, -1103391009, 1046101811, -1136683190);
sum2 =
W(0, -1096475926, 1044036812, 1052862983, -1106234474) + W(1, -1112281069, -1112231286, 1024115789, -1121785528)
+ W(2, -1116645717, -1111398905, 1051331710, -1130292776)
+ W(3, 1041647377, -1096068583, 1038036111, 1037359643) + W(4, -1113263240, 1026411348, 1042458641, -1111704128)
+ W(5, 1023473494, -1114320784, 1028002558, -1123406807)
+ W(6, -1117017643, -1138574198, 1037890580, -1109714921)
+ W(7, 1039764966, -1104710548, -1106844581, 1041123403);
WS(-1088554040, -1076674880);
sum1 =
W(0, 1026292820, -1132973070, -1144171612, -1130131975) + W(1, 1016736263, 1034501898, -1110973538, 1028857234)
+ W(2, 1042339025, -1089525132, 1052671191, -1108906970)
+ W(3, -1110236986, 1037427962, -1123890785, -1112145786)
+ W(4, -1103961368, 1056478885, -1092344862, 1002874044) + W(5, 1016313655, -1118983748, 1041641985, 1025897228)
+ W(6, -1151588920, 1038469390, 1010979982, -1130905399)
+ W(7, 1014755782, -1123320716, 1017396903, 1033705562);
sum2 = W(0, 1013915195, -1133182691, -1127318198, 1020584890)
+ W(1, 1007730851, 1024414743, -1121307593, 1005058566) + W(2, 981970521, -1111248658, 1035588225, -1124411850)
+ W(3, 1028189234, 1040952978, 1057294107, 1029625115)
+ W(4, -1121038101, -1109339192, -1107404728, 1026110889)
+ W(5, -1142484934, -1094377458, 1024397525, 1023925523)
+ W(6, -1146368902, -1116592821, -1118541421, -1140327971)
+ W(7, 1010322539, -1112421528, 1019759378, -1199698720);
WS(1063581112, 1015292283);
sum1 =
W(0, -1123806598, -1125096044, 1046804719, -1117498166) + W(1, -1124445804, 1037634467, 1028314614, 1006823135)
+ W(2, 1036776315, -1083793455, 1064148787, -1106689849)
+ W(3, -1112186771, -1098422117, 1034155462, 1004978479)
+ W(4, -1102837698, 1058965073, -1089226130, 1033810693)
+ W(5, -1117642958, -1106625757, 1037373467, 1029436414) + W(6, -1137018200, 1036181095, 994321759, -1119765454)
+ W(7, 1010580432, -1127761788, 1021285644, 1034713459);
sum2 = W(0, -1127012521, -1110373665, -1121983257, 1021812843)
+ W(1, -1129458054, -1122115974, -1121551577, 1015201109)
+ W(2, -1134632819, -1118435057, -1107711610, 1039413537)
+ W(3, -1113739078, 1041258512, 1043546644, -1127386873)
+ W(4, -1106078947, 1025961773, 1048226293, -1110385416)
+ W(5, -1115241196, 1041055451, -1131486243, -1135801459)
+ W(6, -1122814807, 1025056413, -1139476701, -1132245806)
+ W(7, -1119046895, 1029845331, 1018415015, -1140149017);
WS(-1109010880, -1087548956);
sum1 = W(0, 1034947768, -1095012676, 1046023882, 1029737824) + W(1, 1034343312, -1102610188, 1039446704, 1025692706)
+ W(2, 1016751552, -1096454908, 1042564604, 1038373096)
+ W(3, 1019661856, -1091443170, -1105694067, 1039271048)
+ W(4, -1126501287, -1131030249, 1044246468, 1012879825)
+ W(5, 1017025648, 1042942296, -1103700296, 1041317114) + W(6, 1030724160, 1019936112, -1141422594, 1029263800)
+ W(7, -1140792121, 1024647464, -1107855416, 1041193844);
sum2 =
W(0, 1034034732, -1107522705, -1105460279, 1021740679) + W(1, -1113997103, -1121503695, 1038975878, -1112744336)
+ W(2, 1028771217, -1114143244, 1032873918, -1121564954) + W(3, 1025456143, -1105773446, 1059420344, 1024971971)
+ W(4, 1035315492, -1109746606, 1040681265, -1122379806)
+ W(5, -1102403849, -1106040358, 1046039582, -1106873869)
+ W(6, 1018212015, -1106459627, 1026290649, -1130313815)
+ W(7, -1099438501, 1039219872, 1046943722, -1105420350);
WS(-1086299832, -1077288694);
sum1 = W(0, 1021716686, -1099039878, -1111509136, 1039618828)
+ W(1, -1132921948, -1108540692, 1021468846, -1131678690)
+ W(2, -1113901292, -1158126306, -1096197083, 1041516082)
+ W(3, -1108835908, 1055092577, 1062013047, -1118733319)
+ W(4, 1023078294, -1089051407, 1050708993, -1122936235) + W(5, 965138311, -1113759276, 1022391342, 1015065790)
+ W(6, 998651320, -1107695832, -1133490396, 997649137) + W(7, -1130194922, -1113503632, 991635057, 1023538631);
sum2 = W(0, -1133976495, 1035891239, -1130801609, -1113698362)
+ W(1, 1027343155, 1030599513, -1108453664, 1016406968)
+ W(2, -1149877867, 1037590422, 1012747883, -1108226898)
+ W(3, -1119506980, 1054189655, -1119322812, -1120928356)
+ W(4, -1126385541, 1041308688, -1107379808, 1016225738)
+ W(5, 1016526837, -1112736561, -1119223720, 988482485) + W(6, 994153115, 1004824957, -1116360142, 1018050885)
+ W(7, -1140785051, -1120347934, -1129452107, -1117792638);
WS(-1113279936, 1066223903);
sum1 = W(0, -1128171420, 1040261344, -1112013315, -1123695998)
+ W(1, -1141738481, -1140107833, -1116929726, -1154978689)
+ W(2, -1138940153, 1050703688, -1108200895, -1123177006)
+ W(3, 1044160156, -1100167260, -1100730273, 1034288823)
+ W(4, 1020686276, -1130335589, 1040782300, -1141423761)
+ W(5, -1129655596, 1035637471, 1024316286, -1114187043) + W(6, 964173357, -1124525100, 1014134393, 1013984857)
+ W(7, -1123239900, 1032644739, 1029624526, -1108229911);
sum2 = W(0, -1115606620, 1021458196, 1009639320, -1131253088)
+ W(1, -1125272644, 1017345212, 1016051020, -1143902384)
+ W(2, -1099614716, 1047257730, -1120838650, 1020803060)
+ W(3, -1080575150, 1068148121, -1113655261, 1032085971)
+ W(4, -1102155153, 1044966894, -1132238288, 1016311348)
+ W(5, -1122847678, 1026244022, -1130782536, -1137376840)
+ W(6, -1123394906, 1017049220, 967940860, -1137115752)
+ W(7, -1129056732, 1010161976, 1004223696, -1136984808);
WS(1060545080, -1126581603);
sum1 =
W(0, 1032630360, -1112268976, 1045186906, -1125010622) + W(1, 1037657648, -1128752350, 1032285712, 1029508223)
+ W(2, 1043836232, -1090205186, 1053340438, -1108078856) + W(3, 1037448680, 1048595306, -1094666759, 1041691860)
+ W(4, 976149203, 1057651571, -1082657749, 1042698525) + W(5, 1031833596, 1035187792, -1092127852, 1040118132)
+ W(6, 1031675647, 1034806588, -1104761760, 1033087420) + W(7, 1025282125, 1043419290, -1096441814, 1034587656);
sum2 =
W(0, -1123698886, 1034075649, 998149095, -1113635181) + W(1, -1126365381, 1026991402, -1118780236, -1168196508)
+ W(2, -1135914762, 1019253181, 1023543366, -1114469118)
+ W(3, -1121651762, 1047572688, 1038479879, -1145545780) + W(4, -1118625490, 1035108181, -1114677625, 992781287)
+ W(5, -1122087574, -1115886918, 1011684618, -1139655050)
+ W(6, -1147908244, 1016718341, -1132109957, -1142844852)
+ W(7, -1134045690, -1117034488, -1137057610, 1007905050);
WS(-1083899832, -1105526146);
sum1 = W(0, 1026357515, -1119744955, -1117075907, -1111407198)
+ W(1, -1139718894, -1125720471, -1106102943, -1152407445)
+ W(2, 1044187583, -1092285679, 1048719011, -1107209883)
+ W(3, -1105573131, 1062437883, 1052836221, -1107292779)
+ W(4, -1104526300, 1058460257, -1089717563, -1122559055)
+ W(5, -1119529939, 1022150135, -1123085499, -1119739267)
+ W(6, -1125768375, 1033366698, -1114009838, -1119196243)
+ W(7, -1132776678, 1009731342, -1112611206, -1129505495);
sum2 = W(0, -1110807022, 1025172792, 1033543849, -1123816828)
+ W(1, -1129400032, -1117035240, 999654946, -1144812946)
+ W(2, -1105612607, 1035443403, 1039345667, -1120747576)
+ W(3, -1123619892, -1135427545, 1053020794, -1113498942)
+ W(4, -1131262448, -1111010692, 1047843748, -1113301822)
+ W(5, 1016529300, -1115955576, -1135856481, -1146605522)
+ W(6, -1129444600, -1117326476, 1022819536, -1119691028)
+ W(7, -1136239801, -1121250556, 998047364, -1135792457);
WS(-1107513792, 1064663354);
sum1 = W(0, 1030862455, -1113532308, 1032378968, -1123071015)
+ W(1, -1161118946, 1021510766, -1127591630, 1009770420)
+ W(2, 1040244826, -1091621085, 1051734861, -1107582956)
+ W(3, -1104300038, 1046262406, 1034822530, -1108820108)
+ W(4, -1102940181, 1054782000, -1095483267, -1125175670)
+ W(5, -1135077628, 1019068110, 1031948820, 1025488559)
+ W(6, -1135539484, 1036941280, -1172984259, -1126076542)
+ W(7, 1011863892, -1128724830, -1120336759, 1036426604);
sum2 =
W(0, -1135206239, -1140752647, 1022777359, 974924014) + W(1, -1139065871, -1123380440, 1021581075, -1133276463)
+ W(2, 1026230428, 988696695, -1122295168, 1029689087) + W(3, 1025917606, -1092786651, -1085937537, -1140169471)
+ W(4, 1027050280, 1049996339, 1032573953, -1135329695) + W(5, 1013849783, 1057784826, -1130048007, -1124883951)
+ W(6, 1016077019, 1033822297, 1032545188, 1011238415) + W(7, -1127829351, 1034470972, -1137094527, 1001568686);
WS(1058918200, -1121082995);
sum1 = W(0, -1123354974, -1112248839, 1046299686, -1143613552) + W(1, -1118620174, 1024662558, 1028038478, -1129268360) + W(2, 1016130204, -1087068557, 1063313277, -1103342192) + W(3, -1103968288, 1048182784, 1047279381, -1115088511) + W(4, -1101453425, 1059583965, -1088182320, 1003350800) + W(5, -1117908518, -1119323982, 1034186247, -1134684248) + W(6, -1122284590, 1027638054, -1124394588, -1111377363) + W(7, -1122818124, -1137723992, 978245507, 1028117438); sum2 = W(0, -1162931039, -1131063526, 1029801649, -1117642655) + W(1, -1136248556, -1131086728, 1031011705, -1128864654) + W(2, -1115594515, -1128443230, 1042762789, -1107118398) + W(3, -1119907402, 1044675527, 1050674207, -1113986381) + W(4, 1022791334, -1107588397, 1009001220, -1186206458) + W(5, 1017500018, -1111169922, -1112569685, 1017255694) + W(6, -1156766128, -1125594766, -1148613464, 993928432) + W(7, 1014782692, -1135599628, -1114139175, 1007622876); WS(1038828992, 1041685264);
sum1 = W(0, -1114329248, 1049950910, -1097681183, 1028668144) + W(1, 995958527, 1027336960, -1107326552, 1025858258) + W(2, -1117673776, 1060640651, -1085831405, 1033402064) + W(3, 1034401008, 1045782072, -1105157973, -1122828000) + W(4, 1038612842, -1098159517, 1053136924, -1110558370) + W(5, 1035088196, -1106507532, 1032016120, -1113173980) + W(6, 1008781376, -1124000392, 1023707152, 1012109856) + W(7, 1029875310, -1105439902, 1034119968, -1114749520); sum2 = W(0, 1031315360, -1099468189, -1112139926, 1036663822) + W(1, -1131767489, -1140834082, 1024287080, -1122285462) + W(2, 1023637252, -1100127579, -1117241706, 1038018354) + W(3, -1107869385, 1052854494, 1052996200, -1112496415) + W(4, -1107666272, 1034036134, 1027811452, -1110479054) + W(5, -1117110288, 1024451620, 1027157968, -1112615559) + W(6, -1124350185, 1003450083, -1131082337, 998992195) + W(7, -1110538107, 1041131277, 1035032776, -1106762474); WS(-1086074680, 1053637716);
sum1 = W(0, -1121345387, 1042002951, -1113042450, -1121398619) + W(1, -1148805338, -1165378922, -1115297518, 991217235) + W(2, -1136570733, 1052460699, -1107443934, -1117268427) + W(3, 1049266593, -1094571489, -1098765182, 1036113926) + W(4, 1027081787, -1124281856, 1043313411, -1136658365) + W(5, -1133439181, 1040734807, 1006695533, -1112513138) + W(6, -1158465386, -1121708851, 1016359031, 1021173351) + W(7, -1120818857, 1035650578, 1027853163, -1106476275); sum2 = W(0, 1026517575, -1170492850, -1138816415, -1143472678) + W(1, 1017334370, 1003954710, -1132363566, 998846550) + W(2, 1051558711, -1096673587, -1136175651, -1124275402) + W(3, 1071692777, -1077357700, -1098960792, 1018703670) + W(4, 1049822619, -1098179385, -1116986501, 1007812651) + W(5, 1020207734, 996694924, 1003290486, 1007766851) + W(6, 1022251878, -1122577241, -1141894102, 1009415395) + W(7, 1019995718, 1015494226, -1126828734, -1163222937); WS(1051521136, 1027207116);
sum1 = W(0, -1122694020, 1010830545, -1124291704, 1018062184) + W(1, -1121133108, -1124202632, 1037913146, -1116091286) + W(2, -1102175837, 1057246783, -1093542759, 1041281977) + W(3, -1116351908, 1026322980, 982577970, -1125394504) + W(4, 1045518980, -1089509425, 1055793637, 1008755233) + W(5, 1009393969, 1025178484, -1118947636, -1127575032) + W(6, 1008379217, -1117338572, 1001093793, 1015898776) + W(7, 1015772516, 1009646833, 1001810977, -1121163492); sum2 = W(0, -1137495011, -1135527491, 1027730022, -1118108263) + W(1, 1013616911, -1123650952, 1024465134, -1128775579) + W(2, -1135578111, 1013443151, 1049128967, -1098008683) + W(3, 1029346938, -1114797945, 1068130737, -1080443718) + W(4, 1017473747, -1122100892, 1046423571, -1101482344) + W(5, 1012413655, -1128721387, -1143058109, -1137148015) + W(6, -1133405571, -1166794345, 1020545683, -1128178767) + W(7, 1008139351, -1156685818, -1126785325, 991435034); WS(1057767608, -1132080751);
sum1 = W(0, 1026028453, 1025766741, 1035118319, 1012106581) + W(1, 1026017621, -1135552917, 1040474693, -1138611630) + W(2, -1117947285, 1051769667, -1111744027, 1030333189) + W(3, 1048679017, -1083959172, -1084413328, 1045191121) + W(4, 1025261389, -1120826122, 1049618505, -1122181545) + W(5, 1011196341, 1045191525, -1110336171, 1030480605) + W(6, 1015828970, 1028389741, 1028257397, 1027514349) + W(7, 1025013027, 1039505775, -1123719333, 1020294666); sum2 = W(0, 1017587161, -1101123140, 1040188371, 988296658) + W(1, 1028118553, -1103020887, 1022642341, 1010063898) + W(2, 1008167722, -1099714612, 1039093756, 1026403646) + W(3, 1005112948, 1049070164, 1046164698, 1033545355) + W(4, -1125344655, 1032013714, -1111525569, 1002132020) + W(5, 1015776789, 1022049457, -1098832696, 1037334715) + W(6, -1148301500, 1009340114, -1115917000, -1139728254) + W(7, -1138850406, -1167693540, -1103378287, 1035581889); WS(-1099372256, -1088618788);
sum1 = W(0, -1112538182, 1048693927, -1112344546, -1109099742) + W(1, -1113349022, 1033711782, -1129092599, -1110127398) + W(2, -1103996671, 1064716592, -1086749016, 1032699126) + W(3, 1024020908, -1143605597, 1044926535, -1121424940) + W(4, 1046614908, -1085173359, 1062252083, -1130166943) + W(5, -1111225386, 1004694493, 1040479887, -1106709441) + W(6, -1110537326, -1108087402, 1034104622, -1120726228) + W(7, -1114146165, -1138402062, 1042110371, -1106064827); sum2 = W(0, 987083788, 1013472954, -1120418118, 979955865) + W(1, -1144106823, -1131186779, -1122269098, -1163904780) + W(2, -1120467381, -1139561796, 1038342084, -1115615181) + W(3, -1121977305, 1044091298, 1042996066, -1127292875) + W(4, -1118651341, 1038343490, -1118476220, -1123141745) + W(5, -1162389292, -1115306287, -1128689408, 1014320394) + W(6, -1152635694, -1155962630, -1132569906, -1135582470) + W(7, 964510307, -1117365756, -1141833923, 1008840046); WS(1041282784, 1044242623);
sum1 = W(0, -1119885764, -1171512555, 1003864029, 1025494836) + W(1, -1119816052, -1121861252, 1040963149, -1113504879) + W(2, -1100880653, 1057266723, -1094412795, 1043843337) + W(3, -1113812594, 1010135439, -1118004569, -1125989575) + W(4, 1046531310, -1089952515, 1056310444, -1156936827) + W(5, 1015358999, 1031135156, -1114099002, -1122714492) + W(6, 1005085853, -1115226950, 1015234855, 1003362397) + W(7, 1021011107, 1003139037, 992693307, -1120612644); sum2 = W(0, 1005317381, -1142619324, -1126266146, 1026462555) + W(1, -1143827754, 1012902153, -1128784654, 1020893616) + W(2, 1019060164, -1114788024, -1094218173, 1054132458) + W(3, 1009279342, -1098688460, -1078812823, 1070492026) + W(4, 1014092605, -1120377499, -1099532818, 1048935725) + W(5, -1131000233, 1017453102, 1007638067, 1011358224) + W(6, 1012779564, -1139793504, -1130333980, 1015734963) + W(7, -1137528453, -1147729078, 1018177647, 987943782); WS(1046635232, 1024078131);
sum1 = W(0, 1002735212, 1035063871, -1097977761, 1040314319) + W(1, 1025138813, 1034039879, -1105608655, 1035664624) + W(2, 1017042555, 1044122447, -1094991056, 1038536855) + W(3, -1132524982, -1110416695, 1051547730, -1114843703) + W(4, 1031803657, -1092481954, 1050188814, 1003107468) + W(5, 1033606155, -1094320024, 1047410847, 1019470987) + W(6, 1021596219, -1107502027, 1031346589, 1021345835) + W(7, 1015508823, -1103391009, 1046101811, -1136683190); sum2 = W(0, -1096475926, 1044036812, 1052862983, -1106234474) + W(1, -1112281069, -1112231286, 1024115789, -1121785528) + W(2, -1116645717, -1111398905, 1051331710, -1130292776) + W(3, 1041647377, -1096068583, 1038036111, 1037359643) + W(4, -1113263240, 1026411348, 1042458641, -1111704128) + W(5, 1023473494, -1114320784, 1028002558, -1123406807) + W(6, -1117017643, -1138574198, 1037890580, -1109714921) + W(7, 1039764966, -1104710548, -1106844581, 1041123403); WS(-1088554040, -1076674880);
sum1 = W(0, 1026292820, -1132973070, -1144171612, -1130131975) + W(1, 1016736263, 1034501898, -1110973538, 1028857234) + W(2, 1042339025, -1089525132, 1052671191, -1108906970) + W(3, -1110236986, 1037427962, -1123890785, -1112145786) + W(4, -1103961368, 1056478885, -1092344862, 1002874044) + W(5, 1016313655, -1118983748, 1041641985, 1025897228) + W(6, -1151588920, 1038469390, 1010979982, -1130905399) + W(7, 1014755782, -1123320716, 1017396903, 1033705562); sum2 = W(0, 1013915195, -1133182691, -1127318198, 1020584890) + W(1, 1007730851, 1024414743, -1121307593, 1005058566) + W(2, 981970521, -1111248658, 1035588225, -1124411850) + W(3, 1028189234, 1040952978, 1057294107, 1029625115) + W(4, -1121038101, -1109339192, -1107404728, 1026110889) + W(5, -1142484934, -1094377458, 1024397525, 1023925523) + W(6, -1146368902, -1116592821, -1118541421, -1140327971) + W(7, 1010322539, -1112421528, 1019759378, -1199698720); WS(1063581112, 1015292283);
sum1 = W(0, -1123806598, -1125096044, 1046804719, -1117498166) + W(1, -1124445804, 1037634467, 1028314614, 1006823135) + W(2, 1036776315, -1083793455, 1064148787, -1106689849) + W(3, -1112186771, -1098422117, 1034155462, 1004978479) + W(4, -1102837698, 1058965073, -1089226130, 1033810693) + W(5, -1117642958, -1106625757, 1037373467, 1029436414) + W(6, -1137018200, 1036181095, 994321759, -1119765454) + W(7, 1010580432, -1127761788, 1021285644, 1034713459); sum2 = W(0, -1127012521, -1110373665, -1121983257, 1021812843) + W(1, -1129458054, -1122115974, -1121551577, 1015201109) + W(2, -1134632819, -1118435057, -1107711610, 1039413537) + W(3, -1113739078, 1041258512, 1043546644, -1127386873) + W(4, -1106078947, 1025961773, 1048226293, -1110385416) + W(5, -1115241196, 1041055451, -1131486243, -1135801459) + W(6, -1122814807, 1025056413, -1139476701, -1132245806) + W(7, -1119046895, 1029845331, 1018415015, -1140149017); WS(-1109010880, -1087548956);
sum1 = W(0, 1034947768, -1095012676, 1046023882, 1029737824) + W(1, 1034343312, -1102610188, 1039446704, 1025692706) + W(2, 1016751552, -1096454908, 1042564604, 1038373096) + W(3, 1019661856, -1091443170, -1105694067, 1039271048) + W(4, -1126501287, -1131030249, 1044246468, 1012879825) + W(5, 1017025648, 1042942296, -1103700296, 1041317114) + W(6, 1030724160, 1019936112, -1141422594, 1029263800) + W(7, -1140792121, 1024647464, -1107855416, 1041193844); sum2 = W(0, 1034034732, -1107522705, -1105460279, 1021740679) + W(1, -1113997103, -1121503695, 1038975878, -1112744336) + W(2, 1028771217, -1114143244, 1032873918, -1121564954) + W(3, 1025456143, -1105773446, 1059420344, 1024971971) + W(4, 1035315492, -1109746606, 1040681265, -1122379806) + W(5, -1102403849, -1106040358, 1046039582, -1106873869) + W(6, 1018212015, -1106459627, 1026290649, -1130313815) + W(7, -1099438501, 1039219872, 1046943722, -1105420350); WS(-1086299832, -1077288694);
sum1 = W(0, 1021716686, -1099039878, -1111509136, 1039618828) + W(1, -1132921948, -1108540692, 1021468846, -1131678690) + W(2, -1113901292, -1158126306, -1096197083, 1041516082) + W(3, -1108835908, 1055092577, 1062013047, -1118733319) + W(4, 1023078294, -1089051407, 1050708993, -1122936235) + W(5, 965138311, -1113759276, 1022391342, 1015065790) + W(6, 998651320, -1107695832, -1133490396, 997649137) + W(7, -1130194922, -1113503632, 991635057, 1023538631); sum2 = W(0, -1133976495, 1035891239, -1130801609, -1113698362) + W(1, 1027343155, 1030599513, -1108453664, 1016406968) + W(2, -1149877867, 1037590422, 1012747883, -1108226898) + W(3, -1119506980, 1054189655, -1119322812, -1120928356) + W(4, -1126385541, 1041308688, -1107379808, 1016225738) + W(5, 1016526837, -1112736561, -1119223720, 988482485) + W(6, 994153115, 1004824957, -1116360142, 1018050885) + W(7, -1140785051, -1120347934, -1129452107, -1117792638); WS(-1113279936, 1066223903);
sum1 = W(0, -1128171420, 1040261344, -1112013315, -1123695998) + W(1, -1141738481, -1140107833, -1116929726, -1154978689) + W(2, -1138940153, 1050703688, -1108200895, -1123177006) + W(3, 1044160156, -1100167260, -1100730273, 1034288823) + W(4, 1020686276, -1130335589, 1040782300, -1141423761) + W(5, -1129655596, 1035637471, 1024316286, -1114187043) + W(6, 964173357, -1124525100, 1014134393, 1013984857) + W(7, -1123239900, 1032644739, 1029624526, -1108229911); sum2 = W(0, -1115606620, 1021458196, 1009639320, -1131253088) + W(1, -1125272644, 1017345212, 1016051020, -1143902384) + W(2, -1099614716, 1047257730, -1120838650, 1020803060) + W(3, -1080575150, 1068148121, -1113655261, 1032085971) + W(4, -1102155153, 1044966894, -1132238288, 1016311348) + W(5, -1122847678, 1026244022, -1130782536, -1137376840) + W(6, -1123394906, 1017049220, 967940860, -1137115752) + W(7, -1129056732, 1010161976, 1004223696, -1136984808); WS(1060545080, -1126581603);
sum1 = W(0, 1032630360, -1112268976, 1045186906, -1125010622) + W(1, 1037657648, -1128752350, 1032285712, 1029508223) + W(2, 1043836232, -1090205186, 1053340438, -1108078856) + W(3, 1037448680, 1048595306, -1094666759, 1041691860) + W(4, 976149203, 1057651571, -1082657749, 1042698525) + W(5, 1031833596, 1035187792, -1092127852, 1040118132) + W(6, 1031675647, 1034806588, -1104761760, 1033087420) + W(7, 1025282125, 1043419290, -1096441814, 1034587656); sum2 = W(0, -1123698886, 1034075649, 998149095, -1113635181) + W(1, -1126365381, 1026991402, -1118780236, -1168196508) + W(2, -1135914762, 1019253181, 1023543366, -1114469118) + W(3, -1121651762, 1047572688, 1038479879, -1145545780) + W(4, -1118625490, 1035108181, -1114677625, 992781287) + W(5, -1122087574, -1115886918, 1011684618, -1139655050) + W(6, -1147908244, 1016718341, -1132109957, -1142844852) + W(7, -1134045690, -1117034488, -1137057610, 1007905050); WS(-1083899832, -1105526146);
sum1 = W(0, 1026357515, -1119744955, -1117075907, -1111407198) + W(1, -1139718894, -1125720471, -1106102943, -1152407445) + W(2, 1044187583, -1092285679, 1048719011, -1107209883) + W(3, -1105573131, 1062437883, 1052836221, -1107292779) + W(4, -1104526300, 1058460257, -1089717563, -1122559055) + W(5, -1119529939, 1022150135, -1123085499, -1119739267) + W(6, -1125768375, 1033366698, -1114009838, -1119196243) + W(7, -1132776678, 1009731342, -1112611206, -1129505495); sum2 = W(0, -1110807022, 1025172792, 1033543849, -1123816828) + W(1, -1129400032, -1117035240, 999654946, -1144812946) + W(2, -1105612607, 1035443403, 1039345667, -1120747576) + W(3, -1123619892, -1135427545, 1053020794, -1113498942) + W(4, -1131262448, -1111010692, 1047843748, -1113301822) + W(5, 1016529300, -1115955576, -1135856481, -1146605522) + W(6, -1129444600, -1117326476, 1022819536, -1119691028) + W(7, -1136239801, -1121250556, 998047364, -1135792457); WS(-1107513792, 1064663354);
sum1 = W(0, 1030862455, -1113532308, 1032378968, -1123071015) + W(1, -1161118946, 1021510766, -1127591630, 1009770420) + W(2, 1040244826, -1091621085, 1051734861, -1107582956) + W(3, -1104300038, 1046262406, 1034822530, -1108820108) + W(4, -1102940181, 1054782000, -1095483267, -1125175670) + W(5, -1135077628, 1019068110, 1031948820, 1025488559) + W(6, -1135539484, 1036941280, -1172984259, -1126076542) + W(7, 1011863892, -1128724830, -1120336759, 1036426604); sum2 = W(0, -1135206239, -1140752647, 1022777359, 974924014) + W(1, -1139065871, -1123380440, 1021581075, -1133276463) + W(2, 1026230428, 988696695, -1122295168, 1029689087) + W(3, 1025917606, -1092786651, -1085937537, -1140169471) + W(4, 1027050280, 1049996339, 1032573953, -1135329695) + W(5, 1013849783, 1057784826, -1130048007, -1124883951) + W(6, 1016077019, 1033822297, 1032545188, 1011238415) + W(7, -1127829351, 1034470972, -1137094527, 1001568686); WS(1058918200, -1121082995);
return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0);
}
// Converts an RGB color to a single luma value as a weighted sum of its
// channels (weights 0.299 / 0.587 / 0.114 for R / G / B).
float GetLuma(float3 color) {
	const float3 lumaWeights = float3(0.299f, 0.587f, 0.114f);
	return dot(lumaWeights, color);
}
// Shared scratch buffer of 429 floats; Pass1 fills it with one value per
// input sample and then reads 8x4 windows out of it.
// NOTE(review): `inp` is declared twice below, once with `shared` and once
// with `groupshared`. These look like two variants of the same declaration —
// confirm which one is intended to remain.
shared float inp[429];
groupshared float inp[429];
// Pass number, compared against LAST_PASS by `#if CURRENT_PASS == LAST_PASS`.
#define CURRENT_PASS 1
// Reduces a sample to one scalar by projecting its rgb onto `rgb2y`
// (`rgb2y` is defined elsewhere).
#define GET_SAMPLE(x) dot(x.rgb, rgb2y)
// Redirects imageStore() calls to imageStoreOverride(); the `out_image`
// argument is dropped and only the x component of `val` is stored.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x)
// Writes `value` into `temp` (declared elsewhere) at position `pos`.
void imageStoreOverride(uint2 pos, float value) { temp[pos] = (value); }
// Samples INPUT at `pos` via texture() and reduces the result with GET_SAMPLE.
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
// Results of GetInputSize() / GetInputPt() converted to float2.
// NOTE(review): presumably the input size in pixels and the size of one
// texel in normalized coordinates — confirm against their definitions.
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
// HOOKED_* names are plain aliases for the INPUT_* equivalents above.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// Pass 1 entry point.
//
// Fills the shared buffer `inp` with one value per input sample around the
// group's base position, gathers an 8x4 window of those values for this
// thread, and writes two vertically adjacent outputs: the original sample
// (samples[3][1]) and the value returned by nnedi3() for the window.
//
// NOTE(review): this span contains two interleaved variants of the same
// statements — one using float4/uint2/threadId/GroupMemoryBarrierWithGroupSync
// and one using vec4/ivec2/gl_* built-ins/barrier(). As a result `group_base`,
// `local_pos`, `ret`, `ret0`, `samples` and `destPos` are each declared twice
// and the braces do not balance. Confirm which variant is meant to be kept.
void Pass1(uint2 blockStart, uint3 threadId) {
const float2 inputPt = GetInputPt();
// Base position of this group; the y coordinate of blockStart is halved.
const uint2 group_base = uint2(blockStart.x, blockStart.y >> 1);
// Each thread fills entries of `inp` starting at its own linear index and
// stepping by the number of threads in the group, until all 429 are written.
for (int id = threadId.x * MP_NUM_THREADS_Y + threadId.y; id < 429; id += MP_NUM_THREADS_X * MP_NUM_THREADS_Y) {
ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
int local_pos = int(gl_LocalInvocationID.x) * 11 + int(gl_LocalInvocationID.y);
for (int id = int(gl_LocalInvocationIndex); id < 429; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
// `inp` is indexed as x * 11 + y, i.e. 11 consecutive entries per x.
uint x = (uint)id / 11, y = (uint)id % 11;
// Sample at the texel centre (+0.5), offset by (-3, -1) from the group base.
inp[id] = GetLuma(INPUT.SampleLevel(sam, inputPt * float2(group_base.x + x - 3 + 0.5, group_base.y + y - 1 + 0.5), 0).rgb);
inp[id] =
HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x - (3)) + 0.5, float(group_base.y + y - (1)) + 0.5)).x;
}
// Synchronize the group so every write to `inp` is visible before it is read.
GroupMemoryBarrierWithGroupSync();
float4 ret = 0.0;
float4 ret0 = 0.0;
float4 samples[8];
// Index in `inp` of the first element of this thread's 8x4 window.
const uint local_pos = threadId.x * 11 + threadId.y;
// Gather the window: samples[i][j] = inp[local_pos + i * 11 + j].
[unroll]
for (int i = 0; i < 8; ++i) {
[unroll]
for (int j = 0; j < 4; ++j) {
samples[i][j] = inp[local_pos + i * 11 + j];
}
barrier();
vec4 ret = vec4(0.0, 0.0, 0.0, 0.0);
vec4 ret0 = vec4(0.0, 0.0, 0.0, 0.0);
vec4 samples[8];
// Same 8x4 gather written out explicitly (offset = i * 11 + j).
samples[0][0] = inp[local_pos + 0];
samples[0][1] = inp[local_pos + 1];
samples[0][2] = inp[local_pos + 2];
samples[0][3] = inp[local_pos + 3];
samples[1][0] = inp[local_pos + 11];
samples[1][1] = inp[local_pos + 12];
samples[1][2] = inp[local_pos + 13];
samples[1][3] = inp[local_pos + 14];
samples[2][0] = inp[local_pos + 22];
samples[2][1] = inp[local_pos + 23];
samples[2][2] = inp[local_pos + 24];
samples[2][3] = inp[local_pos + 25];
samples[3][0] = inp[local_pos + 33];
samples[3][1] = inp[local_pos + 34];
samples[3][2] = inp[local_pos + 35];
samples[3][3] = inp[local_pos + 36];
samples[4][0] = inp[local_pos + 44];
samples[4][1] = inp[local_pos + 45];
samples[4][2] = inp[local_pos + 46];
samples[4][3] = inp[local_pos + 47];
samples[5][0] = inp[local_pos + 55];
samples[5][1] = inp[local_pos + 56];
samples[5][2] = inp[local_pos + 57];
samples[5][3] = inp[local_pos + 58];
samples[6][0] = inp[local_pos + 66];
samples[6][1] = inp[local_pos + 67];
samples[6][2] = inp[local_pos + 68];
samples[6][3] = inp[local_pos + 69];
samples[7][0] = inp[local_pos + 77];
samples[7][1] = inp[local_pos + 78];
samples[7][2] = inp[local_pos + 79];
samples[7][3] = inp[local_pos + 80];
// ret holds the nnedi3() result; ret0 holds the original sample
// (offset 34 is the same element as samples[3][1]).
ret[0] = nnedi3(samples);
ret0[0] = inp[local_pos + 34];
#if CURRENT_PASS == LAST_PASS
uint2 destPos = blockStart + threadId.xy * 2;
uint2 outputSize = GetOutputSize();
// Skip threads whose destination lies outside the output.
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
// Each thread writes two vertically adjacent texels of tex1: the original
// sample at destPos and the nnedi3() result one row below it.
const uint2 destPos = blockStart + uint2(threadId.x, threadId.y * 2);
tex1[destPos] = samples[3][1];
tex1[destPos + uint2(0, 1)] = nnedi3(samples);
#endif
// Store the original sample at row 2*y and the nnedi3() result at row 2*y + 1.
imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(1, 2), ret0);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(1, 2) + ivec2(0, 1), ret);
}
//!PASS 2
//!DESC double_x
//!IN tex1, INPUT
//!BLOCK_SIZE 64,8
//!NUM_THREADS 32,8
float nnedi3(float4 samples[8]) {
//!DESC NNEDI3 (double_x, nns16, win8x4)
//!IN INPUT, temp
//!OUT OUTPUT
//!BLOCK_SIZE 64, 8
//!NUM_THREADS 32, 8
#pragma optionNV(inline none)
float nnedi3(vec4 samples[8]) {
float sum = 0.0, sumsq = 0.0;
[unroll]
for (int i = 0; i < 8; i++) {
sum += dot(samples[i], 1.0f);
[unroll] for (int i = 0; i < 8; i++) {
sum += dot(samples[i], vec4(1.0, 1.0, 1.0, 1.0));
sumsq += dot(samples[i], samples[i]);
}
float mstd0 = sum / 32.0;
float mstd1 = sumsq / 32.0 - mstd0 * mstd0;
// Do not use lerp here, otherwise the result may be NaN
float mstd2 = mstd1 >= 1.192092896e-7 ? rsqrt(mstd1) : 0.0;
float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= 1.192092896e-7);
mstd1 *= mstd2;
float vsum = 0.0, wsum = 0.0, sum1, sum2;
#define T(x) intBitsToFloat(x)
#define W(i, w0, w1, w2, w3) dot(samples[i], vec4(T(w0), T(w1), T(w2), T(w3)))
#define WS(w0, w1) \
sum1 = exp(sum1 * mstd2 + T(w0)); \
sum2 = sum2 * mstd2 + T(w1); \
wsum += sum1; \
vsum += sum1 * (sum2 / (1.0 + abs(sum2)));
sum1 = W(0, -1123354974, -1118620174, 1016130204, -1103968288)
+ W(1, -1101453425, -1117908518, -1122284590, -1122818124)
+ W(2, -1112248839, 1024662558, -1087068557, 1048182784)
+ W(3, 1059583965, -1119323982, 1027638054, -1137723992) + W(4, 1046299686, 1028038478, 1063313277, 1047279381)
+ W(5, -1088182320, 1034186247, -1124394588, 978245507)
+ W(6, -1143613552, -1129268360, -1103342192, -1115088511)
+ W(7, 1003350800, -1134684248, -1111377363, 1028117438);
sum2 =
W(0, -1162931039, -1136248556, -1115594515, -1119907402) + W(1, 1022791334, 1017500018, -1156766128, 1014782692)
+ W(2, -1131063526, -1131086728, -1128443230, 1044675527)
+ W(3, -1107588397, -1111169922, -1125594766, -1135599628)
+ W(4, 1029801649, 1031011705, 1042762789, 1050674207) + W(5, 1009001220, -1112569685, -1148613464, -1114139175)
+ W(6, -1117642655, -1128864654, -1107118398, -1113986381)
+ W(7, -1186206458, 1017255694, 993928432, 1007622876);
WS(1038828992, 1041685264);
sum1 = W(0, -1114329248, 995958527, -1117673776, 1034401008) + W(1, 1038612842, 1035088196, 1008781376, 1029875310)
+ W(2, 1049950910, 1027336960, 1060640651, 1045782072)
+ W(3, -1098159517, -1106507532, -1124000392, -1105439902)
+ W(4, -1097681183, -1107326552, -1085831405, -1105157973)
+ W(5, 1053136924, 1032016120, 1023707152, 1034119968) + W(6, 1028668144, 1025858258, 1033402064, -1122828000)
+ W(7, -1110558370, -1113173980, 1012109856, -1114749520);
sum2 = W(0, 1031315360, -1131767489, 1023637252, -1107869385)
+ W(1, -1107666272, -1117110288, -1124350185, -1110538107)
+ W(2, -1099468189, -1140834082, -1100127579, 1052854494)
+ W(3, 1034036134, 1024451620, 1003450083, 1041131277) + W(4, -1112139926, 1024287080, -1117241706, 1052996200)
+ W(5, 1027811452, 1027157968, -1131082337, 1035032776)
+ W(6, 1036663822, -1122285462, 1038018354, -1112496415)
+ W(7, -1110479054, -1112615559, 998992195, -1106762474);
WS(-1086074680, 1053637716);
sum1 = W(0, -1121345387, -1148805338, -1136570733, 1049266593)
+ W(1, 1027081787, -1133439181, -1158465386, -1120818857)
+ W(2, 1042002951, -1165378922, 1052460699, -1094571489)
+ W(3, -1124281856, 1040734807, -1121708851, 1035650578)
+ W(4, -1113042450, -1115297518, -1107443934, -1098765182)
+ W(5, 1043313411, 1006695533, 1016359031, 1027853163) + W(6, -1121398619, 991217235, -1117268427, 1036113926)
+ W(7, -1136658365, -1112513138, 1021173351, -1106476275);
sum2 = W(0, 1026517575, 1017334370, 1051558711, 1071692777) + W(1, 1049822619, 1020207734, 1022251878, 1019995718)
+ W(2, -1170492850, 1003954710, -1096673587, -1077357700)
+ W(3, -1098179385, 996694924, -1122577241, 1015494226)
+ W(4, -1138816415, -1132363566, -1136175651, -1098960792)
+ W(5, -1116986501, 1003290486, -1141894102, -1126828734)
+ W(6, -1143472678, 998846550, -1124275402, 1018703670)
+ W(7, 1007812651, 1007766851, 1009415395, -1163222937);
WS(1051521136, 1027207116);
sum1 = W(0, -1122694020, -1121133108, -1102175837, -1116351908)
+ W(1, 1045518980, 1009393969, 1008379217, 1015772516) + W(2, 1010830545, -1124202632, 1057246783, 1026322980)
+ W(3, -1089509425, 1025178484, -1117338572, 1009646833)
+ W(4, -1124291704, 1037913146, -1093542759, 982577970) + W(5, 1055793637, -1118947636, 1001093793, 1001810977)
+ W(6, 1018062184, -1116091286, 1041281977, -1125394504)
+ W(7, 1008755233, -1127575032, 1015898776, -1121163492);
sum2 = W(0, -1137495011, 1013616911, -1135578111, 1029346938)
+ W(1, 1017473747, 1012413655, -1133405571, 1008139351)
+ W(2, -1135527491, -1123650952, 1013443151, -1114797945)
+ W(3, -1122100892, -1128721387, -1166794345, -1156685818)
+ W(4, 1027730022, 1024465134, 1049128967, 1068130737) + W(5, 1046423571, -1143058109, 1020545683, -1126785325)
+ W(6, -1118108263, -1128775579, -1098008683, -1080443718)
+ W(7, -1101482344, -1137148015, -1128178767, 991435034);
WS(1057767608, -1132080751);
sum1 =
W(0, 1026028453, 1026017621, -1117947285, 1048679017) + W(1, 1025261389, 1011196341, 1015828970, 1025013027)
+ W(2, 1025766741, -1135552917, 1051769667, -1083959172) + W(3, -1120826122, 1045191525, 1028389741, 1039505775)
+ W(4, 1035118319, 1040474693, -1111744027, -1084413328)
+ W(5, 1049618505, -1110336171, 1028257397, -1123719333) + W(6, 1012106581, -1138611630, 1030333189, 1045191121)
+ W(7, -1122181545, 1030480605, 1027514349, 1020294666);
sum2 = W(0, 1017587161, 1028118553, 1008167722, 1005112948)
+ W(1, -1125344655, 1015776789, -1148301500, -1138850406)
+ W(2, -1101123140, -1103020887, -1099714612, 1049070164)
+ W(3, 1032013714, 1022049457, 1009340114, -1167693540) + W(4, 1040188371, 1022642341, 1039093756, 1046164698)
+ W(5, -1111525569, -1098832696, -1115917000, -1103378287)
+ W(6, 988296658, 1010063898, 1026403646, 1033545355) + W(7, 1002132020, 1037334715, -1139728254, 1035581889);
WS(-1099372256, -1088618788);
sum1 = W(0, -1112538182, -1113349022, -1103996671, 1024020908)
+ W(1, 1046614908, -1111225386, -1110537326, -1114146165)
+ W(2, 1048693927, 1033711782, 1064716592, -1143605597)
+ W(3, -1085173359, 1004694493, -1108087402, -1138402062)
+ W(4, -1112344546, -1129092599, -1086749016, 1044926535)
+ W(5, 1062252083, 1040479887, 1034104622, 1042110371)
+ W(6, -1109099742, -1110127398, 1032699126, -1121424940)
+ W(7, -1130166943, -1106709441, -1120726228, -1106064827);
sum2 = W(0, 987083788, -1144106823, -1120467381, -1121977305)
+ W(1, -1118651341, -1162389292, -1152635694, 964510307)
+ W(2, 1013472954, -1131186779, -1139561796, 1044091298)
+ W(3, 1038343490, -1115306287, -1155962630, -1117365756)
+ W(4, -1120418118, -1122269098, 1038342084, 1042996066)
+ W(5, -1118476220, -1128689408, -1132569906, -1141833923)
+ W(6, 979955865, -1163904780, -1115615181, -1127292875)
+ W(7, -1123141745, 1014320394, -1135582470, 1008840046);
WS(1041282784, 1044242623);
sum1 = W(0, -1119885764, -1119816052, -1100880653, -1113812594)
+ W(1, 1046531310, 1015358999, 1005085853, 1021011107) + W(2, -1171512555, -1121861252, 1057266723, 1010135439)
+ W(3, -1089952515, 1031135156, -1115226950, 1003139037)
+ W(4, 1003864029, 1040963149, -1094412795, -1118004569) + W(5, 1056310444, -1114099002, 1015234855, 992693307)
+ W(6, 1025494836, -1113504879, 1043843337, -1125989575)
+ W(7, -1156936827, -1122714492, 1003362397, -1120612644);
sum2 = W(0, 1005317381, -1143827754, 1019060164, 1009279342)
+ W(1, 1014092605, -1131000233, 1012779564, -1137528453)
+ W(2, -1142619324, 1012902153, -1114788024, -1098688460)
+ W(3, -1120377499, 1017453102, -1139793504, -1147729078)
+ W(4, -1126266146, -1128784654, -1094218173, -1078812823)
+ W(5, -1099532818, 1007638067, -1130333980, 1018177647) + W(6, 1026462555, 1020893616, 1054132458, 1070492026)
+ W(7, 1048935725, 1011358224, 1015734963, 987943782);
WS(1046635232, 1024078131);
sum1 = W(0, 1002735212, 1025138813, 1017042555, -1132524982) + W(1, 1031803657, 1033606155, 1021596219, 1015508823)
+ W(2, 1035063871, 1034039879, 1044122447, -1110416695)
+ W(3, -1092481954, -1094320024, -1107502027, -1103391009)
+ W(4, -1097977761, -1105608655, -1094991056, 1051547730)
+ W(5, 1050188814, 1047410847, 1031346589, 1046101811) + W(6, 1040314319, 1035664624, 1038536855, -1114843703)
+ W(7, 1003107468, 1019470987, 1021345835, -1136683190);
sum2 = W(0, -1096475926, -1112281069, -1116645717, 1041647377)
+ W(1, -1113263240, 1023473494, -1117017643, 1039764966)
+ W(2, 1044036812, -1112231286, -1111398905, -1096068583)
+ W(3, 1026411348, -1114320784, -1138574198, -1104710548)
+ W(4, 1052862983, 1024115789, 1051331710, 1038036111) + W(5, 1042458641, 1028002558, 1037890580, -1106844581)
+ W(6, -1106234474, -1121785528, -1130292776, 1037359643)
+ W(7, -1111704128, -1123406807, -1109714921, 1041123403);
WS(-1088554040, -1076674880);
sum1 = W(0, 1026292820, 1016736263, 1042339025, -1110236986)
+ W(1, -1103961368, 1016313655, -1151588920, 1014755782)
+ W(2, -1132973070, 1034501898, -1089525132, 1037427962)
+ W(3, 1056478885, -1118983748, 1038469390, -1123320716)
+ W(4, -1144171612, -1110973538, 1052671191, -1123890785)
+ W(5, -1092344862, 1041641985, 1010979982, 1017396903)
+ W(6, -1130131975, 1028857234, -1108906970, -1112145786)
+ W(7, 1002874044, 1025897228, -1130905399, 1033705562);
sum2 = W(0, 1013915195, 1007730851, 981970521, 1028189234) + W(1, -1121038101, -1142484934, -1146368902, 1010322539)
+ W(2, -1133182691, 1024414743, -1111248658, 1040952978)
+ W(3, -1109339192, -1094377458, -1116592821, -1112421528)
+ W(4, -1127318198, -1121307593, 1035588225, 1057294107)
+ W(5, -1107404728, 1024397525, -1118541421, 1019759378)
+ W(6, 1020584890, 1005058566, -1124411850, 1029625115)
+ W(7, 1026110889, 1023925523, -1140327971, -1199698720);
WS(1063581112, 1015292283);
sum1 = W(0, -1123806598, -1124445804, 1036776315, -1112186771)
+ W(1, -1102837698, -1117642958, -1137018200, 1010580432)
+ W(2, -1125096044, 1037634467, -1083793455, -1098422117)
+ W(3, 1058965073, -1106625757, 1036181095, -1127761788) + W(4, 1046804719, 1028314614, 1064148787, 1034155462)
+ W(5, -1089226130, 1037373467, 994321759, 1021285644) + W(6, -1117498166, 1006823135, -1106689849, 1004978479)
+ W(7, 1033810693, 1029436414, -1119765454, 1034713459);
sum2 =
W(0, -1127012521, -1129458054, -1134632819, -1113739078)
+ W(1, -1106078947, -1115241196, -1122814807, -1119046895)
+ W(2, -1110373665, -1122115974, -1118435057, 1041258512) + W(3, 1025961773, 1041055451, 1025056413, 1029845331)
+ W(4, -1121983257, -1121551577, -1107711610, 1043546644)
+ W(5, 1048226293, -1131486243, -1139476701, 1018415015) + W(6, 1021812843, 1015201109, 1039413537, -1127386873)
+ W(7, -1110385416, -1135801459, -1132245806, -1140149017);
WS(-1109010880, -1087548956);
sum1 = W(0, 1034947768, 1034343312, 1016751552, 1019661856) + W(1, -1126501287, 1017025648, 1030724160, -1140792121)
+ W(2, -1095012676, -1102610188, -1096454908, -1091443170)
+ W(3, -1131030249, 1042942296, 1019936112, 1024647464) + W(4, 1046023882, 1039446704, 1042564604, -1105694067)
+ W(5, 1044246468, -1103700296, -1141422594, -1107855416)
+ W(6, 1029737824, 1025692706, 1038373096, 1039271048) + W(7, 1012879825, 1041317114, 1029263800, 1041193844);
sum2 = W(0, 1034034732, -1113997103, 1028771217, 1025456143)
+ W(1, 1035315492, -1102403849, 1018212015, -1099438501)
+ W(2, -1107522705, -1121503695, -1114143244, -1105773446)
+ W(3, -1109746606, -1106040358, -1106459627, 1039219872)
+ W(4, -1105460279, 1038975878, 1032873918, 1059420344) + W(5, 1040681265, 1046039582, 1026290649, 1046943722)
+ W(6, 1021740679, -1112744336, -1121564954, 1024971971)
+ W(7, -1122379806, -1106873869, -1130313815, -1105420350);
WS(-1086299832, -1077288694);
sum1 = W(0, 1021716686, -1132921948, -1113901292, -1108835908) + W(1, 1023078294, 965138311, 998651320, -1130194922)
+ W(2, -1099039878, -1108540692, -1158126306, 1055092577)
+ W(3, -1089051407, -1113759276, -1107695832, -1113503632)
+ W(4, -1111509136, 1021468846, -1096197083, 1062013047) + W(5, 1050708993, 1022391342, -1133490396, 991635057)
+ W(6, 1039618828, -1131678690, 1041516082, -1118733319)
+ W(7, -1122936235, 1015065790, 997649137, 1023538631);
sum2 = W(0, -1133976495, 1027343155, -1149877867, -1119506980)
+ W(1, -1126385541, 1016526837, 994153115, -1140785051) + W(2, 1035891239, 1030599513, 1037590422, 1054189655)
+ W(3, 1041308688, -1112736561, 1004824957, -1120347934)
+ W(4, -1130801609, -1108453664, 1012747883, -1119322812)
+ W(5, -1107379808, -1119223720, -1116360142, -1129452107)
+ W(6, -1113698362, 1016406968, -1108226898, -1120928356)
+ W(7, 1016225738, 988482485, 1018050885, -1117792638);
WS(-1113279936, 1066223903);
sum1 =
W(0, -1128171420, -1141738481, -1138940153, 1044160156) + W(1, 1020686276, -1129655596, 964173357, -1123239900)
+ W(2, 1040261344, -1140107833, 1050703688, -1100167260)
+ W(3, -1130335589, 1035637471, -1124525100, 1032644739)
+ W(4, -1112013315, -1116929726, -1108200895, -1100730273)
+ W(5, 1040782300, 1024316286, 1014134393, 1029624526) + W(6, -1123695998, -1154978689, -1123177006, 1034288823)
+ W(7, -1141423761, -1114187043, 1013984857, -1108229911);
sum2 = W(0, -1115606620, -1125272644, -1099614716, -1080575150)
+ W(1, -1102155153, -1122847678, -1123394906, -1129056732)
+ W(2, 1021458196, 1017345212, 1047257730, 1068148121) + W(3, 1044966894, 1026244022, 1017049220, 1010161976)
+ W(4, 1009639320, 1016051020, -1120838650, -1113655261)
+ W(5, -1132238288, -1130782536, 967940860, 1004223696)
+ W(6, -1131253088, -1143902384, 1020803060, 1032085971)
+ W(7, 1016311348, -1137376840, -1137115752, -1136984808);
WS(1060545080, -1126581603);
sum1 = W(0, 1032630360, 1037657648, 1043836232, 1037448680) + W(1, 976149203, 1031833596, 1031675647, 1025282125)
+ W(2, -1112268976, -1128752350, -1090205186, 1048595306)
+ W(3, 1057651571, 1035187792, 1034806588, 1043419290) + W(4, 1045186906, 1032285712, 1053340438, -1094666759)
+ W(5, -1082657749, -1092127852, -1104761760, -1096441814)
+ W(6, -1125010622, 1029508223, -1108078856, 1041691860)
+ W(7, 1042698525, 1040118132, 1033087420, 1034587656);
sum2 = W(0, -1123698886, -1126365381, -1135914762, -1121651762)
+ W(1, -1118625490, -1122087574, -1147908244, -1134045690)
+ W(2, 1034075649, 1026991402, 1019253181, 1047572688) + W(3, 1035108181, -1115886918, 1016718341, -1117034488)
+ W(4, 998149095, -1118780236, 1023543366, 1038479879)
+ W(5, -1114677625, 1011684618, -1132109957, -1137057610)
+ W(6, -1113635181, -1168196508, -1114469118, -1145545780)
+ W(7, 992781287, -1139655050, -1142844852, 1007905050);
WS(-1083899832, -1105526146);
sum1 = W(0, 1026357515, -1139718894, 1044187583, -1105573131)
+ W(1, -1104526300, -1119529939, -1125768375, -1132776678)
+ W(2, -1119744955, -1125720471, -1092285679, 1062437883)
+ W(3, 1058460257, 1022150135, 1033366698, 1009731342) + W(4, -1117075907, -1106102943, 1048719011, 1052836221)
+ W(5, -1089717563, -1123085499, -1114009838, -1112611206)
+ W(6, -1111407198, -1152407445, -1107209883, -1107292779)
+ W(7, -1122559055, -1119739267, -1119196243, -1129505495);
sum2 = W(0, -1110807022, -1129400032, -1105612607, -1123619892)
+ W(1, -1131262448, 1016529300, -1129444600, -1136239801)
+ W(2, 1025172792, -1117035240, 1035443403, -1135427545)
+ W(3, -1111010692, -1115955576, -1117326476, -1121250556)
+ W(4, 1033543849, 999654946, 1039345667, 1053020794) + W(5, 1047843748, -1135856481, 1022819536, 998047364)
+ W(6, -1123816828, -1144812946, -1120747576, -1113498942)
+ W(7, -1113301822, -1146605522, -1119691028, -1135792457);
WS(-1107513792, 1064663354);
sum1 = W(0, 1030862455, -1161118946, 1040244826, -1104300038)
+ W(1, -1102940181, -1135077628, -1135539484, 1011863892)
+ W(2, -1113532308, 1021510766, -1091621085, 1046262406)
+ W(3, 1054782000, 1019068110, 1036941280, -1128724830) + W(4, 1032378968, -1127591630, 1051734861, 1034822530)
+ W(5, -1095483267, 1031948820, -1172984259, -1120336759)
+ W(6, -1123071015, 1009770420, -1107582956, -1108820108)
+ W(7, -1125175670, 1025488559, -1126076542, 1036426604);
sum2 =
W(0, -1135206239, -1139065871, 1026230428, 1025917606) + W(1, 1027050280, 1013849783, 1016077019, -1127829351)
+ W(2, -1140752647, -1123380440, 988696695, -1092786651) + W(3, 1049996339, 1057784826, 1033822297, 1034470972)
+ W(4, 1022777359, 1021581075, -1122295168, -1085937537)
+ W(5, 1032573953, -1130048007, 1032545188, -1137094527) + W(6, 974924014, -1133276463, 1029689087, -1140169471)
+ W(7, -1135329695, -1124883951, 1011238415, 1001568686);
WS(1058918200, -1121082995);
sum1 = W(0, -1123354974, -1118620174, 1016130204, -1103968288) + W(1, -1101453425, -1117908518, -1122284590, -1122818124) + W(2, -1112248839, 1024662558, -1087068557, 1048182784) + W(3, 1059583965, -1119323982, 1027638054, -1137723992) + W(4, 1046299686, 1028038478, 1063313277, 1047279381) + W(5, -1088182320, 1034186247, -1124394588, 978245507) + W(6, -1143613552, -1129268360, -1103342192, -1115088511) + W(7, 1003350800, -1134684248, -1111377363, 1028117438); sum2 = W(0, -1162931039, -1136248556, -1115594515, -1119907402) + W(1, 1022791334, 1017500018, -1156766128, 1014782692) + W(2, -1131063526, -1131086728, -1128443230, 1044675527) + W(3, -1107588397, -1111169922, -1125594766, -1135599628) + W(4, 1029801649, 1031011705, 1042762789, 1050674207) + W(5, 1009001220, -1112569685, -1148613464, -1114139175) + W(6, -1117642655, -1128864654, -1107118398, -1113986381) + W(7, -1186206458, 1017255694, 993928432, 1007622876); WS(1038828992, 1041685264);
sum1 = W(0, -1114329248, 995958527, -1117673776, 1034401008) + W(1, 1038612842, 1035088196, 1008781376, 1029875310) + W(2, 1049950910, 1027336960, 1060640651, 1045782072) + W(3, -1098159517, -1106507532, -1124000392, -1105439902) + W(4, -1097681183, -1107326552, -1085831405, -1105157973) + W(5, 1053136924, 1032016120, 1023707152, 1034119968) + W(6, 1028668144, 1025858258, 1033402064, -1122828000) + W(7, -1110558370, -1113173980, 1012109856, -1114749520); sum2 = W(0, 1031315360, -1131767489, 1023637252, -1107869385) + W(1, -1107666272, -1117110288, -1124350185, -1110538107) + W(2, -1099468189, -1140834082, -1100127579, 1052854494) + W(3, 1034036134, 1024451620, 1003450083, 1041131277) + W(4, -1112139926, 1024287080, -1117241706, 1052996200) + W(5, 1027811452, 1027157968, -1131082337, 1035032776) + W(6, 1036663822, -1122285462, 1038018354, -1112496415) + W(7, -1110479054, -1112615559, 998992195, -1106762474); WS(-1086074680, 1053637716);
sum1 = W(0, -1121345387, -1148805338, -1136570733, 1049266593) + W(1, 1027081787, -1133439181, -1158465386, -1120818857) + W(2, 1042002951, -1165378922, 1052460699, -1094571489) + W(3, -1124281856, 1040734807, -1121708851, 1035650578) + W(4, -1113042450, -1115297518, -1107443934, -1098765182) + W(5, 1043313411, 1006695533, 1016359031, 1027853163) + W(6, -1121398619, 991217235, -1117268427, 1036113926) + W(7, -1136658365, -1112513138, 1021173351, -1106476275); sum2 = W(0, 1026517575, 1017334370, 1051558711, 1071692777) + W(1, 1049822619, 1020207734, 1022251878, 1019995718) + W(2, -1170492850, 1003954710, -1096673587, -1077357700) + W(3, -1098179385, 996694924, -1122577241, 1015494226) + W(4, -1138816415, -1132363566, -1136175651, -1098960792) + W(5, -1116986501, 1003290486, -1141894102, -1126828734) + W(6, -1143472678, 998846550, -1124275402, 1018703670) + W(7, 1007812651, 1007766851, 1009415395, -1163222937); WS(1051521136, 1027207116);
sum1 = W(0, -1122694020, -1121133108, -1102175837, -1116351908) + W(1, 1045518980, 1009393969, 1008379217, 1015772516) + W(2, 1010830545, -1124202632, 1057246783, 1026322980) + W(3, -1089509425, 1025178484, -1117338572, 1009646833) + W(4, -1124291704, 1037913146, -1093542759, 982577970) + W(5, 1055793637, -1118947636, 1001093793, 1001810977) + W(6, 1018062184, -1116091286, 1041281977, -1125394504) + W(7, 1008755233, -1127575032, 1015898776, -1121163492); sum2 = W(0, -1137495011, 1013616911, -1135578111, 1029346938) + W(1, 1017473747, 1012413655, -1133405571, 1008139351) + W(2, -1135527491, -1123650952, 1013443151, -1114797945) + W(3, -1122100892, -1128721387, -1166794345, -1156685818) + W(4, 1027730022, 1024465134, 1049128967, 1068130737) + W(5, 1046423571, -1143058109, 1020545683, -1126785325) + W(6, -1118108263, -1128775579, -1098008683, -1080443718) + W(7, -1101482344, -1137148015, -1128178767, 991435034); WS(1057767608, -1132080751);
sum1 = W(0, 1026028453, 1026017621, -1117947285, 1048679017) + W(1, 1025261389, 1011196341, 1015828970, 1025013027) + W(2, 1025766741, -1135552917, 1051769667, -1083959172) + W(3, -1120826122, 1045191525, 1028389741, 1039505775) + W(4, 1035118319, 1040474693, -1111744027, -1084413328) + W(5, 1049618505, -1110336171, 1028257397, -1123719333) + W(6, 1012106581, -1138611630, 1030333189, 1045191121) + W(7, -1122181545, 1030480605, 1027514349, 1020294666); sum2 = W(0, 1017587161, 1028118553, 1008167722, 1005112948) + W(1, -1125344655, 1015776789, -1148301500, -1138850406) + W(2, -1101123140, -1103020887, -1099714612, 1049070164) + W(3, 1032013714, 1022049457, 1009340114, -1167693540) + W(4, 1040188371, 1022642341, 1039093756, 1046164698) + W(5, -1111525569, -1098832696, -1115917000, -1103378287) + W(6, 988296658, 1010063898, 1026403646, 1033545355) + W(7, 1002132020, 1037334715, -1139728254, 1035581889); WS(-1099372256, -1088618788);
sum1 = W(0, -1112538182, -1113349022, -1103996671, 1024020908) + W(1, 1046614908, -1111225386, -1110537326, -1114146165) + W(2, 1048693927, 1033711782, 1064716592, -1143605597) + W(3, -1085173359, 1004694493, -1108087402, -1138402062) + W(4, -1112344546, -1129092599, -1086749016, 1044926535) + W(5, 1062252083, 1040479887, 1034104622, 1042110371) + W(6, -1109099742, -1110127398, 1032699126, -1121424940) + W(7, -1130166943, -1106709441, -1120726228, -1106064827); sum2 = W(0, 987083788, -1144106823, -1120467381, -1121977305) + W(1, -1118651341, -1162389292, -1152635694, 964510307) + W(2, 1013472954, -1131186779, -1139561796, 1044091298) + W(3, 1038343490, -1115306287, -1155962630, -1117365756) + W(4, -1120418118, -1122269098, 1038342084, 1042996066) + W(5, -1118476220, -1128689408, -1132569906, -1141833923) + W(6, 979955865, -1163904780, -1115615181, -1127292875) + W(7, -1123141745, 1014320394, -1135582470, 1008840046); WS(1041282784, 1044242623);
sum1 = W(0, -1119885764, -1119816052, -1100880653, -1113812594) + W(1, 1046531310, 1015358999, 1005085853, 1021011107) + W(2, -1171512555, -1121861252, 1057266723, 1010135439) + W(3, -1089952515, 1031135156, -1115226950, 1003139037) + W(4, 1003864029, 1040963149, -1094412795, -1118004569) + W(5, 1056310444, -1114099002, 1015234855, 992693307) + W(6, 1025494836, -1113504879, 1043843337, -1125989575) + W(7, -1156936827, -1122714492, 1003362397, -1120612644); sum2 = W(0, 1005317381, -1143827754, 1019060164, 1009279342) + W(1, 1014092605, -1131000233, 1012779564, -1137528453) + W(2, -1142619324, 1012902153, -1114788024, -1098688460) + W(3, -1120377499, 1017453102, -1139793504, -1147729078) + W(4, -1126266146, -1128784654, -1094218173, -1078812823) + W(5, -1099532818, 1007638067, -1130333980, 1018177647) + W(6, 1026462555, 1020893616, 1054132458, 1070492026) + W(7, 1048935725, 1011358224, 1015734963, 987943782); WS(1046635232, 1024078131);
sum1 = W(0, 1002735212, 1025138813, 1017042555, -1132524982) + W(1, 1031803657, 1033606155, 1021596219, 1015508823) + W(2, 1035063871, 1034039879, 1044122447, -1110416695) + W(3, -1092481954, -1094320024, -1107502027, -1103391009) + W(4, -1097977761, -1105608655, -1094991056, 1051547730) + W(5, 1050188814, 1047410847, 1031346589, 1046101811) + W(6, 1040314319, 1035664624, 1038536855, -1114843703) + W(7, 1003107468, 1019470987, 1021345835, -1136683190); sum2 = W(0, -1096475926, -1112281069, -1116645717, 1041647377) + W(1, -1113263240, 1023473494, -1117017643, 1039764966) + W(2, 1044036812, -1112231286, -1111398905, -1096068583) + W(3, 1026411348, -1114320784, -1138574198, -1104710548) + W(4, 1052862983, 1024115789, 1051331710, 1038036111) + W(5, 1042458641, 1028002558, 1037890580, -1106844581) + W(6, -1106234474, -1121785528, -1130292776, 1037359643) + W(7, -1111704128, -1123406807, -1109714921, 1041123403); WS(-1088554040, -1076674880);
sum1 = W(0, 1026292820, 1016736263, 1042339025, -1110236986) + W(1, -1103961368, 1016313655, -1151588920, 1014755782) + W(2, -1132973070, 1034501898, -1089525132, 1037427962) + W(3, 1056478885, -1118983748, 1038469390, -1123320716) + W(4, -1144171612, -1110973538, 1052671191, -1123890785) + W(5, -1092344862, 1041641985, 1010979982, 1017396903) + W(6, -1130131975, 1028857234, -1108906970, -1112145786) + W(7, 1002874044, 1025897228, -1130905399, 1033705562); sum2 = W(0, 1013915195, 1007730851, 981970521, 1028189234) + W(1, -1121038101, -1142484934, -1146368902, 1010322539) + W(2, -1133182691, 1024414743, -1111248658, 1040952978) + W(3, -1109339192, -1094377458, -1116592821, -1112421528) + W(4, -1127318198, -1121307593, 1035588225, 1057294107) + W(5, -1107404728, 1024397525, -1118541421, 1019759378) + W(6, 1020584890, 1005058566, -1124411850, 1029625115) + W(7, 1026110889, 1023925523, -1140327971, -1199698720); WS(1063581112, 1015292283);
sum1 = W(0, -1123806598, -1124445804, 1036776315, -1112186771) + W(1, -1102837698, -1117642958, -1137018200, 1010580432) + W(2, -1125096044, 1037634467, -1083793455, -1098422117) + W(3, 1058965073, -1106625757, 1036181095, -1127761788) + W(4, 1046804719, 1028314614, 1064148787, 1034155462) + W(5, -1089226130, 1037373467, 994321759, 1021285644) + W(6, -1117498166, 1006823135, -1106689849, 1004978479) + W(7, 1033810693, 1029436414, -1119765454, 1034713459); sum2 = W(0, -1127012521, -1129458054, -1134632819, -1113739078) + W(1, -1106078947, -1115241196, -1122814807, -1119046895) + W(2, -1110373665, -1122115974, -1118435057, 1041258512) + W(3, 1025961773, 1041055451, 1025056413, 1029845331) + W(4, -1121983257, -1121551577, -1107711610, 1043546644) + W(5, 1048226293, -1131486243, -1139476701, 1018415015) + W(6, 1021812843, 1015201109, 1039413537, -1127386873) + W(7, -1110385416, -1135801459, -1132245806, -1140149017); WS(-1109010880, -1087548956);
sum1 = W(0, 1034947768, 1034343312, 1016751552, 1019661856) + W(1, -1126501287, 1017025648, 1030724160, -1140792121) + W(2, -1095012676, -1102610188, -1096454908, -1091443170) + W(3, -1131030249, 1042942296, 1019936112, 1024647464) + W(4, 1046023882, 1039446704, 1042564604, -1105694067) + W(5, 1044246468, -1103700296, -1141422594, -1107855416) + W(6, 1029737824, 1025692706, 1038373096, 1039271048) + W(7, 1012879825, 1041317114, 1029263800, 1041193844); sum2 = W(0, 1034034732, -1113997103, 1028771217, 1025456143) + W(1, 1035315492, -1102403849, 1018212015, -1099438501) + W(2, -1107522705, -1121503695, -1114143244, -1105773446) + W(3, -1109746606, -1106040358, -1106459627, 1039219872) + W(4, -1105460279, 1038975878, 1032873918, 1059420344) + W(5, 1040681265, 1046039582, 1026290649, 1046943722) + W(6, 1021740679, -1112744336, -1121564954, 1024971971) + W(7, -1122379806, -1106873869, -1130313815, -1105420350); WS(-1086299832, -1077288694);
sum1 = W(0, 1021716686, -1132921948, -1113901292, -1108835908) + W(1, 1023078294, 965138311, 998651320, -1130194922) + W(2, -1099039878, -1108540692, -1158126306, 1055092577) + W(3, -1089051407, -1113759276, -1107695832, -1113503632) + W(4, -1111509136, 1021468846, -1096197083, 1062013047) + W(5, 1050708993, 1022391342, -1133490396, 991635057) + W(6, 1039618828, -1131678690, 1041516082, -1118733319) + W(7, -1122936235, 1015065790, 997649137, 1023538631); sum2 = W(0, -1133976495, 1027343155, -1149877867, -1119506980) + W(1, -1126385541, 1016526837, 994153115, -1140785051) + W(2, 1035891239, 1030599513, 1037590422, 1054189655) + W(3, 1041308688, -1112736561, 1004824957, -1120347934) + W(4, -1130801609, -1108453664, 1012747883, -1119322812) + W(5, -1107379808, -1119223720, -1116360142, -1129452107) + W(6, -1113698362, 1016406968, -1108226898, -1120928356) + W(7, 1016225738, 988482485, 1018050885, -1117792638); WS(-1113279936, 1066223903);
sum1 = W(0, -1128171420, -1141738481, -1138940153, 1044160156) + W(1, 1020686276, -1129655596, 964173357, -1123239900) + W(2, 1040261344, -1140107833, 1050703688, -1100167260) + W(3, -1130335589, 1035637471, -1124525100, 1032644739) + W(4, -1112013315, -1116929726, -1108200895, -1100730273) + W(5, 1040782300, 1024316286, 1014134393, 1029624526) + W(6, -1123695998, -1154978689, -1123177006, 1034288823) + W(7, -1141423761, -1114187043, 1013984857, -1108229911); sum2 = W(0, -1115606620, -1125272644, -1099614716, -1080575150) + W(1, -1102155153, -1122847678, -1123394906, -1129056732) + W(2, 1021458196, 1017345212, 1047257730, 1068148121) + W(3, 1044966894, 1026244022, 1017049220, 1010161976) + W(4, 1009639320, 1016051020, -1120838650, -1113655261) + W(5, -1132238288, -1130782536, 967940860, 1004223696) + W(6, -1131253088, -1143902384, 1020803060, 1032085971) + W(7, 1016311348, -1137376840, -1137115752, -1136984808); WS(1060545080, -1126581603);
sum1 = W(0, 1032630360, 1037657648, 1043836232, 1037448680) + W(1, 976149203, 1031833596, 1031675647, 1025282125) + W(2, -1112268976, -1128752350, -1090205186, 1048595306) + W(3, 1057651571, 1035187792, 1034806588, 1043419290) + W(4, 1045186906, 1032285712, 1053340438, -1094666759) + W(5, -1082657749, -1092127852, -1104761760, -1096441814) + W(6, -1125010622, 1029508223, -1108078856, 1041691860) + W(7, 1042698525, 1040118132, 1033087420, 1034587656); sum2 = W(0, -1123698886, -1126365381, -1135914762, -1121651762) + W(1, -1118625490, -1122087574, -1147908244, -1134045690) + W(2, 1034075649, 1026991402, 1019253181, 1047572688) + W(3, 1035108181, -1115886918, 1016718341, -1117034488) + W(4, 998149095, -1118780236, 1023543366, 1038479879) + W(5, -1114677625, 1011684618, -1132109957, -1137057610) + W(6, -1113635181, -1168196508, -1114469118, -1145545780) + W(7, 992781287, -1139655050, -1142844852, 1007905050); WS(-1083899832, -1105526146);
sum1 = W(0, 1026357515, -1139718894, 1044187583, -1105573131) + W(1, -1104526300, -1119529939, -1125768375, -1132776678) + W(2, -1119744955, -1125720471, -1092285679, 1062437883) + W(3, 1058460257, 1022150135, 1033366698, 1009731342) + W(4, -1117075907, -1106102943, 1048719011, 1052836221) + W(5, -1089717563, -1123085499, -1114009838, -1112611206) + W(6, -1111407198, -1152407445, -1107209883, -1107292779) + W(7, -1122559055, -1119739267, -1119196243, -1129505495); sum2 = W(0, -1110807022, -1129400032, -1105612607, -1123619892) + W(1, -1131262448, 1016529300, -1129444600, -1136239801) + W(2, 1025172792, -1117035240, 1035443403, -1135427545) + W(3, -1111010692, -1115955576, -1117326476, -1121250556) + W(4, 1033543849, 999654946, 1039345667, 1053020794) + W(5, 1047843748, -1135856481, 1022819536, 998047364) + W(6, -1123816828, -1144812946, -1120747576, -1113498942) + W(7, -1113301822, -1146605522, -1119691028, -1135792457); WS(-1107513792, 1064663354);
sum1 = W(0, 1030862455, -1161118946, 1040244826, -1104300038) + W(1, -1102940181, -1135077628, -1135539484, 1011863892) + W(2, -1113532308, 1021510766, -1091621085, 1046262406) + W(3, 1054782000, 1019068110, 1036941280, -1128724830) + W(4, 1032378968, -1127591630, 1051734861, 1034822530) + W(5, -1095483267, 1031948820, -1172984259, -1120336759) + W(6, -1123071015, 1009770420, -1107582956, -1108820108) + W(7, -1125175670, 1025488559, -1126076542, 1036426604); sum2 = W(0, -1135206239, -1139065871, 1026230428, 1025917606) + W(1, 1027050280, 1013849783, 1016077019, -1127829351) + W(2, -1140752647, -1123380440, 988696695, -1092786651) + W(3, 1049996339, 1057784826, 1033822297, 1034470972) + W(4, 1022777359, 1021581075, -1122295168, -1085937537) + W(5, 1032573953, -1130048007, 1032545188, -1137094527) + W(6, 974924014, -1133276463, 1029689087, -1140169471) + W(7, -1135329695, -1124883951, 1011238415, 1001568686); WS(1058918200, -1121082995);
return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0);
}
// 2x3 matrix multiplied with an RGB triple (see mul(rgb2uv, ...) in
// imageStoreOverride) to obtain the two chroma components (U, V).
const static float2x3 rgb2uv = {
-0.169, -0.331, 0.5,
0.5, -0.419, -0.081
};
// Work-group shared staging buffer for the pass. Pass2 fills it with
// 525 = 35 * 15 samples, addressed as id / 15 and id % 15.
// NOTE(review): `shared` and `groupshared` declarations of inp both appear in
// this span — presumably two variants of the same line; confirm.
shared float inp[525];
// 3x3 matrix multiplied with a (Y, U, V) triple in imageStoreOverride to
// convert it back to RGB.
const static float3x3 yuv2rgb = {
1, -0.00093, 1.401687,
1, -0.3437, -0.71417,
1, 1.77216, 0.00099
};
// Index of this pass; compared against LAST_PASS by #if blocks in the pass body.
#define CURRENT_PASS 2
groupshared float inp[525];
// Reduces a colour to one scalar by dotting its .rgb with rgb2y
// (rgb2y is defined outside this excerpt).
#define GET_SAMPLE(x) dot(x.rgb, rgb2y)
// Redirects imageStore(...) calls to imageStoreOverride: the out_image argument
// is dropped and only the x component of the value is forwarded.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x)
// Writes one output pixel: `value` supplies the first (luma) component, while
// the two chroma components are taken from the INPUT texture sampled at
// HOOKED_map(pos). The (value, U, V) triple is converted to RGB with yuv2rgb
// and stored in OUTPUT[pos] with alpha 1.0.
void imageStoreOverride(uint2 pos, float value) {
	const float3 sourceRgb = INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb;
	const float2 chroma = mul(rgb2uv, sourceRgb);
	const float3 yuv = float3(value.x, chroma);
	OUTPUT[pos] = float4(mul(yuv2rgb, yuv), 1.0);
}
// Samples INPUT at pos and reduces the colour to a scalar via GET_SAMPLE.
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
// Size of the input and the value returned by GetInputPt()
// (presumably the size of one input texel — confirm against the runtime).
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
// Samples the intermediate texture `temp` and returns its x component as a float.
#define temp_tex(pos) (float(texture(temp, pos).x))
// `temp` is treated as having the input width and twice the input height;
// temp_pt is the reciprocal of that size per axis.
static const float2 temp_size = float2(GetInputSize().x * 1, GetInputSize().y * 2);
static const float2 temp_pt = float2(1.0 / (temp_size.x), 1.0 / (temp_size.y));
// In this pass the HOOKED_* names are aliases for the `temp` texture helpers.
#define HOOKED_tex(pos) temp_tex(pos)
#define HOOKED_size temp_size
#define HOOKED_pt temp_pt
// NOTE(review): this body appears to interleave two variants of Pass2 - one that
// works from blockStart/threadId with tex1.SampleLevel and WriteToOutput, and one
// written against gl_* identifiers with HOOKED_tex and imageStore. `group_base`
// and the tile-fill loop header each appear twice and the braces do not balance,
// so confirm which lines belong to the current file before changing anything here.
void Pass2(uint2 blockStart, uint3 threadId) {
const float2 inputPt = GetInputPt();
const float2 outputPt = GetOutputPt();
const uint2 group_base = uint2(blockStart.x >> 1, blockStart.y);
for (int id = threadId.x * MP_NUM_THREADS_Y + threadId.y; id < 525; id += MP_NUM_THREADS_X * MP_NUM_THREADS_Y) {
ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
int local_pos = int(gl_LocalInvocationID.x) * 15 + int(gl_LocalInvocationID.y);
for (int id = int(gl_LocalInvocationIndex); id < 525; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
// Tile index -> (x, y) with a row stride of 15 entries.
uint x = (uint)id / 15, y = (uint)id % 15;
inp[id] = tex1.SampleLevel(sam, inputPt * float2(group_base.x + x - 1 + 0.5, (group_base.y + y - 3 + 0.5) * 0.5), 0).r;
inp[id] =
HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x - (1)) + 0.5, float(group_base.y + y - (3)) + 0.5)).x;
}
GroupMemoryBarrierWithGroupSync();
uint2 destPos = blockStart + uint2(threadId.x * 2, threadId.y);
if (!CheckViewport(destPos)) {
barrier();
vec4 ret = vec4(0.0, 0.0, 0.0, 0.0);
vec4 ret0 = vec4(0.0, 0.0, 0.0, 0.0);
// Gather 32 tile entries: four groups spaced 15 apart, eight consecutive entries each.
vec4 samples[8];
samples[0][0] = inp[local_pos + 0];
samples[0][1] = inp[local_pos + 1];
samples[0][2] = inp[local_pos + 2];
samples[0][3] = inp[local_pos + 3];
samples[1][0] = inp[local_pos + 4];
samples[1][1] = inp[local_pos + 5];
samples[1][2] = inp[local_pos + 6];
samples[1][3] = inp[local_pos + 7];
samples[2][0] = inp[local_pos + 15];
samples[2][1] = inp[local_pos + 16];
samples[2][2] = inp[local_pos + 17];
samples[2][3] = inp[local_pos + 18];
samples[3][0] = inp[local_pos + 19];
samples[3][1] = inp[local_pos + 20];
samples[3][2] = inp[local_pos + 21];
samples[3][3] = inp[local_pos + 22];
samples[4][0] = inp[local_pos + 30];
samples[4][1] = inp[local_pos + 31];
samples[4][2] = inp[local_pos + 32];
samples[4][3] = inp[local_pos + 33];
samples[5][0] = inp[local_pos + 34];
samples[5][1] = inp[local_pos + 35];
samples[5][2] = inp[local_pos + 36];
samples[5][3] = inp[local_pos + 37];
samples[6][0] = inp[local_pos + 45];
samples[6][1] = inp[local_pos + 46];
samples[6][2] = inp[local_pos + 47];
samples[6][3] = inp[local_pos + 48];
samples[7][0] = inp[local_pos + 49];
samples[7][1] = inp[local_pos + 50];
samples[7][2] = inp[local_pos + 51];
samples[7][3] = inp[local_pos + 52];
ret[0] = nnedi3(samples);
// 18 = 1 * 15 + 3, i.e. the tile entry at offset (1, 3) from local_pos.
ret0[0] = inp[local_pos + 18];
#if CURRENT_PASS == LAST_PASS
uint2 destPos = blockStart + threadId.xy * 2;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
float4 ret = 0.0;
float4 ret0 = 0.0;
float4 samples[8];
const uint local_pos = threadId.x * 15 + threadId.y;
[unroll]
for (int i = 0; i < 8; ++i) {
[unroll]
for (int j = 0; j < 4; ++j) {
samples[i][j] = inp[local_pos + (i / 2) * 15 + (i % 2) * 4 + j];
}
}
float2 originUV = mul(rgb2uv, INPUT.SampleLevel(sam1, (destPos + 0.5f) * outputPt, 0).rgb);
WriteToOutput(destPos, mul(yuv2rgb, float3(samples[2][3], originUV)));
++destPos.x;
if (!CheckViewport(destPos)) {
return;
}
originUV = mul(rgb2uv, INPUT.SampleLevel(sam1, (destPos + 0.5f) * outputPt, 0).rgb);
WriteToOutput(destPos, mul(yuv2rgb, float3(nnedi3(samples), originUV)));
#endif
// Two stores per invocation: column 2 * gid.x gets ret0, column 2 * gid.x + 1 gets ret.
imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(2, 1), ret0);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(2, 1) + ivec2(1, 0), ret);
}

View file

@ -0,0 +1,953 @@
// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers
// Please don't edit this file directly.
// Generated by: nnedi3.py --nns 16 --win 8x6 --use-compute-shader --use-magpie
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//!MAGPIE EFFECT
//!VERSION 4
//!SORT_NAME NNEDI3_016_6
//!TEXTURE
Texture2D INPUT;
//!SAMPLER
//!FILTER POINT
SamplerState sam_INPUT;
//!TEXTURE
//!WIDTH INPUT_WIDTH * 1 * 2
//!HEIGHT INPUT_HEIGHT * 2 * 1
Texture2D OUTPUT;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_INPUT_LINEAR;
//!TEXTURE
//!FORMAT R16_FLOAT
//!WIDTH INPUT_WIDTH * 1
//!HEIGHT INPUT_HEIGHT * 2
Texture2D temp;
//!SAMPLER
//!FILTER POINT
SamplerState sam_temp;
//!COMMON
#include "prescalers.hlsli"
#define LAST_PASS 2
//!PASS 1
//!DESC NNEDI3 (double_y, nns16, win8x6)
//!IN INPUT
//!OUT temp
//!BLOCK_SIZE 32, 16
//!NUM_THREADS 32, 8
#pragma optionNV(inline none)
// Predicts one value from a window of 48 samples (12 vec4s).
//
// The window's mean and standard deviation are computed first. Sixteen
// sum1/sum2/WS groups then each form two weighted sums of the samples, and the
// groups are blended with exp-based weights. The blended value is scaled by the
// standard deviation, offset by the mean and clamped to [0, 1].
//
// samples: the 48 window values, packed four per vec4.
// returns: the predicted value in [0, 1].
float nnedi3(vec4 samples[12]) {
// Accumulate the sum and the sum of squares of all 48 samples.
float sum = 0.0, sumsq = 0.0;
[unroll] for (int i = 0; i < 12; i++) {
sum += dot(samples[i], vec4(1.0, 1.0, 1.0, 1.0));
sumsq += dot(samples[i], samples[i]);
}
// mstd0 = mean, mstd1 = variance (E[x^2] - mean^2).
float mstd0 = sum / 48.0;
float mstd1 = sumsq / 48.0 - mstd0 * mstd0;
// mstd2 = 1 / sqrt(variance), or 0.0 when the variance is below 1.192092896e-7.
float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= 1.192092896e-7);
// variance * (1 / sqrt(variance)) = standard deviation (0 when mstd2 is 0).
mstd1 *= mstd2;
float vsum = 0.0, wsum = 0.0, sum1, sum2;
// T: turns an integer literal into a float through intBitsToFloat (defined outside
// this chunk; presumably a bit-pattern reinterpretation - confirm).
#define T(x) intBitsToFloat(x)
// W: dot product of samples[i] with four weights.
#define W(i, w0, w1, w2, w3) dot(samples[i], vec4(T(w0), T(w1), T(w2), T(w3)))
// WS: finishes one group. sum1 becomes exp(sum1 * mstd2 + w0) and is added to wsum;
// sum2 becomes sum2 * mstd2 + w1 and contributes sum1 * sum2 / (1 + |sum2|) to vsum.
#define WS(w0, w1) \
sum1 = exp(sum1 * mstd2 + T(w0)); \
sum2 = sum2 * mstd2 + T(w1); \
wsum += sum1; \
vsum += sum1 * (sum2 / (1.0 + abs(sum2)));
// Sixteen groups follow, each: sum1 = 12 W terms; sum2 = 12 W terms; WS(w0, w1).
// NOTE(review): presumably one group per neuron of "--nns 16" from the generator
// command line in the file header - confirm.
sum1 =
W(0, -1126897990, 1027745880, 1024250604, 1024642508) + W(1, -1121959908, -1149906049, -1130469888, -1121396864)
+ W(2, 1039079928, -1107295041, -1147395201, -1126556538)
+ W(3, -1113607518, 1041026790, 1022159130, 1044630722) + W(4, -1107136294, 1005058137, -1116173177, 1042195560)
+ W(5, -1098313415, 999141354, 1019497054, 1007702352) + W(6, 1015526727, 1018714920, 1042189511, -1106681307)
+ W(7, 1035880216, -1121374916, -1133977224, 1026239260)
+ W(8, -1106606352, 1038936227, -1124106064, 1025050132) + W(9, 990390561, -1131068140, 1013770942, -1122507740)
+ W(10, -1136584888, -1135809122, -1122292152, 1015308851)
+ W(11, -1122039043, 1031978820, -1116330759, 1018900008);
sum2 = W(0, 1017133506, 1011515348, -1139818306, -1123730089) + W(1, 996184056, -1138856554, 1023321012, 1029416248)
+ W(2, -1115999672, 1020129658, 1015618084, 1007066512)
+ W(3, -1119553894, 1057587887, -1090489276, -1109933138)
+ W(4, 1016266760, -1145378916, -1112177411, 1071604647)
+ W(5, -1079392139, -1097028615, 1028448562, 1008681896)
+ W(6, -1165256880, 1051025857, -1098617840, -1105405946)
+ W(7, -1155286464, 1000343320, -1133574805, 1035052104)
+ W(8, -1139515542, -1135392452, -1138601606, 991053648)
+ W(9, 1020043526, 1010374724, -1121583660, -1142174380)
+ W(10, 997185888, -1155288808, -1135761830, 1018728192)
+ W(11, 1024878156, 1002597928, -1131188096, -1132781834);
WS(1018288640, 1027735986);
sum1 =
W(0, 1012158232, -1178449286, 1044498160, -1128542910) + W(1, -1115962871, 1002517720, 1006778572, -1114624234)
+ W(2, 1032943202, 1027108853, 1017365062, 964628492) + W(3, 1025063871, -1104570115, 1059928494, -1088743921)
+ W(4, 1032615126, -1134936888, -1156175041, 1028919475)
+ W(5, -1097612337, -1106124541, 1026836706, -1146238776)
+ W(6, 1010747802, 1034856692, -1085331503, 1059914122)
+ W(7, -1114177498, 1020458158, -1140348884, -1127457566)
+ W(8, 1031833306, 1032056909, -1122073627, 1016604174) + W(9, 1020162890, -1122825993, -1119592595, 1033999672)
+ W(10, 1022377282, 998219705, -1172026051, -1115773453)
+ W(11, 1038136595, 1027508251, -1129465364, 1023799671);
sum2 =
W(0, -1126840972, -1130460798, 1019075916, 1017322604) + W(1, -1131054760, -1131047996, -1145399745, 985194115)
+ W(2, -1120812206, -1129997452, 1006903064, -1143360737)
+ W(3, -1139273136, -1112997847, -1139625904, 1042717692)
+ W(4, -1114175000, -1130986946, 991527106, -1120456092) + W(5, 1043975251, 1051048254, -1113881740, 1007107280)
+ W(6, -1135317632, 1001121889, -1150833602, -1121880440)
+ W(7, 978663174, -1143215153, -1139461992, 1017866680)
+ W(8, -1128878392, -1112673669, 1026044394, -1125685806)
+ W(9, -1129486378, 1006765920, -1133504840, -1126929736)
+ W(10, 1014584312, -1144361281, 995542402, 1000306721)
+ W(11, -1142139489, -1114488494, 1007041936, -1134951296);
WS(1042433344, -1111851638);
sum1 = W(0, -1128612156, -1112658226, -1119638967, 1043958886)
+ W(1, -1120465263, -1128976934, -1139940268, -1123380939)
+ W(2, -1126908022, 1033805831, -1115346894, -1142120768)
+ W(3, -1122042583, -1128727592, -1097703246, 1057665642)
+ W(4, -1104545545, 1005565040, 984858240, -1107767030)
+ W(5, 1052387104, 1046318672, -1108167869, -1148354296)
+ W(6, 999630836, -1114896432, 1054789077, -1095395475)
+ W(7, 1029397739, -1133849404, -1146630760, -1115281716)
+ W(8, 1030603948, -1117224401, -1163176544, -1117808895)
+ W(9, -1126512698, -1129996802, 1028419819, -1123618471)
+ W(10, -1117439993, 1013349902, 996431920, -1123547845)
+ W(11, 1026334318, -1113258842, -1134051464, -1120421311);
sum2 =
W(0, 1022431497, -1109389142, 1004613154, 1028727631) + W(1, 1029503922, -1132574761, -1132240188, -1119299282)
+ W(2, -1139248009, -1129989652, -1140046689, -1114039002)
+ W(3, 1024165374, -1107432916, 1041447926, 1047487962) + W(4, 1017218352, -1135952741, -1114822837, 1044244351)
+ W(5, -1108646182, -1100679909, 1040665470, -1123756570)
+ W(6, -1120729932, 1031006195, 1047688354, -1126089152)
+ W(7, -1120804126, -1148002498, -1124855948, 983982854) + W(8, 1009435309, 1033956847, -1107003694, 1028342876)
+ W(9, -1126342960, -1158996358, -1122846542, -1123334894)
+ W(10, -1140927562, -1117057946, -1128289576, -1121099750)
+ W(11, 1036127241, 1039673953, -1102421772, 1026336008);
WS(1015433728, 1058400049);
sum1 =
W(0, -1139873791, 1031161269, -1113693508, 1033801204) + W(1, -1119172737, -1143910182, -1133909491, 1032977294)
+ W(2, -1112917766, -1131731326, 989007258, 1019358132) + W(3, 1023506921, -1116372870, -1116140698, 1045725159)
+ W(4, -1122523445, 1008313039, -1230944644, 1035249566)
+ W(5, -1103376612, -1102794347, 1044071755, -1115540344)
+ W(6, -1118840528, -1120831281, 1044830734, -1116748777)
+ W(7, 1030473357, -1126204226, 1028378783, -1114963068)
+ W(8, -1141442286, 1032646513, 1018738506, -1118552369)
+ W(9, -1121050287, 1032892305, 1023234585, -1112562780)
+ W(10, 1021910870, 1016154651, 1033465034, -1105610222)
+ W(11, 1034039600, 1030129285, -1122899972, -1124368226);
sum2 = W(0, -1138428449, -1158711528, -1124467432, -1140697417)
+ W(1, 1030243467, 1012442941, 992976916, 1013039401) + W(2, -1130455464, -1123518198, 1033499227, 975746961)
+ W(3, -1142924106, -1128734961, -1113146735, -1099387353)
+ W(4, 1051222006, -1122081826, 976851025, 1036130613)
+ W(5, -1097860430, -1077268149, 1072898808, -1117904739)
+ W(6, 989093448, 1010050489, -1108810723, -1091225653) + W(7, 1056060393, -1131990027, 997652548, -1137359275)
+ W(8, -1122996798, 1032494444, 1025590581, 951236744) + W(9, -1153131756, 990210276, -1140348735, -1115493835)
+ W(10, 1025171621, 1006284898, -1134977059, -1138876101)
+ W(11, -1127238416, 1018469149, 1026307569, -1146863422);
WS(-1143089152, 1030017260);
sum1 =
W(0, 1012276081, -1116644609, 1019444907, -1124688427) + W(1, 1029853709, -1130860131, 1001605962, -1127223379)
+ W(2, -1119160665, 1035777366, -1136557285, -1130309965)
+ W(3, 1024406997, -1109637089, 1048989101, -1098625404)
+ W(4, 1038057505, -1130883561, -1155861797, -1115433381)
+ W(5, 1044433671, 1006101820, -1111190908, 1009046005) + W(6, -1155627981, 1036571679, -1098184025, 1048780603)
+ W(7, -1112291813, 1025361773, -1122534699, 1028189701)
+ W(8, 1039597237, -1104960796, -1130076067, 1018788475)
+ W(9, 1018348791, -1126280255, -1117935161, 1029641477)
+ W(10, 1012573277, -1125993892, -1120990241, 1036379833)
+ W(11, -1136463217, -1111599465, -1154886405, 1020397819);
sum2 = W(0, -1153319600, 1008405084, -1118973116, -1140784820)
+ W(1, 1012585128, 1010769460, -1147284080, 985822624) + W(2, 1010505984, -1129308604, 1021293048, 1001814848)
+ W(3, 1008968960, -1142311064, -1101248908, 1037448945) + W(4, 1024969278, -1160749952, 995456320, 1022276922)
+ W(5, -1089187936, 1057794596, 1033366347, -1123619202)
+ W(6, -1140178660, -1140411728, -1109859050, 1029773785)
+ W(7, 1024400778, -1136545168, -1146954776, 1005012008) + W(8, 1017518401, 1015531414, 1007802556, 1000322872)
+ W(9, -1142030464, 1003782736, 982409184, 974134143) + W(10, 1003482728, -1152799248, -1170856127, 1006946188)
+ W(11, 995727232, 960534268, 1009923956, 985284128);
WS(1064472528, -1121594920);
sum1 =
W(0, -1142654991, 1027230343, -1112807213, 1027061019) + W(1, -1128825126, -1164359388, -1143599223, 1032290711)
+ W(2, -1113392623, 1016010466, 991342574, 1014490160) + W(3, 1014568428, -1136037408, -1115590690, 1034098395)
+ W(4, 1008695068, -1148094031, 1010500896, 1002050167)
+ W(5, -1113734161, -1112872467, 1027642302, -1127829894)
+ W(6, -1124387333, -1122938499, 1038834309, -1130883382)
+ W(7, 1013984188, -1138058188, 1020884834, -1120250507) + W(8, 1029912912, 1015162858, 1015817710, -1124941766)
+ W(9, -1131205634, 1025589157, 1019867389, -1123484555)
+ W(10, 1015459258, 1008886302, 1026841191, -1110863224)
+ W(11, 1031947569, 1019435182, -1129521612, -1130075526);
sum2 =
W(0, 1003807591, -1154115373, 1000124719, 1017182228) + W(1, -1126980607, -1130234859, -1147429191, -1139843175)
+ W(2, 1001833687, 1024488826, -1116401990, 987658746) + W(3, 1002635095, 1018649088, 1008095031, 1040714709)
+ W(4, -1105844805, 1013729967, -1132089351, 1016729308)
+ W(5, -1105992985, 1063780536, -1085442794, 1024604622) + W(6, -1147602519, 1024344696, 1014141127, 1047200342)
+ W(7, -1101306502, 995366957, -1151072125, -1155997437)
+ W(8, -1132427785, 1020609216, -1122913939, -1147894927) + W(9, 964968041, 1001714367, -1141957575, 1023684454)
+ W(10, -1125194898, -1146690231, 1011860423, -1141691791)
+ W(11, -1139390003, 1017456200, -1128761080, -1146063807);
WS(1061878800, -1131153991);
sum1 =
W(0, -1123872727, 1015115512, -1099302516, 1041224340) + W(1, -1144166978, -1171049230, 1018625288, 1031144036)
+ W(2, -1102371221, 1009910425, 1014687697, 1022902338) + W(3, -1127640224, 1036357847, -1085394744, 1052022073)
+ W(4, -1115552350, -1132534141, 1026350045, -1108974562)
+ W(5, 1059569738, 1058525661, -1125187302, 1016189168) + W(6, 1013916191, -1107191102, 1050617832, -1088226291)
+ W(7, 1037730450, -1123531112, 1018183052, 1006433282) + W(8, 1032504563, -1097316565, 1040234099, -1127405808)
+ W(9, -1145362866, 1014427177, 1031877738, -1109508096) + W(10, 1015825508, 1018548825, 1016048056, 1026198990)
+ W(11, 1033421596, -1098228398, 1035235966, -1137247201);
sum2 = W(0, -1131301730, 1031269327, -1127010401, -1109842974)
+ W(1, -1181736700, -1180777340, 973798558, -1131640108)
+ W(2, 1028981651, -1125259759, -1167651134, -1160957999)
+ W(3, -1127780866, 1013454096, -1149526184, -1113692773)
+ W(4, -1123287814, 993986728, 1013478572, -1109509101) + W(5, 1051779317, 1047088883, -1109788940, 1020962386)
+ W(6, -1160424319, -1117315078, 1028380081, -1134194124)
+ W(7, -1115287133, -1136947718, -1135840779, -1131160392)
+ W(8, -1137527992, 1028175261, -1121515979, -1138138790)
+ W(9, -1164912671, -1145619912, 998238336, 1018886164)
+ W(10, -1125209194, -1152989064, -1138738786, -1127332243)
+ W(11, -1148504424, 1027237057, -1142455024, -1123011340);
WS(-1146021888, 1053974589);
sum1 =
W(0, 1029642476, -1119368753, 1042969521, -1095098901) + W(1, 1046685039, 984849429, 1013890275, -1134074211)
+ W(2, 1042359026, -1107285127, 1031018217, -1135393367)
+ W(3, -1176939092, 1007708103, 1045769551, -1096985546) + W(4, 1036262392, -1139413615, 1022266947, 1017736689)
+ W(5, -1101301107, 1034918881, 1003810877, 1024875117) + W(6, -1146466657, 1027345005, -1094644679, 1050538529)
+ W(7, -1120828825, -1172526890, 1004183253, 1032510570) + W(8, -1091538585, 1051699648, 1011534979, 1017671961)
+ W(9, -1160650069, 1019378973, -1107179580, 1036824506)
+ W(10, -1133351451, -1160823333, -1127783457, 1031489314)
+ W(11, -1095508207, 1048776768, 1035618600, 1006585957);
sum2 = W(0, 1031363252, -1091101506, 1048232756, 1057852755) + W(1, -1095952784, 1016290300, 1030774484, 1001500224)
+ W(2, -1110436898, -1132290932, -1131305343, -1126601761)
+ W(3, 1015165558, -1110787951, 1016237906, 1043794074) + W(4, -1113356328, 1003743696, 1007437656, 965388167)
+ W(5, 1014973676, 1047525730, -1152923833, 1022650220) + W(6, 1020087968, 1003188992, -1123006886, 1011818344)
+ W(7, -1111245491, 1021501454, -1158035650, 1041338676)
+ W(8, -1105090874, -1129296549, -1131940021, 1017537464)
+ W(9, -1137051446, -1134903850, -1123217223, 1034851396)
+ W(10, -1117639196, -1133259176, 1018262350, 1033269727)
+ W(11, -1104724635, -1106365430, 1024945328, 1019937714);
WS(-1077057896, -1083600334);
sum1 = W(0, 1017420011, 1011471785, 1029223422, -1116040414) + W(1, 1017123181, 1016511669, 1014201033, 1019976613)
+ W(2, -1126437509, 1015478313, 1024110818, -1167731667)
+ W(3, 1017846781, -1138042285, 1049638570, -1103217262) + W(4, 1023111893, 1009386661, 999765850, 1040273597)
+ W(5, -1090770241, -1087230893, 1030676769, 1023090125)
+ W(6, -1162024122, 1016487629, 1029091694, 1046437488) + W(7, -1112046985, 1020460717, 985808522, 1027730222)
+ W(8, 1037672698, 1024768280, -1120839802, 1025489318) + W(9, 1019153993, 1010855969, 1027546578, 1028909230)
+ W(10, 1023955584, -1134545259, 1011766057, 1025127228)
+ W(11, 1025680213, 1017109109, -1128064723, 1027741830);
sum2 =
W(0, 1023774756, -1107003878, 1020767940, -1118294055) + W(1, -1113997093, 1021408408, -1152708847, 1013240776)
+ W(2, -1108605887, -1128830540, -1139588328, -1119578529)
+ W(3, 1005727232, -1108761818, 1050907301, -1097736561)
+ W(4, 1032528025, -1135972104, -1128030280, 1032847770) + W(5, 1058054639, 1008347200, 1039669350, -1131826954)
+ W(6, 1004577664, 1024878510, -1106188814, 1049418167) + W(7, -1108856812, 999382680, -1116453887, -1129071264)
+ W(8, 1040942692, -1105809360, -1104688291, 1019392776)
+ W(9, 1020705336, -1124253692, -1115446820, 1014050712)
+ W(10, 1018266740, -1117167612, -1127775332, -1114566712)
+ W(11, 1042743894, -1132221182, -1103534695, 1022204104);
WS(1034686080, -1080904524);
sum1 =
W(0, -1139332721, 1025190657, -1143163562, 1041601261) + W(1, 1024768205, -1137907141, -1156631187, 1024127465)
+ W(2, 1040892278, 1028605547, -1129308018, 1012089369) + W(3, 1023562901, 1006799241, -1104914606, 1052908885)
+ W(4, -1117860929, 1019594656, 1011454089, -1145135178)
+ W(5, -1089193318, -1091833281, 1036300940, -1143330794)
+ W(6, 1009225011, -1129417722, 1043909393, -1103073573) + W(7, 1040987970, 992909011, 1012327853, 1017495114)
+ W(8, -1119873834, 1025246703, 1033652713, -1123933213)
+ W(9, 1010687981, 1027561839, -1136185891, -1124345098) + W(10, 1024209623, 1018355139, 1010798725, 1010795083)
+ W(11, -1118482716, 1032670633, 1027144528, -1123266333);
sum2 =
W(0, 998154484, -1124228589, -1132108902, -1115676434) + W(1, -1123985162, 1004957466, -1136847690, 1028193069)
+ W(2, -1123281782, -1123302060, -1132306691, 1011392625)
+ W(3, -1120010648, 1043298286, -1097765474, 1027211577)
+ W(4, -1114822183, -1127542967, -1145824866, -1115567961)
+ W(5, 1059221182, 1034703777, -1131429597, 1022587458) + W(6, 1015307650, -1106126812, 1048600788, -1099334080)
+ W(7, 1029215805, -1127163397, 994166396, -1111174068) + W(8, -1130476352, 1015056080, 1023836215, -1122559367)
+ W(9, 1000606426, -1128437454, 1026255089, -1137618020)
+ W(10, -1127893362, -1171736302, 1010815409, -1110538383)
+ W(11, -1118584150, 1028199647, 1025007180, -1124423270);
WS(-1097173920, -1100403112);
sum1 = W(0, -1133792968, -1126599342, 1026626987, -1109988694)
+ W(1, -1128510918, -1124691470, -1124511038, -1134319356)
+ W(2, -1112479512, -1122054529, -1138055228, -1131431128)
+ W(3, -1133667884, -1113753548, 1051379210, -1097159959)
+ W(4, 1031366423, -1128464692, -1126404688, -1113718896)
+ W(5, 1058852431, 1058630415, -1108453759, -1122909907)
+ W(6, -1129657589, 1034489098, -1097104011, 1049904553)
+ W(7, -1111244112, 1006087192, -1123548289, 1017816566)
+ W(8, 1007326848, -1104990865, -1129654222, -1138955724)
+ W(9, -1134226372, -1122628437, -1112737379, 983139170)
+ W(10, -1143321192, -1123473736, -1120375479, 1029275393)
+ W(11, -1116837058, -1110311540, -1132471000, -1149064600);
sum2 = W(0, -1133003813, -1145103116, -1105221269, 1033080040)
+ W(1, 1016862101, -1129731365, -1170659932, 1024883426)
+ W(2, -1117429423, 1028547885, -1128891234, -1147341896)
+ W(3, 1006656308, -1122208183, -1098340061, 1042272545)
+ W(4, -1121562483, -1121650606, 1031055883, -1101651786)
+ W(5, 1055658740, 1058321046, -1100689547, 1031708925)
+ W(6, -1122785076, -1107240567, 1035604404, -1112738821)
+ W(7, -1115182870, -1123396988, -1138148825, -1137951645)
+ W(8, -1131811521, 1003752088, 1026865631, -1133076983)
+ W(9, -1134424500, -1131665157, -1130287800, 1015669581)
+ W(10, -1129373191, -1131162259, -1131089901, -1116779622)
+ W(11, -1123356625, 1033205575, -1134576021, -1127933595);
WS(1049422752, 1064394145);
sum1 = W(0, 1016583527, -1106085006, 995307718, 1042273115) + W(1, -1113049442, 1025810280, 997641734, -1123841888)
+ W(2, 1031369872, 1021597381, -1122854832, 1006187755)
+ W(3, -1129211865, 1041111742, -1088517333, 1058826428)
+ W(4, -1113933244, 1019889767, -1131677043, 1032245856)
+ W(5, -1098988005, -1105331685, 1032610296, -1131685097)
+ W(6, 1021172552, -1110939130, 1058612208, -1090507155)
+ W(7, 1037338632, -1155049030, 1021691141, -1105269375)
+ W(8, 1030057089, 1043687978, -1122591528, -1134096210)
+ W(9, -1133007562, -1137128282, 1036830720, -1120823228)
+ W(10, -1116248270, 1025994697, 1026669144, -1106745812)
+ W(11, 1034516890, 1038691348, -1117945591, -1126546729);
sum2 = W(0, 1015668141, -1138201662, -1111996311, -1127284815)
+ W(1, -1125087482, 1020174885, -1124041461, -1140877219)
+ W(2, -1116450062, -1123578506, 1024732308, -1139064970)
+ W(3, 1005775275, 1027346708, -1125910350, -1106280325)
+ W(4, 1034158307, -1133423524, 1015274173, 1016303395)
+ W(5, -1108948194, 1052974100, 1032925063, -1161498797)
+ W(6, -1138139200, -1106503093, -1104963655, 1053021197)
+ W(7, -1107449032, -1134898868, 992639399, -1117618841) + W(8, 1031763952, 957951850, 994113735, 1013272790)
+ W(9, -1132053353, -1115775134, 1015724405, 1016609913)
+ W(10, -1132927280, -1132485274, -1129319398, -1122071744)
+ W(11, 1034411590, -1140595900, -1140186580, -1164791981);
WS(-1101497152, -1084603877);
sum1 =
W(0, -1136425045, 1016522037, 967194407, 1019848413) + W(1, -1129523533, -1142614610, -1140218249, -1157845066)
+ W(2, 1029505522, -1119357636, -1140249161, -1135395837)
+ W(3, -1121565262, 1035402982, 1022903246, 1027088345)
+ W(4, -1121932442, -1148904362, -1122160667, 1027884002)
+ W(5, -1107598171, 1024422013, -1127296803, 1002411186) + W(6, 1006883159, 1025282390, 1025270942, -1117602990)
+ W(7, 1030372258, -1130529549, -1132497425, 1022271101)
+ W(8, -1120772739, 1030415880, -1129818261, 1018540973)
+ W(9, 1004502690, -1138792353, -1154700189, -1171556244)
+ W(10, -1138666305, -1138856043, -1128604789, 995143101)
+ W(11, -1128284203, 1025955498, -1121511513, 1011955033);
sum2 = W(0, -1126668299, -1131366283, 1024971228, 1000957181)
+ W(1, -1151515419, 1005199725, -1137964827, -1117612139)
+ W(2, 1034620123, -1119890411, -1145021381, -1136862175)
+ W(3, 1015963121, -1097765254, 1049249869, 1026062254) + W(4, 1001872029, 1007955643, 1030757650, -1083955387)
+ W(5, 1064229708, -1107214224, 1026637176, -1125717658)
+ W(6, -1137547503, -1103492737, 1047078464, -1122275403)
+ W(7, 1027173860, -1169614250, 997720155, -1118797430) + W(8, 1017921725, 1016072153, -1135832789, 923654805)
+ W(9, -1132279825, -1131387718, 1024786888, -1133941049)
+ W(10, -1148432117, 1002011725, -1152589275, -1140632131)
+ W(11, -1144191965, 996433547, -1140699475, 1005736109);
WS(1059552336, -1136539026);
sum1 =
W(0, 990367896, 1041343484, -1096612504, 1033353841) + W(1, -1125599349, 1028944863, 1010957914, 1036710283)
+ W(2, -1107358947, 1029016441, -1132821402, 1024290996)
+ W(3, -1154541352, 1045269292, -1087221074, 1042554433)
+ W(4, -1154580200, 1023892422, 1017372383, -1112141659) + W(5, 1058232297, 1029783110, -1114120867, 1023410731)
+ W(6, 1026284586, -1116984235, 1051438086, -1087458720) + W(7, 1033522371, -1144215764, 1015461809, 1018013925)
+ W(8, 1047713030, -1095293300, 1032365167, -1144750420) + W(9, 1014364322, 1006339428, 1032067931, -1114380761)
+ W(10, 1004597796, 1001346936, 1021777309, 1032228520)
+ W(11, 1045851190, -1099415088, 1030006574, -1130073781);
sum2 = W(0, -1153914788, -1101809160, 1052877341, 1046574229)
+ W(1, -1095334336, 1023520281, -1126180245, -1115520194)
+ W(2, 1022007580, 1000424166, -1113807813, 1021218858) + W(3, 995844276, -1114410922, 1055965696, 1034680258)
+ W(4, -1109583292, 1008634443, -1141303142, 1033573989)
+ W(5, -1098900400, -1098051352, 1033797491, -1115608949) + W(6, 1026951758, 998799030, 1023481081, 1045079279)
+ W(7, 1032986287, 1032307290, 990856044, -1110191966) + W(8, 1023185808, -1106708743, 1025876178, -1128938562)
+ W(9, 1004850742, -1129252703, 1031073312, 984863273) + W(10, -1137844345, 1017335440, 1015235936, 1016759632)
+ W(11, -1104219784, -1103050031, 1038371038, 1020607644);
WS(-1080660584, -1085825159);
sum1 =
W(0, 1013708199, -1123370319, -1145658646, -1118786339) + W(1, 1028171867, -1144908790, 998525366, -1131079022)
+ W(2, -1111041043, 1035331132, 1017605134, -1131113128)
+ W(3, 1026247587, -1110742584, 1047524760, -1095527502)
+ W(4, 1042485668, -1130744068, 1009982783, -1113918027) + W(5, 1038280501, 1041941518, -1110999603, 992723116)
+ W(6, -1136883881, 1032009669, -1096311074, 1051037928)
+ W(7, -1106204846, 1025830203, -1128223794, 1025751155)
+ W(8, 1042402294, -1106649743, -1132447358, 1017749654) + W(9, 999596614, -1126831290, -1118872454, 1032615945)
+ W(10, 1002160934, -1127230527, -1126850910, 1033490448)
+ W(11, 1023947050, -1111971999, 971034337, 1018668086);
sum2 = W(0, 988660617, 1017543700, 1015794522, -1133704409) + W(1, 1003471274, -1140119133, -1145776834, 1002138986)
+ W(2, 1001599498, 1024621822, -1135257421, -1136500105)
+ W(3, -1133422913, 1031822055, 1041494739, -1102581932)
+ W(4, 970658596, -1163479081, -1126488793, 1032911160) + W(5, 1056510750, -1089051586, 1026713544, 1009057465)
+ W(6, 999416722, 1018658069, 1023998101, -1111744235) + W(7, 945757471, 1000517690, 999055930, 1007351961)
+ W(8, -1138508317, 1009295285, 998080468, -1137960905) + W(9, 987033481, -1162261577, 991201876, -1140892226)
+ W(10, -1156050276, -1186683976, -1179419172, 999395634)
+ W(11, -1141702058, -1147317506, 1007988669, -1146609818);
WS(1064784784, -1120346387);
sum1 = W(0, -1150678408, 1015721531, 1049255678, -1099108228)
+ W(1, -1149551256, -1136953142, 1000581420, -1110077251)
+ W(2, 1043607805, -1107416484, 1017163947, -1140022794)
+ W(3, 1006062348, -1107299655, 1059242626, -1089544734)
+ W(4, 1023526494, -1139533474, 1015088861, -1132691862)
+ W(5, -1123916922, -1130977491, 1022505321, 1012221798)
+ W(6, -1136518116, -1148196556, -1096371932, 1057929313)
+ W(7, -1104456865, 1014035238, -1126533711, 1013224070)
+ W(8, -1100407642, 1048500643, -1111675367, 1026165050)
+ W(9, 1012432222, -1124886999, -1132580564, 1035479729)
+ W(10, -1127245287, -1136458552, -1122704190, 1014270588)
+ W(11, -1102354822, 1044504531, 1007459698, 1017479699);
sum2 = W(0, -1140771860, 1031694512, -1104948969, -1115570202)
+ W(1, 1040745971, -1127298441, -1125513054, -1122230843) + W(2, 993388690, 1042093481, -1111499166, 995262946)
+ W(3, -1131667695, 979286214, 1026183534, 1042830623) + W(4, -1119680402, 1002124441, -1131288705, 1025077104)
+ W(5, -1111209187, -1112764939, 982469091, -1123012516) + W(6, 978159878, -1108853537, 1041617383, 1043422569)
+ W(7, -1120447085, -1129740789, 1012596136, -1102087836)
+ W(8, 1045410736, 1034771561, -1109907689, -1125016939)
+ W(9, 1011933560, -1117751010, 1030126174, 1014235016)
+ W(10, -1127258987, 1004566649, -1121534607, -1113389694)
+ W(11, 1044425994, 1025820984, -1115100280, -1119639931);
WS(-1088649680, 1067112300);
// Blend (vsum / wsum), scale by 5 * standard deviation, add the mean, clamp to [0, 1].
return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0);
}
// Tile of 507 floats that Pass1 fills and reads back; Pass1 indexes it as
// x * 13 + y with x in [0, 39) and y in [0, 13).
shared float inp[507];
// Index of the pass being compiled; compared against LAST_PASS by the #if inside Pass1.
#define CURRENT_PASS 1
// Reduces a colour sample to one scalar: dot product of its rgb with rgb2y
// (rgb2y is not defined in this file's visible part - presumably in prescalers.hlsli).
#define GET_SAMPLE(x) dot(x.rgb, rgb2y)
// Redirects imageStore calls to imageStoreOverride; the image argument is ignored
// and only the first component of val is forwarded.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x)
// Stores one scalar produced by pass 1 into the intermediate texture `temp` at pos.
void imageStoreOverride(uint2 pos, float value) {
    temp[pos] = value;
}
// Samples INPUT at pos and reduces the result to a scalar with GET_SAMPLE.
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
// Input size and per-texel step, as reported by GetInputSize() / GetInputPt().
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
// For this pass the HOOKED_* names refer to INPUT.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// Pass 1 entry point: writes two rows of `temp` per invocation (doubles the height).
//
// 1. The work-group cooperatively fills the shared tile `inp` with 507 (39 x 13)
//    HOOKED_tex samples, starting 3 texels before group_base in x and 2 in y.
// 2. After the barrier each invocation gathers an 8 x 6 window (8 steps of 13 in
//    the tile, 6 consecutive entries each) beginning at local_pos.
// 3. Row 2 * gid.y of the output receives the tile entry at offset (3, 2) from
//    local_pos; row 2 * gid.y + 1 receives the nnedi3 prediction for the window.
//
// blockStart, threadId: only referenced by the CURRENT_PASS == LAST_PASS section.
//
// NOTE: the file header says this file is generated; the signed-offset fix in the
// tile-fill loop below should also be applied in the generator.
void Pass1(uint2 blockStart, uint3 threadId) {
    ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
    int local_pos = int(gl_LocalInvocationID.x) * 13 + int(gl_LocalInvocationID.y);
    for (int id = int(gl_LocalInvocationIndex); id < 507; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
        // Divide/modulo as unsigned, then return to int: the halo offsets below must
        // be able to go negative. Mixing int and uint would make the whole expression
        // unsigned, so "0 + 0 - 3" would wrap to a huge coordinate instead of -3.
        const int x = int((uint)id / 13), y = int((uint)id % 13);
        inp[id] =
            HOOKED_tex(HOOKED_pt * vec2(float(int(group_base.x) + x - (3)) + 0.5, float(int(group_base.y) + y - (2)) + 0.5)).x;
    }
    // Make the whole tile visible to every invocation before reading it.
    barrier();
    vec4 ret = vec4(0.0, 0.0, 0.0, 0.0);
    vec4 ret0 = vec4(0.0, 0.0, 0.0, 0.0);
    // 48 window values packed four per vec4; tile offsets are k * 13 + (0..5), k = 0..7.
    vec4 samples[12];
    samples[0][0] = inp[local_pos + 0];
    samples[0][1] = inp[local_pos + 1];
    samples[0][2] = inp[local_pos + 2];
    samples[0][3] = inp[local_pos + 3];
    samples[1][0] = inp[local_pos + 4];
    samples[1][1] = inp[local_pos + 5];
    samples[1][2] = inp[local_pos + 13];
    samples[1][3] = inp[local_pos + 14];
    samples[2][0] = inp[local_pos + 15];
    samples[2][1] = inp[local_pos + 16];
    samples[2][2] = inp[local_pos + 17];
    samples[2][3] = inp[local_pos + 18];
    samples[3][0] = inp[local_pos + 26];
    samples[3][1] = inp[local_pos + 27];
    samples[3][2] = inp[local_pos + 28];
    samples[3][3] = inp[local_pos + 29];
    samples[4][0] = inp[local_pos + 30];
    samples[4][1] = inp[local_pos + 31];
    samples[4][2] = inp[local_pos + 39];
    samples[4][3] = inp[local_pos + 40];
    samples[5][0] = inp[local_pos + 41];
    samples[5][1] = inp[local_pos + 42];
    samples[5][2] = inp[local_pos + 43];
    samples[5][3] = inp[local_pos + 44];
    samples[6][0] = inp[local_pos + 52];
    samples[6][1] = inp[local_pos + 53];
    samples[6][2] = inp[local_pos + 54];
    samples[6][3] = inp[local_pos + 55];
    samples[7][0] = inp[local_pos + 56];
    samples[7][1] = inp[local_pos + 57];
    samples[7][2] = inp[local_pos + 65];
    samples[7][3] = inp[local_pos + 66];
    samples[8][0] = inp[local_pos + 67];
    samples[8][1] = inp[local_pos + 68];
    samples[8][2] = inp[local_pos + 69];
    samples[8][3] = inp[local_pos + 70];
    samples[9][0] = inp[local_pos + 78];
    samples[9][1] = inp[local_pos + 79];
    samples[9][2] = inp[local_pos + 80];
    samples[9][3] = inp[local_pos + 81];
    samples[10][0] = inp[local_pos + 82];
    samples[10][1] = inp[local_pos + 83];
    samples[10][2] = inp[local_pos + 91];
    samples[10][3] = inp[local_pos + 92];
    samples[11][0] = inp[local_pos + 93];
    samples[11][1] = inp[local_pos + 94];
    samples[11][2] = inp[local_pos + 95];
    samples[11][3] = inp[local_pos + 96];
    ret[0] = nnedi3(samples);
    // 41 = 3 * 13 + 2: the tile entry at offset (3, 2), which undoes the (-3, -2)
    // shift applied when the tile was filled.
    ret0[0] = inp[local_pos + 41];
#if CURRENT_PASS == LAST_PASS
    uint2 destPos = blockStart + threadId.xy * 2;
    uint2 outputSize = GetOutputSize();
    if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
        return;
    }
#endif
    // Even output row: the existing sample. Odd output row: the predicted sample.
    imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(1, 2), ret0);
    imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(1, 2) + ivec2(0, 1), ret);
}
//!PASS 2
//!DESC NNEDI3 (double_x, nns16, win8x6)
//!IN INPUT, temp
//!OUT OUTPUT
//!BLOCK_SIZE 64, 8
//!NUM_THREADS 32, 8
#pragma optionNV(inline none)
// Evaluates the NNEDI3 predictor on a window of 48 luma samples
// (12 vec4s) and returns the interpolated value clamped to [0, 1].
//
// Steps, as implemented below:
//   1. Compute the mean (mstd0) and the variance (mstd1) of the 48 samples.
//   2. mstd2 = 1/sqrt(variance), or 0 when the variance is below
//      1.192092896e-7 (2^-23); mstd1 then becomes variance * mstd2, i.e. the
//      standard deviation (or 0 for a flat window).
//   3. For each of the 16 neurons, accumulate two 48-tap dot products
//      (sum1, sum2) and fold them into wsum / vsum with the WS macro.
//   4. Return mean + 5 * (vsum / wsum) * stddev, clamped to [0, 1].
//
// All weights and biases are stored as the integer bit patterns of 32-bit
// floats and reinterpreted with intBitsToFloat.
float nnedi3(vec4 samples[12]) {
// Window statistics: running sum and sum of squares over all 48 samples.
float sum = 0.0, sumsq = 0.0;
[unroll] for (int i = 0; i < 12; i++) {
sum += dot(samples[i], vec4(1.0, 1.0, 1.0, 1.0));
sumsq += dot(samples[i], samples[i]);
}
// mstd0: mean, mstd1: variance (E[x^2] - mean^2).
float mstd0 = sum / 48.0;
float mstd1 = sumsq / 48.0 - mstd0 * mstd0;
// mstd2: reciprocal standard deviation, forced to 0 for (near-)constant windows
// so that the rsqrt of a tiny or negative variance is never used.
float mstd2 = mix(0.0, inversesqrt(mstd1), mstd1 >= 1.192092896e-7);
// variance * (1 / sqrt(variance)) == standard deviation (0 when mstd2 is 0).
mstd1 *= mstd2;
float vsum = 0.0, wsum = 0.0, sum1, sum2;
// T: reinterpret an integer bit pattern as a float.
#define T(x) intBitsToFloat(x)
// W: dot product of the i-th sample vector with four bit-encoded weights.
#define W(i, w0, w1, w2, w3) dot(samples[i], vec4(T(w0), T(w1), T(w2), T(w3)))
// WS: finish one neuron. sum1 is normalised by mstd2, biased by w0 and passed
// through exp() to become the neuron's weight; sum2 is normalised, biased by w1
// and squashed with x / (1 + |x|). wsum accumulates the weights and vsum the
// weighted squashed values.
#define WS(w0, w1) \
sum1 = exp(sum1 * mstd2 + T(w0)); \
sum2 = sum2 * mstd2 + T(w1); \
wsum += sum1; \
vsum += sum1 * (sum2 / (1.0 + abs(sum2)));
// Neuron 0
sum1 = W(0, -1126897990, -1130469888, -1113607518, -1116173177)
+ W(1, 1015526727, -1133977224, 990390561, -1122292152) + W(2, 1027745880, -1121396864, 1041026790, 1042195560)
+ W(3, 1018714920, 1026239260, -1131068140, 1015308851) + W(4, 1024250604, 1039079928, 1022159130, -1098313415)
+ W(5, 1042189511, -1106606352, 1013770942, -1122039043) + W(6, 1024642508, -1107295041, 1044630722, 999141354)
+ W(7, -1106681307, 1038936227, -1122507740, 1031978820)
+ W(8, -1121959908, -1147395201, -1107136294, 1019497054)
+ W(9, 1035880216, -1124106064, -1136584888, -1116330759)
+ W(10, -1149906049, -1126556538, 1005058137, 1007702352)
+ W(11, -1121374916, 1025050132, -1135809122, 1018900008);
sum2 = W(0, 1017133506, 1023321012, -1119553894, -1112177411)
+ W(1, -1165256880, -1133574805, 1020043526, -1135761830)
+ W(2, 1011515348, 1029416248, 1057587887, 1071604647) + W(3, 1051025857, 1035052104, 1010374724, 1018728192)
+ W(4, -1139818306, -1115999672, -1090489276, -1079392139)
+ W(5, -1098617840, -1139515542, -1121583660, 1024878156)
+ W(6, -1123730089, 1020129658, -1109933138, -1097028615)
+ W(7, -1105405946, -1135392452, -1142174380, 1002597928) + W(8, 996184056, 1015618084, 1016266760, 1028448562)
+ W(9, -1155286464, -1138601606, 997185888, -1131188096)
+ W(10, -1138856554, 1007066512, -1145378916, 1008681896)
+ W(11, 1000343320, 991053648, -1155288808, -1132781834);
WS(1018288640, 1027735986);
// Neuron 1
sum1 =
W(0, 1012158232, 1006778572, 1025063871, -1156175041) + W(1, 1010747802, -1140348884, 1020162890, -1172026051)
+ W(2, -1178449286, -1114624234, -1104570115, 1028919475)
+ W(3, 1034856692, -1127457566, -1122825993, -1115773453)
+ W(4, 1044498160, 1032943202, 1059928494, -1097612337) + W(5, -1085331503, 1031833306, -1119592595, 1038136595)
+ W(6, -1128542910, 1027108853, -1088743921, -1106124541) + W(7, 1059914122, 1032056909, 1033999672, 1027508251)
+ W(8, -1115962871, 1017365062, 1032615126, 1026836706)
+ W(9, -1114177498, -1122073627, 1022377282, -1129465364)
+ W(10, 1002517720, 964628492, -1134936888, -1146238776) + W(11, 1020458158, 1016604174, 998219705, 1023799671);
sum2 = W(0, -1126840972, -1145399745, -1139273136, 991527106)
+ W(1, -1135317632, -1139461992, -1129486378, 995542402)
+ W(2, -1130460798, 985194115, -1112997847, -1120456092) + W(3, 1001121889, 1017866680, 1006765920, 1000306721)
+ W(4, 1019075916, -1120812206, -1139625904, 1043975251)
+ W(5, -1150833602, -1128878392, -1133504840, -1142139489)
+ W(6, 1017322604, -1129997452, 1042717692, 1051048254)
+ W(7, -1121880440, -1112673669, -1126929736, -1114488494)
+ W(8, -1131054760, 1006903064, -1114175000, -1113881740) + W(9, 978663174, 1026044394, 1014584312, 1007041936)
+ W(10, -1131047996, -1143360737, -1130986946, 1007107280)
+ W(11, -1143215153, -1125685806, -1144361281, -1134951296);
WS(1042433344, -1111851638);
// Neuron 2
sum1 = W(0, -1128612156, -1139940268, -1122042583, 984858240) + W(1, 999630836, -1146630760, -1126512698, 996431920)
+ W(2, -1112658226, -1123380939, -1128727592, -1107767030)
+ W(3, -1114896432, -1115281716, -1129996802, -1123547845)
+ W(4, -1119638967, -1126908022, -1097703246, 1052387104)
+ W(5, 1054789077, 1030603948, 1028419819, 1026334318) + W(6, 1043958886, 1033805831, 1057665642, 1046318672)
+ W(7, -1095395475, -1117224401, -1123618471, -1113258842)
+ W(8, -1120465263, -1115346894, -1104545545, -1108167869)
+ W(9, 1029397739, -1163176544, -1117439993, -1134051464)
+ W(10, -1128976934, -1142120768, 1005565040, -1148354296)
+ W(11, -1133849404, -1117808895, 1013349902, -1120421311);
sum2 =
W(0, 1022431497, -1132240188, 1024165374, -1114822837)
+ W(1, -1120729932, -1124855948, -1126342960, -1128289576)
+ W(2, -1109389142, -1119299282, -1107432916, 1044244351)
+ W(3, 1031006195, 983982854, -1158996358, -1121099750) + W(4, 1004613154, -1139248009, 1041447926, -1108646182)
+ W(5, 1047688354, 1009435309, -1122846542, 1036127241) + W(6, 1028727631, -1129989652, 1047487962, -1100679909)
+ W(7, -1126089152, 1033956847, -1123334894, 1039673953) + W(8, 1029503922, -1140046689, 1017218352, 1040665470)
+ W(9, -1120804126, -1107003694, -1140927562, -1102421772)
+ W(10, -1132574761, -1114039002, -1135952741, -1123756570)
+ W(11, -1148002498, 1028342876, -1117057946, 1026336008);
WS(1015433728, 1058400049);
// Neuron 3
sum1 =
W(0, -1139873791, -1133909491, 1023506921, -1230944644) + W(1, -1118840528, 1028378783, -1121050287, 1033465034)
+ W(2, 1031161269, 1032977294, -1116372870, 1035249566)
+ W(3, -1120831281, -1114963068, 1032892305, -1105610222)
+ W(4, -1113693508, -1112917766, -1116140698, -1103376612)
+ W(5, 1044830734, -1141442286, 1023234585, 1034039600) + W(6, 1033801204, -1131731326, 1045725159, -1102794347)
+ W(7, -1116748777, 1032646513, -1112562780, 1030129285) + W(8, -1119172737, 989007258, -1122523445, 1044071755)
+ W(9, 1030473357, 1018738506, 1021910870, -1122899972)
+ W(10, -1143910182, 1019358132, 1008313039, -1115540344)
+ W(11, -1126204226, -1118552369, 1016154651, -1124368226);
sum2 = W(0, -1138428449, 992976916, -1142924106, 976851025) + W(1, 989093448, 997652548, -1153131756, -1134977059)
+ W(2, -1158711528, 1013039401, -1128734961, 1036130613)
+ W(3, 1010050489, -1137359275, 990210276, -1138876101)
+ W(4, -1124467432, -1130455464, -1113146735, -1097860430)
+ W(5, -1108810723, -1122996798, -1140348735, -1127238416)
+ W(6, -1140697417, -1123518198, -1099387353, -1077268149)
+ W(7, -1091225653, 1032494444, -1115493835, 1018469149) + W(8, 1030243467, 1033499227, 1051222006, 1072898808)
+ W(9, 1056060393, 1025590581, 1025171621, 1026307569) + W(10, 1012442941, 975746961, -1122081826, -1117904739)
+ W(11, -1131990027, 951236744, 1006284898, -1146863422);
WS(-1143089152, 1030017260);
// Neuron 4
sum1 = W(0, 1012276081, 1001605962, 1024406997, -1155861797)
+ W(1, -1155627981, -1122534699, 1018348791, -1120990241)
+ W(2, -1116644609, -1127223379, -1109637089, -1115433381)
+ W(3, 1036571679, 1028189701, -1126280255, 1036379833) + W(4, 1019444907, -1119160665, 1048989101, 1044433671)
+ W(5, -1098184025, 1039597237, -1117935161, -1136463217)
+ W(6, -1124688427, 1035777366, -1098625404, 1006101820)
+ W(7, 1048780603, -1104960796, 1029641477, -1111599465)
+ W(8, 1029853709, -1136557285, 1038057505, -1111190908)
+ W(9, -1112291813, -1130076067, 1012573277, -1154886405)
+ W(10, -1130860131, -1130309965, -1130883561, 1009046005)
+ W(11, 1025361773, 1018788475, -1125993892, 1020397819);
sum2 = W(0, -1153319600, -1147284080, 1008968960, 995456320)
+ W(1, -1140178660, -1146954776, -1142030464, -1170856127)
+ W(2, 1008405084, 985822624, -1142311064, 1022276922) + W(3, -1140411728, 1005012008, 1003782736, 1006946188)
+ W(4, -1118973116, 1010505984, -1101248908, -1089187936) + W(5, -1109859050, 1017518401, 982409184, 995727232)
+ W(6, -1140784820, -1129308604, 1037448945, 1057794596) + W(7, 1029773785, 1015531414, 974134143, 960534268)
+ W(8, 1012585128, 1021293048, 1024969278, 1033366347) + W(9, 1024400778, 1007802556, 1003482728, 1009923956)
+ W(10, 1010769460, 1001814848, -1160749952, -1123619202)
+ W(11, -1136545168, 1000322872, -1152799248, 985284128);
WS(1064472528, -1121594920);
// Neuron 5
sum1 = W(0, -1142654991, -1143599223, 1014568428, 1010500896)
+ W(1, -1124387333, 1020884834, -1131205634, 1026841191)
+ W(2, 1027230343, 1032290711, -1136037408, 1002050167)
+ W(3, -1122938499, -1120250507, 1025589157, -1110863224)
+ W(4, -1112807213, -1113392623, -1115590690, -1113734161)
+ W(5, 1038834309, 1029912912, 1019867389, 1031947569) + W(6, 1027061019, 1016010466, 1034098395, -1112872467)
+ W(7, -1130883382, 1015162858, -1123484555, 1019435182) + W(8, -1128825126, 991342574, 1008695068, 1027642302)
+ W(9, 1013984188, 1015817710, 1015459258, -1129521612)
+ W(10, -1164359388, 1014490160, -1148094031, -1127829894)
+ W(11, -1138058188, -1124941766, 1008886302, -1130075526);
sum2 =
W(0, 1003807591, -1147429191, 1002635095, -1132089351) + W(1, -1147602519, -1151072125, 964968041, 1011860423)
+ W(2, -1154115373, -1139843175, 1018649088, 1016729308)
+ W(3, 1024344696, -1155997437, 1001714367, -1141691791) + W(4, 1000124719, 1001833687, 1008095031, -1105992985)
+ W(5, 1014141127, -1132427785, -1141957575, -1139390003) + W(6, 1017182228, 1024488826, 1040714709, 1063780536)
+ W(7, 1047200342, 1020609216, 1023684454, 1017456200)
+ W(8, -1126980607, -1116401990, -1105844805, -1085442794)
+ W(9, -1101306502, -1122913939, -1125194898, -1128761080)
+ W(10, -1130234859, 987658746, 1013729967, 1024604622)
+ W(11, 995366957, -1147894927, -1146690231, -1146063807);
WS(1061878800, -1131153991);
// Neuron 6
sum1 =
W(0, -1123872727, 1018625288, -1127640224, 1026350045) + W(1, 1013916191, 1018183052, -1145362866, 1016048056)
+ W(2, 1015115512, 1031144036, 1036357847, -1108974562) + W(3, -1107191102, 1006433282, 1014427177, 1026198990)
+ W(4, -1099302516, -1102371221, -1085394744, 1059569738) + W(5, 1050617832, 1032504563, 1031877738, 1033421596)
+ W(6, 1041224340, 1009910425, 1052022073, 1058525661)
+ W(7, -1088226291, -1097316565, -1109508096, -1098228398)
+ W(8, -1144166978, 1014687697, -1115552350, -1125187302) + W(9, 1037730450, 1040234099, 1015825508, 1035235966)
+ W(10, -1171049230, 1022902338, -1132534141, 1016189168)
+ W(11, -1123531112, -1127405808, 1018548825, -1137247201);
sum2 =
W(0, -1131301730, 973798558, -1127780866, 1013478572) + W(1, -1160424319, -1135840779, -1164912671, -1138738786)
+ W(2, 1031269327, -1131640108, 1013454096, -1109509101)
+ W(3, -1117315078, -1131160392, -1145619912, -1127332243)
+ W(4, -1127010401, 1028981651, -1149526184, 1051779317) + W(5, 1028380081, -1137527992, 998238336, -1148504424)
+ W(6, -1109842974, -1125259759, -1113692773, 1047088883)
+ W(7, -1134194124, 1028175261, 1018886164, 1027237057)
+ W(8, -1181736700, -1167651134, -1123287814, -1109788940)
+ W(9, -1115287133, -1121515979, -1125209194, -1142455024)
+ W(10, -1180777340, -1160957999, 993986728, 1020962386)
+ W(11, -1136947718, -1138138790, -1152989064, -1123011340);
WS(-1146021888, 1053974589);
// Neuron 7
sum1 =
W(0, 1029642476, 1013890275, -1176939092, 1022266947) + W(1, -1146466657, 1004183253, -1160650069, -1127783457)
+ W(2, -1119368753, -1134074211, 1007708103, 1017736689) + W(3, 1027345005, 1032510570, 1019378973, 1031489314)
+ W(4, 1042969521, 1042359026, 1045769551, -1101301107)
+ W(5, -1094644679, -1091538585, -1107179580, -1095508207)
+ W(6, -1095098901, -1107285127, -1096985546, 1034918881) + W(7, 1050538529, 1051699648, 1036824506, 1048776768)
+ W(8, 1046685039, 1031018217, 1036262392, 1003810877) + W(9, -1120828825, 1011534979, -1133351451, 1035618600)
+ W(10, 984849429, -1135393367, -1139413615, 1024875117)
+ W(11, -1172526890, 1017671961, -1160823333, 1006585957);
sum2 = W(0, 1031363252, 1030774484, 1015165558, 1007437656) + W(1, 1020087968, -1158035650, -1137051446, 1018262350)
+ W(2, -1091101506, 1001500224, -1110787951, 965388167) + W(3, 1003188992, 1041338676, -1134903850, 1033269727)
+ W(4, 1048232756, -1110436898, 1016237906, 1014973676)
+ W(5, -1123006886, -1105090874, -1123217223, -1104724635)
+ W(6, 1057852755, -1132290932, 1043794074, 1047525730)
+ W(7, 1011818344, -1129296549, 1034851396, -1106365430)
+ W(8, -1095952784, -1131305343, -1113356328, -1152923833)
+ W(9, -1111245491, -1131940021, -1117639196, 1024945328)
+ W(10, 1016290300, -1126601761, 1003743696, 1022650220)
+ W(11, 1021501454, 1017537464, -1133259176, 1019937714);
WS(-1077057896, -1083600334);
// Neuron 8
sum1 = W(0, 1017420011, 1014201033, 1017846781, 999765850) + W(1, -1162024122, 985808522, 1019153993, 1011766057)
+ W(2, 1011471785, 1019976613, -1138042285, 1040273597) + W(3, 1016487629, 1027730222, 1010855969, 1025127228)
+ W(4, 1029223422, -1126437509, 1049638570, -1090770241) + W(5, 1029091694, 1037672698, 1027546578, 1025680213)
+ W(6, -1116040414, 1015478313, -1103217262, -1087230893)
+ W(7, 1046437488, 1024768280, 1028909230, 1017109109) + W(8, 1017123181, 1024110818, 1023111893, 1030676769)
+ W(9, -1112046985, -1120839802, 1023955584, -1128064723)
+ W(10, 1016511669, -1167731667, 1009386661, 1023090125)
+ W(11, 1020460717, 1025489318, -1134545259, 1027741830);
sum2 =
W(0, 1023774756, -1152708847, 1005727232, -1128030280) + W(1, 1004577664, -1116453887, 1020705336, -1127775332)
+ W(2, -1107003878, 1013240776, -1108761818, 1032847770)
+ W(3, 1024878510, -1129071264, -1124253692, -1114566712)
+ W(4, 1020767940, -1108605887, 1050907301, 1058054639) + W(5, -1106188814, 1040942692, -1115446820, 1042743894)
+ W(6, -1118294055, -1128830540, -1097736561, 1008347200)
+ W(7, 1049418167, -1105809360, 1014050712, -1132221182)
+ W(8, -1113997093, -1139588328, 1032528025, 1039669350)
+ W(9, -1108856812, -1104688291, 1018266740, -1103534695)
+ W(10, 1021408408, -1119578529, -1135972104, -1131826954)
+ W(11, 999382680, 1019392776, -1117167612, 1022204104);
WS(1034686080, -1080904524);
// Neuron 9
sum1 = W(0, -1139332721, -1156631187, 1023562901, 1011454089) + W(1, 1009225011, 1012327853, 1010687981, 1010798725)
+ W(2, 1025190657, 1024127465, 1006799241, -1145135178) + W(3, -1129417722, 1017495114, 1027561839, 1010795083)
+ W(4, -1143163562, 1040892278, -1104914606, -1089193318)
+ W(5, 1043909393, -1119873834, -1136185891, -1118482716)
+ W(6, 1041601261, 1028605547, 1052908885, -1091833281)
+ W(7, -1103073573, 1025246703, -1124345098, 1032670633)
+ W(8, 1024768205, -1129308018, -1117860929, 1036300940) + W(9, 1040987970, 1033652713, 1024209623, 1027144528)
+ W(10, -1137907141, 1012089369, 1019594656, -1143330794)
+ W(11, 992909011, -1123933213, 1018355139, -1123266333);
sum2 = W(0, 998154484, -1136847690, -1120010648, -1145824866) + W(1, 1015307650, 994166396, 1000606426, 1010815409)
+ W(2, -1124228589, 1028193069, 1043298286, -1115567961)
+ W(3, -1106126812, -1111174068, -1128437454, -1110538383)
+ W(4, -1132108902, -1123281782, -1097765474, 1059221182)
+ W(5, 1048600788, -1130476352, 1026255089, -1118584150)
+ W(6, -1115676434, -1123302060, 1027211577, 1034703777)
+ W(7, -1099334080, 1015056080, -1137618020, 1028199647)
+ W(8, -1123985162, -1132306691, -1114822183, -1131429597)
+ W(9, 1029215805, 1023836215, -1127893362, 1025007180)
+ W(10, 1004957466, 1011392625, -1127542967, 1022587458)
+ W(11, -1127163397, -1122559367, -1171736302, -1124423270);
WS(-1097173920, -1100403112);
// Neuron 10
sum1 = W(0, -1133792968, -1124511038, -1133667884, -1126404688)
+ W(1, -1129657589, -1123548289, -1134226372, -1120375479)
+ W(2, -1126599342, -1134319356, -1113753548, -1113718896)
+ W(3, 1034489098, 1017816566, -1122628437, 1029275393) + W(4, 1026626987, -1112479512, 1051379210, 1058852431)
+ W(5, -1097104011, 1007326848, -1112737379, -1116837058)
+ W(6, -1109988694, -1122054529, -1097159959, 1058630415)
+ W(7, 1049904553, -1104990865, 983139170, -1110311540)
+ W(8, -1128510918, -1138055228, 1031366423, -1108453759)
+ W(9, -1111244112, -1129654222, -1143321192, -1132471000)
+ W(10, -1124691470, -1131431128, -1128464692, -1122909907)
+ W(11, 1006087192, -1138955724, -1123473736, -1149064600);
sum2 = W(0, -1133003813, -1170659932, 1006656308, 1031055883)
+ W(1, -1122785076, -1138148825, -1134424500, -1131089901)
+ W(2, -1145103116, 1024883426, -1122208183, -1101651786)
+ W(3, -1107240567, -1137951645, -1131665157, -1116779622)
+ W(4, -1105221269, -1117429423, -1098340061, 1055658740)
+ W(5, 1035604404, -1131811521, -1130287800, -1123356625)
+ W(6, 1033080040, 1028547885, 1042272545, 1058321046) + W(7, -1112738821, 1003752088, 1015669581, 1033205575)
+ W(8, 1016862101, -1128891234, -1121562483, -1100689547)
+ W(9, -1115182870, 1026865631, -1129373191, -1134576021)
+ W(10, -1129731365, -1147341896, -1121650606, 1031708925)
+ W(11, -1123396988, -1133076983, -1131162259, -1127933595);
WS(1049422752, 1064394145);
// Neuron 11
sum1 = W(0, 1016583527, 997641734, -1129211865, -1131677043) + W(1, 1021172552, 1021691141, -1133007562, 1026669144)
+ W(2, -1106085006, -1123841888, 1041111742, 1032245856)
+ W(3, -1110939130, -1105269375, -1137128282, -1106745812)
+ W(4, 995307718, 1031369872, -1088517333, -1098988005) + W(5, 1058612208, 1030057089, 1036830720, 1034516890)
+ W(6, 1042273115, 1021597381, 1058826428, -1105331685)
+ W(7, -1090507155, 1043687978, -1120823228, 1038691348)
+ W(8, -1113049442, -1122854832, -1113933244, 1032610296)
+ W(9, 1037338632, -1122591528, -1116248270, -1117945591)
+ W(10, 1025810280, 1006187755, 1019889767, -1131685097)
+ W(11, -1155049030, -1134096210, 1025994697, -1126546729);
sum2 =
W(0, 1015668141, -1124041461, 1005775275, 1015274173) + W(1, -1138139200, 992639399, -1132053353, -1129319398)
+ W(2, -1138201662, -1140877219, 1027346708, 1016303395)
+ W(3, -1106503093, -1117618841, -1115775134, -1122071744)
+ W(4, -1111996311, -1116450062, -1125910350, -1108948194)
+ W(5, -1104963655, 1031763952, 1015724405, 1034411590)
+ W(6, -1127284815, -1123578506, -1106280325, 1052974100) + W(7, 1053021197, 957951850, 1016609913, -1140595900)
+ W(8, -1125087482, 1024732308, 1034158307, 1032925063) + W(9, -1107449032, 994113735, -1132927280, -1140186580)
+ W(10, 1020174885, -1139064970, -1133423524, -1161498797)
+ W(11, -1134898868, 1013272790, -1132485274, -1164791981);
WS(-1101497152, -1084603877);
// Neuron 12
sum1 =
W(0, -1136425045, -1140218249, -1121565262, -1122160667)
+ W(1, 1006883159, -1132497425, 1004502690, -1128604789) + W(2, 1016522037, -1157845066, 1035402982, 1027884002)
+ W(3, 1025282390, 1022271101, -1138792353, 995143101) + W(4, 967194407, 1029505522, 1022903246, -1107598171)
+ W(5, 1025270942, -1120772739, -1154700189, -1128284203)
+ W(6, 1019848413, -1119357636, 1027088345, 1024422013) + W(7, -1117602990, 1030415880, -1171556244, 1025955498)
+ W(8, -1129523533, -1140249161, -1121932442, -1127296803)
+ W(9, 1030372258, -1129818261, -1138666305, -1121511513)
+ W(10, -1142614610, -1135395837, -1148904362, 1002411186)
+ W(11, -1130529549, 1018540973, -1138856043, 1011955033);
sum2 =
W(0, -1126668299, -1137964827, 1015963121, 1030757650) + W(1, -1137547503, 997720155, -1132279825, -1152589275)
+ W(2, -1131366283, -1117612139, -1097765254, -1083955387)
+ W(3, -1103492737, -1118797430, -1131387718, -1140632131)
+ W(4, 1024971228, 1034620123, 1049249869, 1064229708) + W(5, 1047078464, 1017921725, 1024786888, -1144191965)
+ W(6, 1000957181, -1119890411, 1026062254, -1107214224) + W(7, -1122275403, 1016072153, -1133941049, 996433547)
+ W(8, -1151515419, -1145021381, 1001872029, 1026637176)
+ W(9, 1027173860, -1135832789, -1148432117, -1140699475)
+ W(10, 1005199725, -1136862175, 1007955643, -1125717658)
+ W(11, -1169614250, 923654805, 1002011725, 1005736109);
WS(1059552336, -1136539026);
// Neuron 13
sum1 = W(0, 990367896, 1010957914, -1154541352, 1017372383) + W(1, 1026284586, 1015461809, 1014364322, 1021777309)
+ W(2, 1041343484, 1036710283, 1045269292, -1112141659) + W(3, -1116984235, 1018013925, 1006339428, 1032228520)
+ W(4, -1096612504, -1107358947, -1087221074, 1058232297)
+ W(5, 1051438086, 1047713030, 1032067931, 1045851190) + W(6, 1033353841, 1029016441, 1042554433, 1029783110)
+ W(7, -1087458720, -1095293300, -1114380761, -1099415088)
+ W(8, -1125599349, -1132821402, -1154580200, -1114120867)
+ W(9, 1033522371, 1032365167, 1004597796, 1030006574) + W(10, 1028944863, 1024290996, 1023892422, 1023410731)
+ W(11, -1144215764, -1144750420, 1001346936, -1130073781);
sum2 = W(0, -1153914788, -1126180245, 995844276, -1141303142) + W(1, 1026951758, 990856044, 1004850742, 1015235936)
+ W(2, -1101809160, -1115520194, -1114410922, 1033573989)
+ W(3, 998799030, -1110191966, -1129252703, 1016759632) + W(4, 1052877341, 1022007580, 1055965696, -1098900400)
+ W(5, 1023481081, 1023185808, 1031073312, -1104219784) + W(6, 1046574229, 1000424166, 1034680258, -1098051352)
+ W(7, 1045079279, -1106708743, 984863273, -1103050031)
+ W(8, -1095334336, -1113807813, -1109583292, 1033797491)
+ W(9, 1032986287, 1025876178, -1137844345, 1038371038)
+ W(10, 1023520281, 1021218858, 1008634443, -1115608949)
+ W(11, 1032307290, -1128938562, 1017335440, 1020607644);
WS(-1080660584, -1085825159);
// Neuron 14
sum1 = W(0, 1013708199, 998525366, 1026247587, 1009982783) + W(1, -1136883881, -1128223794, 999596614, -1126850910)
+ W(2, -1123370319, -1131079022, -1110742584, -1113918027)
+ W(3, 1032009669, 1025751155, -1126831290, 1033490448)
+ W(4, -1145658646, -1111041043, 1047524760, 1038280501)
+ W(5, -1096311074, 1042402294, -1118872454, 1023947050)
+ W(6, -1118786339, 1035331132, -1095527502, 1041941518)
+ W(7, 1051037928, -1106649743, 1032615945, -1111971999)
+ W(8, 1028171867, 1017605134, 1042485668, -1110999603) + W(9, -1106204846, -1132447358, 1002160934, 971034337)
+ W(10, -1144908790, -1131113128, -1130744068, 992723116)
+ W(11, 1025830203, 1017749654, -1127230527, 1018668086);
sum2 = W(0, 988660617, -1145776834, -1133422913, -1126488793) + W(1, 999416722, 999055930, 987033481, -1179419172)
+ W(2, 1017543700, 1002138986, 1031822055, 1032911160) + W(3, 1018658069, 1007351961, -1162261577, 999395634)
+ W(4, 1015794522, 1001599498, 1041494739, 1056510750) + W(5, 1023998101, -1138508317, 991201876, -1141702058)
+ W(6, -1133704409, 1024621822, -1102581932, -1089051586)
+ W(7, -1111744235, 1009295285, -1140892226, -1147317506)
+ W(8, 1003471274, -1135257421, 970658596, 1026713544) + W(9, 945757471, 998080468, -1156050276, 1007988669)
+ W(10, -1140119133, -1136500105, -1163479081, 1009057465)
+ W(11, 1000517690, -1137960905, -1186683976, -1146609818);
WS(1064784784, -1120346387);
// Neuron 15
sum1 =
W(0, -1150678408, 1000581420, 1006062348, 1015088861) + W(1, -1136518116, -1126533711, 1012432222, -1122704190)
+ W(2, 1015721531, -1110077251, -1107299655, -1132691862)
+ W(3, -1148196556, 1013224070, -1124886999, 1014270588) + W(4, 1049255678, 1043607805, 1059242626, -1123916922)
+ W(5, -1096371932, -1100407642, -1132580564, -1102354822)
+ W(6, -1099108228, -1107416484, -1089544734, -1130977491)
+ W(7, 1057929313, 1048500643, 1035479729, 1044504531) + W(8, -1149551256, 1017163947, 1023526494, 1022505321)
+ W(9, -1104456865, -1111675367, -1127245287, 1007459698)
+ W(10, -1136953142, -1140022794, -1139533474, 1012221798)
+ W(11, 1014035238, 1026165050, -1136458552, 1017479699);
sum2 = W(0, -1140771860, -1125513054, -1131667695, -1131288705)
+ W(1, 978159878, 1012596136, 1011933560, -1121534607) + W(2, 1031694512, -1122230843, 979286214, 1025077104)
+ W(3, -1108853537, -1102087836, -1117751010, -1113389694)
+ W(4, -1104948969, 993388690, 1026183534, -1111209187) + W(5, 1041617383, 1045410736, 1030126174, 1044425994)
+ W(6, -1115570202, 1042093481, 1042830623, -1112764939) + W(7, 1043422569, 1034771561, 1014235016, 1025820984)
+ W(8, 1040745971, -1111499166, -1119680402, 982469091)
+ W(9, -1120447085, -1109907689, -1127258987, -1115100280)
+ W(10, -1127298441, 995262946, 1002124441, -1123012516)
+ W(11, -1129740789, -1125016939, 1004566649, -1119639931);
WS(-1088649680, 1067112300);
// Softmax-style weighted average of the neuron outputs, rescaled by the window's
// standard deviation and re-centred on its mean.
return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0);
}
// Tile of source luma values that Pass2 fills and then reads back; it is
// indexed as x * 15 + y (555 = 37 * 15).
shared float inp[555];
// Pass index, compared against LAST_PASS to enable the output bounds check.
#define CURRENT_PASS 2
// Reduces an RGB(A) texel to a single luma value using the rgb2y coefficients.
#define GET_SAMPLE(x) dot(x.rgb, rgb2y)
// GLSL-style imageStore shim: the image argument is ignored and only the first
// component of `val` is forwarded to imageStoreOverride.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x)
// Writes one output pixel: `value` supplies the luma, while the chroma is
// taken from the original INPUT image, bilinearly sampled at the centre of the
// output pixel `pos`. The recombined YUV triple is converted back to RGB and
// stored with alpha 1.
void imageStoreOverride(uint2 pos, float value) {
	// Colour of the source image at this output pixel (mip level 0).
	const float3 sourceRgb = INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb;
	// Keep only the chroma of the source; the luma comes from the scaler.
	const float2 chroma = mul(rgb2uv, sourceRgb);
	const float3 yuv = float3(value, chroma);
	OUTPUT[pos] = float4(mul(yuv2rgb, yuv), 1.0);
}
// Samples INPUT at normalised coordinate `pos` and reduces it to luma.
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
// Samples the intermediate `temp` texture and returns its first channel.
#define temp_tex(pos) (float(texture(temp, pos).x))
// `temp` has the input's width and twice the input's height.
static const float2 temp_size = float2(GetInputSize().x * 1, GetInputSize().y * 2);
// Size of one `temp` texel in normalised texture coordinates.
static const float2 temp_pt = float2(1.0 / (temp_size.x), 1.0 / (temp_size.y));
// In this pass the "hooked" texture (the one being scaled) is `temp`.
#define HOOKED_tex(pos) temp_tex(pos)
#define HOOKED_size temp_size
#define HOOKED_pt temp_pt
// Pass 2 entry point: doubles the width of the `temp` texture with NNEDI3.
//
// Each thread produces two horizontally adjacent output pixels: the left one
// is a copy of the source texel, the right one is predicted by nnedi3() from a
// window of 6 columns x 8 rows of source texels around it.
//
// blockStart - top-left output pixel of the block handled by this thread group.
// threadId   - thread index inside the group (only .xy is used).
void Pass2(uint2 blockStart, uint3 threadId) {
	ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
	int local_pos = int(gl_LocalInvocationID.x) * 15 + int(gl_LocalInvocationID.y);

	// Cooperatively load the 37 x 15 source tile (stored as x * 15 + y) that
	// the group needs, including the halo starting 2 texels to the left of and
	// 3 texels above the group's first texel.
	for (int id = int(gl_LocalInvocationIndex); id < 555; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
		// The division is done unsigned, but the results are converted back to
		// int: the halo offsets below must be able to go negative so that
		// texels left of / above the image are resolved by the sampler at the
		// near edge. With unsigned x/y the subtraction wraps to a huge value
		// for the first block column/row and samples the opposite edge.
		int x = int((uint)id / 15), y = int((uint)id % 15);
		inp[id] =
			HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x - (2)) + 0.5, float(group_base.y + y - (3)) + 0.5)).x;
	}
	barrier();

	vec4 ret = vec4(0.0, 0.0, 0.0, 0.0);
	vec4 ret0 = vec4(0.0, 0.0, 0.0, 0.0);

	// Gather the 6 x 8 window: consecutive offsets walk down a tile column
	// (8 rows), and every +15 moves one column to the right.
	vec4 samples[12];
	samples[0][0] = inp[local_pos + 0];
	samples[0][1] = inp[local_pos + 1];
	samples[0][2] = inp[local_pos + 2];
	samples[0][3] = inp[local_pos + 3];
	samples[1][0] = inp[local_pos + 4];
	samples[1][1] = inp[local_pos + 5];
	samples[1][2] = inp[local_pos + 6];
	samples[1][3] = inp[local_pos + 7];
	samples[2][0] = inp[local_pos + 15];
	samples[2][1] = inp[local_pos + 16];
	samples[2][2] = inp[local_pos + 17];
	samples[2][3] = inp[local_pos + 18];
	samples[3][0] = inp[local_pos + 19];
	samples[3][1] = inp[local_pos + 20];
	samples[3][2] = inp[local_pos + 21];
	samples[3][3] = inp[local_pos + 22];
	samples[4][0] = inp[local_pos + 30];
	samples[4][1] = inp[local_pos + 31];
	samples[4][2] = inp[local_pos + 32];
	samples[4][3] = inp[local_pos + 33];
	samples[5][0] = inp[local_pos + 34];
	samples[5][1] = inp[local_pos + 35];
	samples[5][2] = inp[local_pos + 36];
	samples[5][3] = inp[local_pos + 37];
	samples[6][0] = inp[local_pos + 45];
	samples[6][1] = inp[local_pos + 46];
	samples[6][2] = inp[local_pos + 47];
	samples[6][3] = inp[local_pos + 48];
	samples[7][0] = inp[local_pos + 49];
	samples[7][1] = inp[local_pos + 50];
	samples[7][2] = inp[local_pos + 51];
	samples[7][3] = inp[local_pos + 52];
	samples[8][0] = inp[local_pos + 60];
	samples[8][1] = inp[local_pos + 61];
	samples[8][2] = inp[local_pos + 62];
	samples[8][3] = inp[local_pos + 63];
	samples[9][0] = inp[local_pos + 64];
	samples[9][1] = inp[local_pos + 65];
	samples[9][2] = inp[local_pos + 66];
	samples[9][3] = inp[local_pos + 67];
	samples[10][0] = inp[local_pos + 75];
	samples[10][1] = inp[local_pos + 76];
	samples[10][2] = inp[local_pos + 77];
	samples[10][3] = inp[local_pos + 78];
	samples[11][0] = inp[local_pos + 79];
	samples[11][1] = inp[local_pos + 80];
	samples[11][2] = inp[local_pos + 81];
	samples[11][3] = inp[local_pos + 82];

	// Interpolated pixel, and the untouched source texel at tile offset
	// (2, 3), i.e. this thread's own texel once the halo is accounted for.
	ret[0] = nnedi3(samples);
	ret0[0] = inp[local_pos + 33];

#if CURRENT_PASS == LAST_PASS
	// Skip threads whose first output pixel lies outside the output. The
	// stride must match the stores below, (2, 1): scaling y by 2 as well would
	// discard valid rows in the bottom-most block.
	uint2 destPos = blockStart + threadId.xy * uint2(2, 1);
	uint2 outputSize = GetOutputSize();
	if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
		return;
	}
#endif

	// Even output column: original texel. Odd output column: predicted texel.
	imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(2, 1), ret0);
	imageStore(out_image, ivec2(gl_GlobalInvocationID) * ivec2(2, 1) + ivec2(1, 0), ret);
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,73 @@
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// Conversion from GLSL to HLSL is done through defines as much as possible to ease synchronization and comparison with upstream
// GLSL vector/matrix type names mapped to their HLSL equivalents.
#define ivec2 int2
#define vec2 float2
#define vec3 float3
#define vec4 float4
#define mat4x3 float4x3
// HLSL's `*` on two matrices is already component-wise.
#define matrixCompMult(mtx1, mtx2) (mtx1 * mtx2)
#define shared groupshared
// Only the two-argument GLSL form atan(y, x) is supported by this mapping;
// a one-argument atan(x) call would expand to atan2(x).
#define atan atan2
#define barrier GroupMemoryBarrierWithGroupSync
#define fract frac
#define intBitsToFloat asfloat
#define inversesqrt rsqrt
// mod deals only with positive numbers here and it could be substituted by fmod
#define mod fmod
// GLSL's mix() with a boolean selector picks one operand outright: `b` when
// `c` is true, otherwise `a`. lerp() would blend arithmetically instead, so
// the boolean overload is implemented as a plain selection.
float mix(float a, float b, bool c) {
	if (c) {
		return b;
	}
	return a;
}
// Generates a mix() overload that forwards to lerp(): `type1` is the type of
// the two blended operands and of the result, `type3` the type of the blend
// factor.
#define MIX_LERP(type1, type3) type1 mix(type1 a, type1 b, type3 c) { return lerp(a, b, c); }
// Overloads provided: scalar, float2 with a per-component factor, and
// float3 / float4 with a scalar factor.
MIX_LERP(float, float)
MIX_LERP(float2, float2)
MIX_LERP(float3, float)
MIX_LERP(float4, float)
// GLSL texture(): samples mip level 0 of `tex` with the sampler named sam_<tex>.
#define texture(tex, pos) tex.SampleLevel(sam_##tex, pos, 0.0)
#define OUTPUT_pt float2(GetOutputPt())
// Centre of output pixel `id`, in pixel units.
#define frag_pos(id) (vec2(id) + vec2(0.5, 0.5))
// Centre of output pixel `id`, scaled by OUTPUT_pt for use as a texture coordinate.
#define frag_map(id) (OUTPUT_pt * frag_pos(id))
#define HOOKED_map(id) frag_map(id)
// Emulation of the GLSL compute built-ins. These expand to expressions that
// use `threadId` and `blockStart`, so they are only valid inside a pass entry
// point that has parameters with those names.
// Flattened, row-major index of the thread within its group.
#define gl_LocalInvocationIndex (threadId.y*MP_NUM_THREADS_X + threadId.x)
#define gl_LocalInvocationID threadId
#define gl_WorkGroupSize (uint2(MP_NUM_THREADS_X, MP_NUM_THREADS_Y))
// Group index, derived from the block's starting pixel and the block size.
#define gl_WorkGroupID (blockStart / uint2(MP_BLOCK_WIDTH, MP_BLOCK_HEIGHT))
#define gl_GlobalInvocationID (gl_WorkGroupID*gl_WorkGroupSize + threadId.xy)
// disable warning about unknown pragma
#pragma warning(disable: 3568)
// disable warning about too many threads (ravu-r4-rgb triggers it)
#pragma warning(disable: 4714)
// https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.709-6-201506-I!!PDF-E.pdf
// Luma weights: Y = dot(rgb, rgb2y).
static const float3 rgb2y = float3(0.2126, 0.7152, 0.0722);
// Chroma rows: U = (B - Y) / 1.8556 and V = (R - Y) / 1.5748, written out as
// weights on (R, G, B).
static const float2x3 rgb2uv = {
-0.2126/1.8556, -0.7152/1.8556, 0.9278/1.8556,
0.7874/1.5748, -0.7152/1.5748, -0.0722/1.5748
};
// Inverse transform applied to a (Y, U, V) column vector; rows produce R, G, B.
static const float3x3 yuv2rgb = {
1, 0, 1.5748,
1, -0.187324, -0.468124,
1, 1.8556, 0
};

View file

@ -0,0 +1,176 @@
// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers
// Please don't edit this file directly.
// Generated by: ravu-3x.py --target luma --weights-file weights\ravu-3x_weights-r2.py --float-format float16dx --use-magpie --overwrite
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//!MAGPIE EFFECT
//!VERSION 4
//!TEXTURE
Texture2D INPUT;
//!SAMPLER
//!FILTER POINT
SamplerState sam_INPUT;
//!TEXTURE
//!WIDTH INPUT_WIDTH * 3
//!HEIGHT INPUT_HEIGHT * 3
Texture2D OUTPUT;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_INPUT_LINEAR;
//!TEXTURE
//!SOURCE ravu_3x_lut2_f16.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_3x_lut2;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_3x_lut2;
//!COMMON
#include "prescalers.hlsli"
#define LAST_PASS 1
//!PASS 1
//!DESC RAVU-3x (luma, r2)
//!IN INPUT, ravu_3x_lut2
//!OUT OUTPUT
//!BLOCK_SIZE 96, 24
//!NUM_THREADS 32, 8
// Tile of source luma values that Pass1 fills and then reads back; it is
// indexed as x * 10 + y (340 = 34 * 10).
shared float inp[340];
// Pass index, compared against LAST_PASS to enable the output bounds check.
#define CURRENT_PASS 1
// Reduces an RGB(A) texel to a single luma value using the rgb2y coefficients.
#define GET_SAMPLE(x) dot(x.rgb, rgb2y)
// GLSL-style imageStore shim: the image argument is ignored and only the first
// component of `val` is forwarded to imageStoreOverride.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x)
// Writes one output pixel: `value` supplies the luma, while the chroma is
// taken from the original INPUT image, bilinearly sampled at the centre of the
// output pixel `pos`. The recombined YUV triple is converted back to RGB and
// stored with alpha 1.
void imageStoreOverride(uint2 pos, float value) {
	// Colour of the source image at this output pixel (mip level 0).
	const float3 sourceRgb = INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb;
	// Keep only the chroma of the source; the luma comes from the scaler.
	const float2 chroma = mul(rgb2uv, sourceRgb);
	const float3 yuv = float3(value, chroma);
	OUTPUT[pos] = float4(mul(yuv2rgb, yuv), 1.0);
}
// Samples INPUT at normalised coordinate `pos` and reduces it to luma.
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
// Samples the RAVU lookup texture and returns all four channels.
#define ravu_3x_lut2_tex(pos) (vec4(texture(ravu_3x_lut2, pos)))
// In this pass the "hooked" texture (the one being scaled) is INPUT itself.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// Pass 1 (luma, 3x3 taps): each thread reads a 3x3 luma neighborhood from the inp[] tile,
// classifies its gradient (angle / strength / coherence), fetches the matching weights from
// ravu_3x_lut2 and writes a 3x3 block of output pixels.
// blockStart, threadId: used directly only for the output bounds check below.
// NOTE(review): the gl_* identifiers, barrier(), mix(), mod(), atan() and texture() are not
// defined in this file - presumably GLSL-compatibility helpers from prescalers.hlsli.
void Pass1(uint2 blockStart, uint3 threadId) {
ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
// Index of this thread's first tap in inp[]; the tile is laid out as x * 10 + y.
int local_pos = int(gl_LocalInvocationID.x) * 10 + int(gl_LocalInvocationID.y);
// Fill all 340 tile entries; each thread starts at its own index and steps by the workgroup's thread count.
for (int id = int(gl_LocalInvocationIndex); id < 340; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 10, y = (uint)id % 10;
// Tile entry (x, y) is sampled at (group_base + (x, y) - 0.5) * HOOKED_pt.
inp[id] = HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-0.5), float(group_base.y + y) + (-0.5))).x;
}
// Runs before any thread reads inp[] and before the early return below.
barrier();
#if CURRENT_PASS == LAST_PASS
// Threads whose 3x3 output block would start outside the output image do no further work.
uint2 destPos = blockStart + threadId.xy * 3;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
// 3x3 neighborhood: luma(3 * i + j) is the tap at x offset i, y offset j; luma4 is the center.
float luma0 = inp[local_pos + 0];
float luma1 = inp[local_pos + 1];
float luma2 = inp[local_pos + 2];
float luma3 = inp[local_pos + 10];
float luma4 = inp[local_pos + 11];
float luma5 = inp[local_pos + 12];
float luma6 = inp[local_pos + 20];
float luma7 = inp[local_pos + 21];
float luma8 = inp[local_pos + 22];
// Accumulate the weighted gradient products (gx*gx, gx*gy, gy*gy) over the nine taps.
// gx differences taps one x step apart, gy one y step apart; where a tap has neighbors on
// both sides the difference spans two steps and is halved, otherwise it is one-sided.
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (luma3 - luma0);
gy = (luma1 - luma0);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
gx = (luma4 - luma1);
gy = (luma2 - luma0) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (luma5 - luma2);
gy = (luma2 - luma1);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
gx = (luma6 - luma0) / 2.0;
gy = (luma4 - luma3);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (luma7 - luma1) / 2.0;
gy = (luma5 - luma3) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.13080118386382833;
gx = (luma8 - luma2) / 2.0;
gy = (luma5 - luma4);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (luma6 - luma3);
gy = (luma7 - luma6);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
gx = (luma7 - luma4);
gy = (luma8 - luma6) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (luma8 - luma5);
gy = (luma8 - luma7);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
// Closed-form eigenvalues of the symmetric matrix [[a, b], [b, d]]: T is its trace, D its
// determinant, and L1 >= L2 are the two eigenvalues.
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
// Orientation wrapped by mod pi; replaced by 0 when |b| is below 1.192092896e-7.
// NOTE(review): mix(x, y, cond) is used as a select (y when cond holds, as in GLSL) - confirm.
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
// Relative difference of the two square-rooted eigenvalues; 0 when their sum is near zero.
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
// Quantize: 24 angle bins over pi, three strength levels (thresholds 0.005 / 0.02) and
// three coherence levels (thresholds 0.25 / 0.5).
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = mix(mix(0.0, 1.0, lambda >= 0.005), 2.0, lambda >= 0.02);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
// Row of the lookup texture: one of 24 * 3 * 3 = 216 rows, addressed at its center (+0.5).
float coord_y = ((angle * 3.0 + strength) * 3.0 + coherence + 0.5) / 216.0;
// res0 / res1 accumulate the eight non-center output values, four per vector.
// The lookup row is read at ten x positions (0.05 ... 0.95). Each pair of fetches serves tap k
// and its mirrored tap 8 - k: the mirrored tap takes the other fetch with components reversed (.wzyx).
vec4 res0 = vec4(0.0, 0.0, 0.0, 0.0), res1 = vec4(0.0, 0.0, 0.0, 0.0);
vec4 w0, w1;
w0 = texture(ravu_3x_lut2, vec2(0.05, coord_y));
w1 = texture(ravu_3x_lut2, vec2(0.15, coord_y));
res0 += luma0 * w0 + luma8 * w1.wzyx;
res1 += luma0 * w1 + luma8 * w0.wzyx;
w0 = texture(ravu_3x_lut2, vec2(0.25, coord_y));
w1 = texture(ravu_3x_lut2, vec2(0.35, coord_y));
res0 += luma1 * w0 + luma7 * w1.wzyx;
res1 += luma1 * w1 + luma7 * w0.wzyx;
w0 = texture(ravu_3x_lut2, vec2(0.45, coord_y));
w1 = texture(ravu_3x_lut2, vec2(0.55, coord_y));
res0 += luma2 * w0 + luma6 * w1.wzyx;
res1 += luma2 * w1 + luma6 * w0.wzyx;
w0 = texture(ravu_3x_lut2, vec2(0.65, coord_y));
w1 = texture(ravu_3x_lut2, vec2(0.75, coord_y));
res0 += luma3 * w0 + luma5 * w1.wzyx;
res1 += luma3 * w1 + luma5 * w0.wzyx;
// The center tap has no mirror partner.
w0 = texture(ravu_3x_lut2, vec2(0.85, coord_y));
w1 = texture(ravu_3x_lut2, vec2(0.95, coord_y));
res0 += luma4 * w0;
res1 += luma4 * w1;
res0 = clamp(res0, 0.0, 1.0);
res1 = clamp(res1, 0.0, 1.0);
// Write the 3x3 output block. Pixel (1, 1) receives the unfiltered center tap; the others come
// from res0 / res1. imageStore expands to imageStoreOverride(), which supplies the chroma.
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 0), res0[0]);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 1), res0[1]);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 2), res0[2]);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 0), res0[3]);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 1), luma4);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 2), res1[0]);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 0), res1[1]);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 1), res1[2]);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 2), res1[3]);
}

View file

@ -0,0 +1,180 @@
// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers
// Please don't edit this file directly.
// Generated by: ravu-3x.py --target rgb --weights-file weights\ravu-3x_weights-r2.py --float-format float16dx --use-magpie --overwrite
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//!MAGPIE EFFECT
//!VERSION 4
// Source image; an input of pass 1.
//!TEXTURE
Texture2D INPUT;
// Sampler declared with POINT filtering (presumably what texture(INPUT, ...) resolves to - confirm in prescalers.hlsli).
//!SAMPLER
//!FILTER POINT
SamplerState sam_INPUT;
// Destination image, declared at three times the input width and height.
//!TEXTURE
//!WIDTH INPUT_WIDTH * 3
//!HEIGHT INPUT_HEIGHT * 3
Texture2D OUTPUT;
// Weight lookup texture loaded from ravu_3x_lut2_f16.dds (four 16-bit float channels); read by Pass1.
//!TEXTURE
//!SOURCE ravu_3x_lut2_f16.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_3x_lut2;
// Sampler declared with LINEAR filtering, named after the lookup texture.
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_3x_lut2;
// Shared section: pulls in prescalers.hlsli and records that pass 1 is the last pass
// (Pass1 compares CURRENT_PASS against LAST_PASS).
//!COMMON
#include "prescalers.hlsli"
#define LAST_PASS 1
//!PASS 1
//!DESC RAVU-3x (rgb, r2)
//!IN INPUT, ravu_3x_lut2
//!OUT OUTPUT
//!BLOCK_SIZE 96, 24
//!NUM_THREADS 32, 8
// Per-channel weights Pass1 uses to reduce an RGB sample to luma via dot()
// (the values match the Rec. 709 luma coefficients).
static const vec3 color_primary = vec3(0.2126, 0.7152, 0.0722);
// HLSL has no outerProduct intrinsic, so build it from a matrix multiply:
// a 4x1 column made from r times a 1x3 row made from l gives the 4x3 matrix
// whose row i is r[i] * l.
float4x3 outerProduct(float3 l, float4 r) {
    const float4x1 colVec = float4x1(r);
    const float1x3 rowVec = float1x3(l);
    return mul(colVec, rowVec);
}
// Tiles filled and read by Pass1: 34 x 10 entries each (the 32 x 8 thread grid plus 2 extra
// entries per axis), indexed as x * 10 + y. inp holds the RGB samples, inp_luma their luma.
// NOTE(review): `shared` is presumably mapped to groupshared by prescalers.hlsli - confirm.
shared vec3 inp[340];
shared float inp_luma[340];
#define CURRENT_PASS 1
// RGB variant: fetched samples are passed through unchanged.
#define GET_SAMPLE(x) x
// Route GLSL-style imageStore() calls to imageStoreOverride(); the image argument is dropped.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val)
// Target of the imageStore() macro: stores `value` unchanged into OUTPUT at pixel `pos`.
void imageStoreOverride(uint2 pos, float4 value)
{
    OUTPUT[pos] = value;
}
// Sample INPUT at `pos`; GET_SAMPLE leaves the result unchanged in this variant.
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
// Values returned by GetInputSize() / GetInputPt(), converted to float2.
// NOTE(review): Pass1 multiplies pixel coordinates by INPUT_pt, so it is presumably 1 / size - confirm.
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
// Accessor for the weight lookup texture; Pass1 in this file calls texture() on it directly instead.
#define ravu_3x_lut2_tex(pos) (vec4(texture(ravu_3x_lut2, pos)))
// The HOOKED_* names used by the shader body are aliases of the INPUT_* definitions above.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// Pass 1 (rgb, 3x3 taps): each thread reads a 3x3 neighborhood from the shared tiles, classifies
// the luma gradient (angle / strength / coherence), fetches the matching weights from
// ravu_3x_lut2 and writes a 3x3 block of RGB output pixels.
// blockStart, threadId: used directly only for the output bounds check below.
// NOTE(review): the gl_* identifiers, barrier(), mix(), mod(), atan() and texture() are not
// defined in this file - presumably GLSL-compatibility helpers from prescalers.hlsli.
void Pass1(uint2 blockStart, uint3 threadId) {
ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
// Index of this thread's first tap in the tiles, which are laid out as x * 10 + y.
int local_pos = int(gl_LocalInvocationID.x) * 10 + int(gl_LocalInvocationID.y);
// Fill all 340 tile entries; each thread starts at its own index and steps by the workgroup's thread count.
for (int id = int(gl_LocalInvocationIndex); id < 340; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 10, y = (uint)id % 10;
// Tile entry (x, y) is sampled at (group_base + (x, y) - 0.5) * HOOKED_pt.
inp[id] = HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-0.5), float(group_base.y + y) + (-0.5))).xyz;
// Luma of the same sample, used only for the gradient analysis.
inp_luma[id] = dot(inp[id], color_primary);
}
// Runs before any thread reads the tiles and before the early return below.
barrier();
#if CURRENT_PASS == LAST_PASS
// Threads whose 3x3 output block would start outside the output image do no further work.
uint2 destPos = blockStart + threadId.xy * 3;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
// 3x3 neighborhood: luma(3 * i + j) is the tap at x offset i, y offset j; luma4 is the center.
float luma0 = inp_luma[local_pos + 0];
float luma1 = inp_luma[local_pos + 1];
float luma2 = inp_luma[local_pos + 2];
float luma3 = inp_luma[local_pos + 10];
float luma4 = inp_luma[local_pos + 11];
float luma5 = inp_luma[local_pos + 12];
float luma6 = inp_luma[local_pos + 20];
float luma7 = inp_luma[local_pos + 21];
float luma8 = inp_luma[local_pos + 22];
// Accumulate the weighted gradient products (gx*gx, gx*gy, gy*gy) over the nine taps.
// gx differences taps one x step apart, gy one y step apart; where a tap has neighbors on
// both sides the difference spans two steps and is halved, otherwise it is one-sided.
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (luma3 - luma0);
gy = (luma1 - luma0);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
gx = (luma4 - luma1);
gy = (luma2 - luma0) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (luma5 - luma2);
gy = (luma2 - luma1);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
gx = (luma6 - luma0) / 2.0;
gy = (luma4 - luma3);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (luma7 - luma1) / 2.0;
gy = (luma5 - luma3) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.13080118386382833;
gx = (luma8 - luma2) / 2.0;
gy = (luma5 - luma4);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (luma6 - luma3);
gy = (luma7 - luma6);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
gx = (luma7 - luma4);
gy = (luma8 - luma6) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (luma8 - luma5);
gy = (luma8 - luma7);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
// Closed-form eigenvalues of the symmetric matrix [[a, b], [b, d]]: T is its trace, D its
// determinant, and L1 >= L2 are the two eigenvalues.
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
// Orientation wrapped by mod pi; replaced by 0 when |b| is below 1.192092896e-7.
// NOTE(review): mix(x, y, cond) is used as a select (y when cond holds, as in GLSL) - confirm.
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
// Relative difference of the two square-rooted eigenvalues; 0 when their sum is near zero.
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
// Quantize: 24 angle bins over pi, three strength levels (thresholds 0.005 / 0.02) and
// three coherence levels (thresholds 0.25 / 0.5).
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = mix(mix(0.0, 1.0, lambda >= 0.005), 2.0, lambda >= 0.02);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
// Row of the lookup texture: one of 24 * 3 * 3 = 216 rows, addressed at its center (+0.5).
float coord_y = ((angle * 3.0 + strength) * 3.0 + coherence + 0.5) / 216.0;
// res0 / res1 accumulate the eight non-center output pixels, one RGB row per pixel.
// outerProduct(rgb, w) contributes w[i] * rgb to row i. The lookup row is read at ten x positions
// (0.05 ... 0.95). Each pair of fetches serves the tap at inp offset k and its mirrored tap:
// the mirrored tap takes the other fetch with components reversed (.wzyx).
mat4x3 res0 = 0.0, res1 = 0.0;
vec4 w0, w1;
w0 = texture(ravu_3x_lut2, vec2(0.05, coord_y));
w1 = texture(ravu_3x_lut2, vec2(0.15, coord_y));
res0 += outerProduct(inp[local_pos + 0], w0) + outerProduct(inp[local_pos + 22], w1.wzyx);
res1 += outerProduct(inp[local_pos + 0], w1) + outerProduct(inp[local_pos + 22], w0.wzyx);
w0 = texture(ravu_3x_lut2, vec2(0.25, coord_y));
w1 = texture(ravu_3x_lut2, vec2(0.35, coord_y));
res0 += outerProduct(inp[local_pos + 1], w0) + outerProduct(inp[local_pos + 21], w1.wzyx);
res1 += outerProduct(inp[local_pos + 1], w1) + outerProduct(inp[local_pos + 21], w0.wzyx);
w0 = texture(ravu_3x_lut2, vec2(0.45, coord_y));
w1 = texture(ravu_3x_lut2, vec2(0.55, coord_y));
res0 += outerProduct(inp[local_pos + 2], w0) + outerProduct(inp[local_pos + 20], w1.wzyx);
res1 += outerProduct(inp[local_pos + 2], w1) + outerProduct(inp[local_pos + 20], w0.wzyx);
w0 = texture(ravu_3x_lut2, vec2(0.65, coord_y));
w1 = texture(ravu_3x_lut2, vec2(0.75, coord_y));
res0 += outerProduct(inp[local_pos + 10], w0) + outerProduct(inp[local_pos + 12], w1.wzyx);
res1 += outerProduct(inp[local_pos + 10], w1) + outerProduct(inp[local_pos + 12], w0.wzyx);
// The center tap has no mirror partner.
w0 = texture(ravu_3x_lut2, vec2(0.85, coord_y));
w1 = texture(ravu_3x_lut2, vec2(0.95, coord_y));
res0 += outerProduct(inp[local_pos + 11], w0);
res1 += outerProduct(inp[local_pos + 11], w1);
// Clamp every output row to [0, 1].
res0[0] = clamp(res0[0], 0.0, 1.0);
res0[1] = clamp(res0[1], 0.0, 1.0);
res0[2] = clamp(res0[2], 0.0, 1.0);
res0[3] = clamp(res0[3], 0.0, 1.0);
res1[0] = clamp(res1[0], 0.0, 1.0);
res1[1] = clamp(res1[1], 0.0, 1.0);
res1[2] = clamp(res1[2], 0.0, 1.0);
res1[3] = clamp(res1[3], 0.0, 1.0);
// Write the 3x3 output block with alpha 1.0. Pixel (1, 1) receives the unfiltered center
// sample; the others come from the rows of res0 / res1.
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 0), vec4(res0[0], 1.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 1), vec4(res0[1], 1.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 2), vec4(res0[2], 1.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 0), vec4(res0[3], 1.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 1), vec4(inp[local_pos + 11], 1.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 2), vec4(res1[0], 1.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 0), vec4(res1[1], 1.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 1), vec4(res1[2], 1.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 2), vec4(res1[3], 1.0));
}

View file

@ -0,0 +1,224 @@
// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers
// Please don't edit this file directly.
// Generated by: ravu-3x.py --target luma --weights-file weights\ravu-3x_weights-r3.py --float-format float16dx --use-magpie --overwrite
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//!MAGPIE EFFECT
//!VERSION 4
// Source image; an input of pass 1.
//!TEXTURE
Texture2D INPUT;
// Sampler declared with POINT filtering (presumably what texture(INPUT, ...) resolves to - confirm in prescalers.hlsli).
//!SAMPLER
//!FILTER POINT
SamplerState sam_INPUT;
// Destination image, declared at three times the input width and height.
//!TEXTURE
//!WIDTH INPUT_WIDTH * 3
//!HEIGHT INPUT_HEIGHT * 3
Texture2D OUTPUT;
// Sampler declared with LINEAR filtering; imageStoreOverride() samples INPUT through it to fetch chroma.
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_INPUT_LINEAR;
// Weight lookup texture loaded from ravu_3x_lut3_f16.dds (four 16-bit float channels); read by Pass1.
//!TEXTURE
//!SOURCE ravu_3x_lut3_f16.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_3x_lut3;
// Sampler declared with LINEAR filtering, named after the lookup texture.
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_3x_lut3;
// Shared section: pulls in prescalers.hlsli and records that pass 1 is the last pass
// (Pass1 compares CURRENT_PASS against LAST_PASS).
//!COMMON
#include "prescalers.hlsli"
#define LAST_PASS 1
//!PASS 1
//!DESC RAVU-3x (luma, r3)
//!IN INPUT, ravu_3x_lut3
//!OUT OUTPUT
//!BLOCK_SIZE 96, 24
//!NUM_THREADS 32, 8
// Tile of input luma filled and read by Pass1: 36 x 12 entries (the 32 x 8 thread grid plus
// 4 extra entries per axis), indexed as x * 12 + y.
// NOTE(review): `shared` is presumably mapped to groupshared by prescalers.hlsli - confirm.
shared float inp[432];
#define CURRENT_PASS 1
// Reduce a fetched color to one value: dot product of its rgb with rgb2y (defined outside this file's visible code).
#define GET_SAMPLE(x) dot(x.rgb, rgb2y)
// Route GLSL-style imageStore() calls to imageStoreOverride(); the image argument is dropped and
// only the .x component of val is forwarded.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x)
// Writes one filtered luma value to OUTPUT at pixel `pos`.
// The two remaining channels are not produced by the kernel: they are derived from INPUT,
// sampled through the linear sampler at HOOKED_map(pos), converted with rgb2uv, then
// recombined with the new luma via yuv2rgb. Alpha is written as 1.0.
void imageStoreOverride(uint2 pos, float value) {
    const float3 sourceRgb = INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb;
    const float2 sourceUv = mul(rgb2uv, sourceRgb);
    const float3 lumaChroma = float3(value.x, sourceUv);
    OUTPUT[pos] = float4(mul(yuv2rgb, lumaChroma), 1.0);
}
// Sample INPUT at `pos` and reduce the result with GET_SAMPLE.
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
// Values returned by GetInputSize() / GetInputPt(), converted to float2.
// NOTE(review): Pass1 multiplies pixel coordinates by INPUT_pt, so it is presumably 1 / size - confirm.
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
// Accessor for the weight lookup texture; Pass1 in this file calls texture() on it directly instead.
#define ravu_3x_lut3_tex(pos) (vec4(texture(ravu_3x_lut3, pos)))
// The HOOKED_* names used by the shader body are aliases of the INPUT_* definitions above.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// Pass 1 (luma, 5x5 taps): each thread reads a 5x5 luma neighborhood from the inp[] tile,
// classifies its gradient (angle / strength / coherence), fetches the matching weights from
// ravu_3x_lut3 and writes a 3x3 block of output pixels.
// blockStart, threadId: used directly only for the output bounds check below.
// NOTE(review): the gl_* identifiers, barrier(), mix(), mod(), atan() and texture() are not
// defined in this file - presumably GLSL-compatibility helpers from prescalers.hlsli.
void Pass1(uint2 blockStart, uint3 threadId) {
ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
// Index of this thread's first tap in inp[]; the tile is laid out as x * 12 + y.
int local_pos = int(gl_LocalInvocationID.x) * 12 + int(gl_LocalInvocationID.y);
// Fill all 432 tile entries; each thread starts at its own index and steps by the workgroup's thread count.
for (int id = int(gl_LocalInvocationIndex); id < 432; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 12, y = (uint)id % 12;
// Tile entry (x, y) is sampled at (group_base + (x, y) - 1.5) * HOOKED_pt.
inp[id] = HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-1.5), float(group_base.y + y) + (-1.5))).x;
}
// Runs before any thread reads inp[] and before the early return below.
barrier();
#if CURRENT_PASS == LAST_PASS
// Threads whose 3x3 output block would start outside the output image do no further work.
uint2 destPos = blockStart + threadId.xy * 3;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
// 5x5 neighborhood: luma(5 * i + j) is the tap at x offset i, y offset j; luma12 is the center.
float luma0 = inp[local_pos + 0];
float luma1 = inp[local_pos + 1];
float luma2 = inp[local_pos + 2];
float luma3 = inp[local_pos + 3];
float luma4 = inp[local_pos + 4];
float luma5 = inp[local_pos + 12];
float luma6 = inp[local_pos + 13];
float luma7 = inp[local_pos + 14];
float luma8 = inp[local_pos + 15];
float luma9 = inp[local_pos + 16];
float luma10 = inp[local_pos + 24];
float luma11 = inp[local_pos + 25];
float luma12 = inp[local_pos + 26];
float luma13 = inp[local_pos + 27];
float luma14 = inp[local_pos + 28];
float luma15 = inp[local_pos + 36];
float luma16 = inp[local_pos + 37];
float luma17 = inp[local_pos + 38];
float luma18 = inp[local_pos + 39];
float luma19 = inp[local_pos + 40];
float luma20 = inp[local_pos + 48];
float luma21 = inp[local_pos + 49];
float luma22 = inp[local_pos + 50];
float luma23 = inp[local_pos + 51];
float luma24 = inp[local_pos + 52];
// Accumulate the weighted gradient products (gx*gx, gx*gy, gy*gy) at the nine taps of the
// inner 3x3 (lumas 6-8, 11-13, 16-18). Every gradient is a halved difference of the two
// neighbors on either side: gx along x, gy along y.
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (luma11 - luma1) / 2.0;
gy = (luma7 - luma5) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
gx = (luma12 - luma2) / 2.0;
gy = (luma8 - luma6) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (luma13 - luma3) / 2.0;
gy = (luma9 - luma7) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
gx = (luma16 - luma6) / 2.0;
gy = (luma12 - luma10) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (luma17 - luma7) / 2.0;
gy = (luma13 - luma11) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.13080118386382833;
gx = (luma18 - luma8) / 2.0;
gy = (luma14 - luma12) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (luma21 - luma11) / 2.0;
gy = (luma17 - luma15) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
gx = (luma22 - luma12) / 2.0;
gy = (luma18 - luma16) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (luma23 - luma13) / 2.0;
gy = (luma19 - luma17) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
// Closed-form eigenvalues of the symmetric matrix [[a, b], [b, d]]: T is its trace, D its
// determinant, and L1 >= L2 are the two eigenvalues.
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
// Orientation wrapped by mod pi; replaced by 0 when |b| is below 1.192092896e-7.
// NOTE(review): mix(x, y, cond) is used as a select (y when cond holds, as in GLSL) - confirm.
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
// Relative difference of the two square-rooted eigenvalues; 0 when their sum is near zero.
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
// Quantize: 24 angle bins over pi, three strength levels (thresholds 0.005 / 0.02) and
// three coherence levels (thresholds 0.25 / 0.5).
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = mix(mix(0.0, 1.0, lambda >= 0.005), 2.0, lambda >= 0.02);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
// Row of the lookup texture: one of 24 * 3 * 3 = 216 rows, addressed at its center (+0.5).
float coord_y = ((angle * 3.0 + strength) * 3.0 + coherence + 0.5) / 216.0;
// res0 / res1 accumulate the eight non-center output values, four per vector.
// The lookup row is read at 26 x positions, (i + 0.5) / 26. Each pair of fetches serves tap k
// and its mirrored tap 24 - k: the mirrored tap takes the other fetch with components reversed (.wzyx).
vec4 res0 = vec4(0.0, 0.0, 0.0, 0.0), res1 = vec4(0.0, 0.0, 0.0, 0.0);
vec4 w0, w1;
w0 = texture(ravu_3x_lut3, vec2(0.019230769230769232, coord_y));
w1 = texture(ravu_3x_lut3, vec2(0.057692307692307696, coord_y));
res0 += luma0 * w0 + luma24 * w1.wzyx;
res1 += luma0 * w1 + luma24 * w0.wzyx;
w0 = texture(ravu_3x_lut3, vec2(0.09615384615384616, coord_y));
w1 = texture(ravu_3x_lut3, vec2(0.1346153846153846, coord_y));
res0 += luma1 * w0 + luma23 * w1.wzyx;
res1 += luma1 * w1 + luma23 * w0.wzyx;
w0 = texture(ravu_3x_lut3, vec2(0.17307692307692307, coord_y));
w1 = texture(ravu_3x_lut3, vec2(0.21153846153846154, coord_y));
res0 += luma2 * w0 + luma22 * w1.wzyx;
res1 += luma2 * w1 + luma22 * w0.wzyx;
w0 = texture(ravu_3x_lut3, vec2(0.25, coord_y));
w1 = texture(ravu_3x_lut3, vec2(0.28846153846153844, coord_y));
res0 += luma3 * w0 + luma21 * w1.wzyx;
res1 += luma3 * w1 + luma21 * w0.wzyx;
w0 = texture(ravu_3x_lut3, vec2(0.3269230769230769, coord_y));
w1 = texture(ravu_3x_lut3, vec2(0.36538461538461536, coord_y));
res0 += luma4 * w0 + luma20 * w1.wzyx;
res1 += luma4 * w1 + luma20 * w0.wzyx;
w0 = texture(ravu_3x_lut3, vec2(0.40384615384615385, coord_y));
w1 = texture(ravu_3x_lut3, vec2(0.4423076923076923, coord_y));
res0 += luma5 * w0 + luma19 * w1.wzyx;
res1 += luma5 * w1 + luma19 * w0.wzyx;
w0 = texture(ravu_3x_lut3, vec2(0.4807692307692308, coord_y));
w1 = texture(ravu_3x_lut3, vec2(0.5192307692307693, coord_y));
res0 += luma6 * w0 + luma18 * w1.wzyx;
res1 += luma6 * w1 + luma18 * w0.wzyx;
w0 = texture(ravu_3x_lut3, vec2(0.5576923076923077, coord_y));
w1 = texture(ravu_3x_lut3, vec2(0.5961538461538461, coord_y));
res0 += luma7 * w0 + luma17 * w1.wzyx;
res1 += luma7 * w1 + luma17 * w0.wzyx;
w0 = texture(ravu_3x_lut3, vec2(0.6346153846153846, coord_y));
w1 = texture(ravu_3x_lut3, vec2(0.6730769230769231, coord_y));
res0 += luma8 * w0 + luma16 * w1.wzyx;
res1 += luma8 * w1 + luma16 * w0.wzyx;
w0 = texture(ravu_3x_lut3, vec2(0.7115384615384616, coord_y));
w1 = texture(ravu_3x_lut3, vec2(0.75, coord_y));
res0 += luma9 * w0 + luma15 * w1.wzyx;
res1 += luma9 * w1 + luma15 * w0.wzyx;
w0 = texture(ravu_3x_lut3, vec2(0.7884615384615384, coord_y));
w1 = texture(ravu_3x_lut3, vec2(0.8269230769230769, coord_y));
res0 += luma10 * w0 + luma14 * w1.wzyx;
res1 += luma10 * w1 + luma14 * w0.wzyx;
w0 = texture(ravu_3x_lut3, vec2(0.8653846153846154, coord_y));
w1 = texture(ravu_3x_lut3, vec2(0.9038461538461539, coord_y));
res0 += luma11 * w0 + luma13 * w1.wzyx;
res1 += luma11 * w1 + luma13 * w0.wzyx;
// The center tap has no mirror partner.
w0 = texture(ravu_3x_lut3, vec2(0.9423076923076923, coord_y));
w1 = texture(ravu_3x_lut3, vec2(0.9807692307692307, coord_y));
res0 += luma12 * w0;
res1 += luma12 * w1;
res0 = clamp(res0, 0.0, 1.0);
res1 = clamp(res1, 0.0, 1.0);
// Write the 3x3 output block. Pixel (1, 1) receives the unfiltered center tap; the others come
// from res0 / res1. imageStore expands to imageStoreOverride(), which supplies the chroma.
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 0), res0[0]);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 1), res0[1]);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 2), res0[2]);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 0), res0[3]);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 1), luma12);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 2), res1[0]);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 0), res1[1]);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 1), res1[2]);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 2), res1[3]);
}

View file

@ -0,0 +1,224 @@
// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers
// Please don't edit this file directly.
// Generated by: ravu-3x.py --target rgb --weights-file weights\ravu-3x_weights-r3.py --float-format float16dx --use-magpie --overwrite
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//!MAGPIE EFFECT
//!VERSION 4
// Source image; an input of pass 1.
//!TEXTURE
Texture2D INPUT;
// Sampler declared with POINT filtering (presumably what texture(INPUT, ...) resolves to - confirm in prescalers.hlsli).
//!SAMPLER
//!FILTER POINT
SamplerState sam_INPUT;
// Destination image, declared at three times the input width and height.
//!TEXTURE
//!WIDTH INPUT_WIDTH * 3
//!HEIGHT INPUT_HEIGHT * 3
Texture2D OUTPUT;
// Weight lookup texture loaded from ravu_3x_lut3_f16.dds (four 16-bit float channels); read by Pass1.
//!TEXTURE
//!SOURCE ravu_3x_lut3_f16.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_3x_lut3;
// Sampler declared with LINEAR filtering, named after the lookup texture.
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_3x_lut3;
// Shared section: pulls in prescalers.hlsli and records that pass 1 is the last pass
// (Pass1 compares CURRENT_PASS against LAST_PASS).
//!COMMON
#include "prescalers.hlsli"
#define LAST_PASS 1
//!PASS 1
//!DESC RAVU-3x (rgb, r3)
//!IN INPUT, ravu_3x_lut3
//!OUT OUTPUT
//!BLOCK_SIZE 96, 24
//!NUM_THREADS 32, 8
// Per-channel weights Pass1 uses to reduce an RGB sample to luma via dot()
// (the values match the Rec. 709 luma coefficients).
static const vec3 color_primary = vec3(0.2126, 0.7152, 0.0722);
// HLSL has no outerProduct intrinsic, so build it from a matrix multiply:
// a 4x1 column made from r times a 1x3 row made from l gives the 4x3 matrix
// whose row i is r[i] * l.
float4x3 outerProduct(float3 l, float4 r) {
    const float4x1 colVec = float4x1(r);
    const float1x3 rowVec = float1x3(l);
    return mul(colVec, rowVec);
}
// Tiles filled and read by Pass1: 36 x 12 entries each (the 32 x 8 thread grid plus 4 extra
// entries per axis), indexed as x * 12 + y. inp holds the RGB samples, inp_luma their luma.
// NOTE(review): `shared` is presumably mapped to groupshared by prescalers.hlsli - confirm.
shared vec3 inp[432];
shared float inp_luma[432];
#define CURRENT_PASS 1
// RGB variant: fetched samples are passed through unchanged.
#define GET_SAMPLE(x) x
// Route GLSL-style imageStore() calls to imageStoreOverride(); the image argument is dropped.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val)
// Target of the imageStore() macro: stores `value` unchanged into OUTPUT at pixel `pos`.
void imageStoreOverride(uint2 pos, float4 value)
{
    OUTPUT[pos] = value;
}
// Sample INPUT at `pos`; GET_SAMPLE leaves the result unchanged in this variant.
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
// Values returned by GetInputSize() / GetInputPt(), converted to float2.
// NOTE(review): Pass1 multiplies pixel coordinates by INPUT_pt, so it is presumably 1 / size - confirm.
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
// Accessor for the weight lookup texture; Pass1 in this file calls texture() on it directly instead.
#define ravu_3x_lut3_tex(pos) (vec4(texture(ravu_3x_lut3, pos)))
// The HOOKED_* names used by the shader body are aliases of the INPUT_* definitions above.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// Pass 1 (rgb, 5x5 taps): each thread reads a 5x5 neighborhood from the shared tiles, classifies
// the luma gradient (angle / strength / coherence), fetches the matching weights from
// ravu_3x_lut3 and writes a 3x3 block of RGB output pixels.
// blockStart, threadId: used directly only for the output bounds check below.
// NOTE(review): the gl_* identifiers, barrier(), mix(), mod(), atan() and texture() are not
// defined in this file - presumably GLSL-compatibility helpers from prescalers.hlsli.
void Pass1(uint2 blockStart, uint3 threadId) {
ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
// Index of this thread's first tap in the tiles, which are laid out as x * 12 + y.
int local_pos = int(gl_LocalInvocationID.x) * 12 + int(gl_LocalInvocationID.y);
// Fill all 432 tile entries; each thread starts at its own index and steps by the workgroup's thread count.
for (int id = int(gl_LocalInvocationIndex); id < 432; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 12, y = (uint)id % 12;
// Tile entry (x, y) is sampled at (group_base + (x, y) - 1.5) * HOOKED_pt.
inp[id] = HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-1.5), float(group_base.y + y) + (-1.5))).xyz;
// Luma of the same sample, used only for the gradient analysis.
inp_luma[id] = dot(inp[id], color_primary);
}
// Runs before any thread reads the tiles and before the early return below.
barrier();
#if CURRENT_PASS == LAST_PASS
// Threads whose 3x3 output block would start outside the output image do no further work.
uint2 destPos = blockStart + threadId.xy * 3;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
// Luma taps of the 5x5 neighborhood, numbered 5 * i + j for x offset i and y offset j.
// Taps 0, 4, 20 and 24 are not loaded because the gradient code below never reads them.
float luma1 = inp_luma[local_pos + 1];
float luma2 = inp_luma[local_pos + 2];
float luma3 = inp_luma[local_pos + 3];
float luma5 = inp_luma[local_pos + 12];
float luma6 = inp_luma[local_pos + 13];
float luma7 = inp_luma[local_pos + 14];
float luma8 = inp_luma[local_pos + 15];
float luma9 = inp_luma[local_pos + 16];
float luma10 = inp_luma[local_pos + 24];
float luma11 = inp_luma[local_pos + 25];
float luma12 = inp_luma[local_pos + 26];
float luma13 = inp_luma[local_pos + 27];
float luma14 = inp_luma[local_pos + 28];
float luma15 = inp_luma[local_pos + 36];
float luma16 = inp_luma[local_pos + 37];
float luma17 = inp_luma[local_pos + 38];
float luma18 = inp_luma[local_pos + 39];
float luma19 = inp_luma[local_pos + 40];
float luma21 = inp_luma[local_pos + 49];
float luma22 = inp_luma[local_pos + 50];
float luma23 = inp_luma[local_pos + 51];
// Accumulate the weighted gradient products (gx*gx, gx*gy, gy*gy) at the nine taps of the
// inner 3x3 (lumas 6-8, 11-13, 16-18). Every gradient is a halved difference of the two
// neighbors on either side: gx along x, gy along y.
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (luma11 - luma1) / 2.0;
gy = (luma7 - luma5) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
gx = (luma12 - luma2) / 2.0;
gy = (luma8 - luma6) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (luma13 - luma3) / 2.0;
gy = (luma9 - luma7) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
gx = (luma16 - luma6) / 2.0;
gy = (luma12 - luma10) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (luma17 - luma7) / 2.0;
gy = (luma13 - luma11) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.13080118386382833;
gx = (luma18 - luma8) / 2.0;
gy = (luma14 - luma12) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (luma21 - luma11) / 2.0;
gy = (luma17 - luma15) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
gx = (luma22 - luma12) / 2.0;
gy = (luma18 - luma16) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (luma23 - luma13) / 2.0;
gy = (luma19 - luma17) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
// Closed-form eigenvalues of the symmetric matrix [[a, b], [b, d]]: T is its trace, D its
// determinant, and L1 >= L2 are the two eigenvalues.
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
// Orientation wrapped by mod pi; replaced by 0 when |b| is below 1.192092896e-7.
// NOTE(review): mix(x, y, cond) is used as a select (y when cond holds, as in GLSL) - confirm.
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
// Relative difference of the two square-rooted eigenvalues; 0 when their sum is near zero.
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
// Quantize: 24 angle bins over pi, three strength levels (thresholds 0.005 / 0.02) and
// three coherence levels (thresholds 0.25 / 0.5).
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = mix(mix(0.0, 1.0, lambda >= 0.005), 2.0, lambda >= 0.02);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
// Row of the lookup texture: one of 24 * 3 * 3 = 216 rows, addressed at its center (+0.5).
float coord_y = ((angle * 3.0 + strength) * 3.0 + coherence + 0.5) / 216.0;
// res0 / res1 accumulate the eight non-center output pixels, one RGB row per pixel.
// outerProduct(rgb, w) contributes w[i] * rgb to row i. The lookup row is read at 26 x positions,
// (i + 0.5) / 26. Each pair of fetches serves one tap and its mirrored tap (inp offsets k and
// 52 - k): the mirrored tap takes the other fetch with components reversed (.wzyx).
mat4x3 res0 = 0.0, res1 = 0.0;
vec4 w0, w1;
w0 = texture(ravu_3x_lut3, vec2(0.019230769230769232, coord_y));
w1 = texture(ravu_3x_lut3, vec2(0.057692307692307696, coord_y));
res0 += outerProduct(inp[local_pos + 0], w0) + outerProduct(inp[local_pos + 52], w1.wzyx);
res1 += outerProduct(inp[local_pos + 0], w1) + outerProduct(inp[local_pos + 52], w0.wzyx);
w0 = texture(ravu_3x_lut3, vec2(0.09615384615384616, coord_y));
w1 = texture(ravu_3x_lut3, vec2(0.1346153846153846, coord_y));
res0 += outerProduct(inp[local_pos + 1], w0) + outerProduct(inp[local_pos + 51], w1.wzyx);
res1 += outerProduct(inp[local_pos + 1], w1) + outerProduct(inp[local_pos + 51], w0.wzyx);
w0 = texture(ravu_3x_lut3, vec2(0.17307692307692307, coord_y));
w1 = texture(ravu_3x_lut3, vec2(0.21153846153846154, coord_y));
res0 += outerProduct(inp[local_pos + 2], w0) + outerProduct(inp[local_pos + 50], w1.wzyx);
res1 += outerProduct(inp[local_pos + 2], w1) + outerProduct(inp[local_pos + 50], w0.wzyx);
w0 = texture(ravu_3x_lut3, vec2(0.25, coord_y));
w1 = texture(ravu_3x_lut3, vec2(0.28846153846153844, coord_y));
res0 += outerProduct(inp[local_pos + 3], w0) + outerProduct(inp[local_pos + 49], w1.wzyx);
res1 += outerProduct(inp[local_pos + 3], w1) + outerProduct(inp[local_pos + 49], w0.wzyx);
w0 = texture(ravu_3x_lut3, vec2(0.3269230769230769, coord_y));
w1 = texture(ravu_3x_lut3, vec2(0.36538461538461536, coord_y));
res0 += outerProduct(inp[local_pos + 4], w0) + outerProduct(inp[local_pos + 48], w1.wzyx);
res1 += outerProduct(inp[local_pos + 4], w1) + outerProduct(inp[local_pos + 48], w0.wzyx);
w0 = texture(ravu_3x_lut3, vec2(0.40384615384615385, coord_y));
w1 = texture(ravu_3x_lut3, vec2(0.4423076923076923, coord_y));
res0 += outerProduct(inp[local_pos + 12], w0) + outerProduct(inp[local_pos + 40], w1.wzyx);
res1 += outerProduct(inp[local_pos + 12], w1) + outerProduct(inp[local_pos + 40], w0.wzyx);
w0 = texture(ravu_3x_lut3, vec2(0.4807692307692308, coord_y));
w1 = texture(ravu_3x_lut3, vec2(0.5192307692307693, coord_y));
res0 += outerProduct(inp[local_pos + 13], w0) + outerProduct(inp[local_pos + 39], w1.wzyx);
res1 += outerProduct(inp[local_pos + 13], w1) + outerProduct(inp[local_pos + 39], w0.wzyx);
w0 = texture(ravu_3x_lut3, vec2(0.5576923076923077, coord_y));
w1 = texture(ravu_3x_lut3, vec2(0.5961538461538461, coord_y));
res0 += outerProduct(inp[local_pos + 14], w0) + outerProduct(inp[local_pos + 38], w1.wzyx);
res1 += outerProduct(inp[local_pos + 14], w1) + outerProduct(inp[local_pos + 38], w0.wzyx);
w0 = texture(ravu_3x_lut3, vec2(0.6346153846153846, coord_y));
w1 = texture(ravu_3x_lut3, vec2(0.6730769230769231, coord_y));
res0 += outerProduct(inp[local_pos + 15], w0) + outerProduct(inp[local_pos + 37], w1.wzyx);
res1 += outerProduct(inp[local_pos + 15], w1) + outerProduct(inp[local_pos + 37], w0.wzyx);
w0 = texture(ravu_3x_lut3, vec2(0.7115384615384616, coord_y));
w1 = texture(ravu_3x_lut3, vec2(0.75, coord_y));
res0 += outerProduct(inp[local_pos + 16], w0) + outerProduct(inp[local_pos + 36], w1.wzyx);
res1 += outerProduct(inp[local_pos + 16], w1) + outerProduct(inp[local_pos + 36], w0.wzyx);
w0 = texture(ravu_3x_lut3, vec2(0.7884615384615384, coord_y));
w1 = texture(ravu_3x_lut3, vec2(0.8269230769230769, coord_y));
res0 += outerProduct(inp[local_pos + 24], w0) + outerProduct(inp[local_pos + 28], w1.wzyx);
res1 += outerProduct(inp[local_pos + 24], w1) + outerProduct(inp[local_pos + 28], w0.wzyx);
w0 = texture(ravu_3x_lut3, vec2(0.8653846153846154, coord_y));
w1 = texture(ravu_3x_lut3, vec2(0.9038461538461539, coord_y));
res0 += outerProduct(inp[local_pos + 25], w0) + outerProduct(inp[local_pos + 27], w1.wzyx);
res1 += outerProduct(inp[local_pos + 25], w1) + outerProduct(inp[local_pos + 27], w0.wzyx);
// The center tap (inp offset 26) has no mirror partner.
w0 = texture(ravu_3x_lut3, vec2(0.9423076923076923, coord_y));
w1 = texture(ravu_3x_lut3, vec2(0.9807692307692307, coord_y));
res0 += outerProduct(inp[local_pos + 26], w0);
res1 += outerProduct(inp[local_pos + 26], w1);
// Clamp every output row to [0, 1].
res0[0] = clamp(res0[0], 0.0, 1.0);
res0[1] = clamp(res0[1], 0.0, 1.0);
res0[2] = clamp(res0[2], 0.0, 1.0);
res0[3] = clamp(res0[3], 0.0, 1.0);
res1[0] = clamp(res1[0], 0.0, 1.0);
res1[1] = clamp(res1[1], 0.0, 1.0);
res1[2] = clamp(res1[2], 0.0, 1.0);
res1[3] = clamp(res1[3], 0.0, 1.0);
// Write the 3x3 output block with alpha 1.0. Pixel (1, 1) receives the unfiltered center
// sample; the others come from the rows of res0 / res1.
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 0), vec4(res0[0], 1.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 1), vec4(res0[1], 1.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 2), vec4(res0[2], 1.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 0), vec4(res0[3], 1.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 1), vec4(inp[local_pos + 26], 1.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 2), vec4(res1[0], 1.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 0), vec4(res1[1], 1.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 1), vec4(res1[2], 1.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 2), vec4(res1[3], 1.0));
}

View file

@ -0,0 +1,344 @@
// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers
// Please don't edit this file directly.
// Generated by: ravu-3x.py --target luma --weights-file weights\ravu-3x_weights-r4.py --float-format float16dx --use-magpie --overwrite
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// Effect manifest for the luma variant of RAVU-3x (r4).
// NOTE(review): the `//!` lines look like loader directives rather than ordinary
// comments - keep them byte-identical when editing.
//!MAGPIE EFFECT
//!VERSION 4
// Source image.
//!TEXTURE
Texture2D INPUT;
// Point-filtered sampler; presumably what the `texture(INPUT, ...)` macro uses - confirm in prescalers.hlsli.
//!SAMPLER
//!FILTER POINT
SamplerState sam_INPUT;
// Destination image: three times the input size in each dimension.
//!TEXTURE
//!WIDTH INPUT_WIDTH * 3
//!HEIGHT INPUT_HEIGHT * 3
Texture2D OUTPUT;
// Linear sampler used by imageStoreOverride() to fetch the source colour at each output position.
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_INPUT_LINEAR;
// Weight lookup table loaded from a .dds file, stored as four 16-bit floats per texel.
//!TEXTURE
//!SOURCE ravu_3x_lut4_f16.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_3x_lut4;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_3x_lut4;
// Shared section: helper definitions come from prescalers.hlsli (not visible here).
//!COMMON
#include "prescalers.hlsli"
// This effect has a single pass, so pass 1 is also the last one.
#define LAST_PASS 1
// With 32x8 threads and a 3x3 output tile per thread, one group covers 96x24 output pixels.
//!PASS 1
//!DESC RAVU-3x (luma, r4)
//!IN INPUT, ravu_3x_lut4
//!OUT OUTPUT
//!BLOCK_SIZE 96, 24
//!NUM_THREADS 32, 8
// Group-shared cache of input samples; Pass1 addresses it as x * 14 + y (38 * 14 = 532 entries).
shared float inp[532];
#define CURRENT_PASS 1
// Reduces a fetched colour to one scalar via a dot product with rgb2y
// (rgb2y is not defined in this file; presumably it comes from prescalers.hlsli).
#define GET_SAMPLE(x) dot(x.rgb, rgb2y)
// Redirects GLSL-style imageStore() calls: the image argument is dropped and only
// the first component of the value is forwarded to imageStoreOverride().
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x)
// Writes one output pixel from a filtered scalar `value`.
// The other two channels are derived from the source image: INPUT is sampled
// with the linear sampler at HOOKED_map(pos) and transformed by rgb2uv. The
// resulting triple (value, UV) is transformed by yuv2rgb and stored with alpha 1.
// rgb2uv, yuv2rgb and HOOKED_map are not defined in this file (presumably
// provided by prescalers.hlsli).
void imageStoreOverride(uint2 pos, float value) {
    const float3 sourceRgb = INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb;
    const float2 chroma = mul(rgb2uv, sourceRgb);
    const float3 yuv = float3(value.x, chroma);
    OUTPUT[pos] = float4(mul(yuv2rgb, yuv), 1.0);
}
// Accessors for the input texture. INPUT_tex() fetches a texel and reduces it
// with GET_SAMPLE; `texture` and `vec4` are GLSL-style names that are not
// defined in this file (presumably supplied by prescalers.hlsli).
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
// Values returned by GetInputSize() / GetInputPt(), converted to float2.
// NOTE(review): INPUT_pt is presumably the size of one input texel in UV units - confirm.
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
// Accessor for the weight lookup table.
#define ravu_3x_lut4_tex(pos) (vec4(texture(ravu_3x_lut4, pos)))
// The HOOKED_* names used by the pass body are plain aliases for the INPUT_* ones.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// Pass 1 of RAVU-3x (luma, r4).
// Each thread reads a 7x7 neighbourhood of scalar samples from the group-shared
// cache and writes a 3x3 tile of output pixels: eight filtered values plus the
// unfiltered centre sample.
//   blockStart - used together with threadId to compute this thread's output position
//                for the bounds check (presumably the output origin of the thread group).
//   threadId   - thread index; only .xy is read directly in this function.
// The gl_* identifiers, barrier(), texture(), mix(), mod() and the two-argument
// atan() are GLSL-style names not defined in this file (presumably mapped to
// HLSL by prescalers.hlsli).
void Pass1(uint2 blockStart, uint3 threadId) {
ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
// Index of this thread's top-left neighbourhood sample; the cache is laid out as x * 14 + y.
int local_pos = int(gl_LocalInvocationID.x) * 14 + int(gl_LocalInvocationID.y);
// The threads of the group fill all 532 cache entries between them, each thread
// striding by the group's thread count. The -2.5 offset shifts the fetch window
// relative to group_base before scaling by HOOKED_pt.
for (int id = int(gl_LocalInvocationIndex); id < 532; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 14, y = (uint)id % 14;
inp[id] = HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-2.5), float(group_base.y + y) + (-2.5))).x;
}
// All cache writes above must be finished before any thread reads the cache below.
barrier();
#if CURRENT_PASS == LAST_PASS
// Threads whose output tile starts outside the output image have nothing to write.
// This return comes after the barrier, so every thread still took part in the cache fill.
uint2 destPos = blockStart + threadId.xy * 3;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
// Copy the 7x7 neighbourhood into locals: lumaN is at cache offset (N / 7) * 14 + (N % 7),
// so luma24 (offset 45) is the centre sample.
float luma0 = inp[local_pos + 0];
float luma1 = inp[local_pos + 1];
float luma2 = inp[local_pos + 2];
float luma3 = inp[local_pos + 3];
float luma4 = inp[local_pos + 4];
float luma5 = inp[local_pos + 5];
float luma6 = inp[local_pos + 6];
float luma7 = inp[local_pos + 14];
float luma8 = inp[local_pos + 15];
float luma9 = inp[local_pos + 16];
float luma10 = inp[local_pos + 17];
float luma11 = inp[local_pos + 18];
float luma12 = inp[local_pos + 19];
float luma13 = inp[local_pos + 20];
float luma14 = inp[local_pos + 28];
float luma15 = inp[local_pos + 29];
float luma16 = inp[local_pos + 30];
float luma17 = inp[local_pos + 31];
float luma18 = inp[local_pos + 32];
float luma19 = inp[local_pos + 33];
float luma20 = inp[local_pos + 34];
float luma21 = inp[local_pos + 42];
float luma22 = inp[local_pos + 43];
float luma23 = inp[local_pos + 44];
float luma24 = inp[local_pos + 45];
float luma25 = inp[local_pos + 46];
float luma26 = inp[local_pos + 47];
float luma27 = inp[local_pos + 48];
float luma28 = inp[local_pos + 56];
float luma29 = inp[local_pos + 57];
float luma30 = inp[local_pos + 58];
float luma31 = inp[local_pos + 59];
float luma32 = inp[local_pos + 60];
float luma33 = inp[local_pos + 61];
float luma34 = inp[local_pos + 62];
float luma35 = inp[local_pos + 70];
float luma36 = inp[local_pos + 71];
float luma37 = inp[local_pos + 72];
float luma38 = inp[local_pos + 73];
float luma39 = inp[local_pos + 74];
float luma40 = inp[local_pos + 75];
float luma41 = inp[local_pos + 76];
float luma42 = inp[local_pos + 84];
float luma43 = inp[local_pos + 85];
float luma44 = inp[local_pos + 86];
float luma45 = inp[local_pos + 87];
float luma46 = inp[local_pos + 88];
float luma47 = inp[local_pos + 89];
float luma48 = inp[local_pos + 90];
// Accumulate weighted gradient products over the inner 5x5 samples:
// abd = sum of w * (gx*gx, gx*gy, gy*gy), where gx and gy are central differences.
// The weights are symmetric about the centre sample, which has the largest weight.
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (luma15 - luma1) / 2.0;
gy = (luma9 - luma7) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437;
gx = (luma16 - luma2) / 2.0;
gy = (luma10 - luma8) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (luma17 - luma3) / 2.0;
gy = (luma11 - luma9) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906;
gx = (luma18 - luma4) / 2.0;
gy = (luma12 - luma10) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (luma19 - luma5) / 2.0;
gy = (luma13 - luma11) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437;
gx = (luma22 - luma8) / 2.0;
gy = (luma16 - luma14) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (luma23 - luma9) / 2.0;
gy = (luma17 - luma15) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137;
gx = (luma24 - luma10) / 2.0;
gy = (luma18 - luma16) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466;
gx = (luma25 - luma11) / 2.0;
gy = (luma19 - luma17) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137;
gx = (luma26 - luma12) / 2.0;
gy = (luma20 - luma18) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (luma29 - luma15) / 2.0;
gy = (luma23 - luma21) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906;
gx = (luma30 - luma16) / 2.0;
gy = (luma24 - luma22) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466;
gx = (luma31 - luma17) / 2.0;
gy = (luma25 - luma23) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06319146241026467;
gx = (luma32 - luma18) / 2.0;
gy = (luma26 - luma24) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466;
gx = (luma33 - luma19) / 2.0;
gy = (luma27 - luma25) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906;
gx = (luma36 - luma22) / 2.0;
gy = (luma30 - luma28) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (luma37 - luma23) / 2.0;
gy = (luma31 - luma29) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137;
gx = (luma38 - luma24) / 2.0;
gy = (luma32 - luma30) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466;
gx = (luma39 - luma25) / 2.0;
gy = (luma33 - luma31) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137;
gx = (luma40 - luma26) / 2.0;
gy = (luma34 - luma32) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (luma43 - luma29) / 2.0;
gy = (luma37 - luma35) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437;
gx = (luma44 - luma30) / 2.0;
gy = (luma38 - luma36) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (luma45 - luma31) / 2.0;
gy = (luma39 - luma37) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906;
gx = (luma46 - luma32) / 2.0;
gy = (luma40 - luma38) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (luma47 - luma33) / 2.0;
gy = (luma41 - luma39) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437;
// Eigen-analysis of the symmetric 2x2 matrix [[a, b], [b, d]]:
// T is its trace, D its determinant, and L1 >= L2 are its eigenvalues.
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
// theta is set to 0 when |b| is below float epsilon; otherwise it is the atan
// result shifted by pi and wrapped with mod (range [0, pi) assuming GLSL mod semantics).
// NOTE(review): mix() is called with a boolean third argument; this presumably
// selects the second argument when the condition is true - confirm in prescalers.hlsli.
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
// mu is a normalised difference of the two square roots, forced to 0 when their sum is near zero.
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
// Quantise into 24 angle buckets and three levels each of strength and coherence.
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = mix(mix(0.0, 1.0, lambda >= 0.005), 2.0, lambda >= 0.02);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
// Row of the lookup table: one of 24 * 3 * 3 = 216 rows; the +0.5 addresses the row centre.
float coord_y = ((angle * 3.0 + strength) * 3.0 + coherence + 0.5) / 216.0;
// res0 and res1 hold the eight filtered output values (four each).
// Every step fetches two weight vectors from adjacent columns of the table
// (x advances by 0.02 per column) and applies them to sample k and to its
// point-mirrored partner 48 - k, the latter with the weights swapped and reversed (.wzyx).
vec4 res0 = vec4(0.0, 0.0, 0.0, 0.0), res1 = vec4(0.0, 0.0, 0.0, 0.0);
vec4 w0, w1;
w0 = texture(ravu_3x_lut4, vec2(0.01, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.03, coord_y));
res0 += luma0 * w0 + luma48 * w1.wzyx;
res1 += luma0 * w1 + luma48 * w0.wzyx;
w0 = texture(ravu_3x_lut4, vec2(0.05, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.07, coord_y));
res0 += luma1 * w0 + luma47 * w1.wzyx;
res1 += luma1 * w1 + luma47 * w0.wzyx;
w0 = texture(ravu_3x_lut4, vec2(0.09, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.11, coord_y));
res0 += luma2 * w0 + luma46 * w1.wzyx;
res1 += luma2 * w1 + luma46 * w0.wzyx;
w0 = texture(ravu_3x_lut4, vec2(0.13, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.15, coord_y));
res0 += luma3 * w0 + luma45 * w1.wzyx;
res1 += luma3 * w1 + luma45 * w0.wzyx;
w0 = texture(ravu_3x_lut4, vec2(0.17, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.19, coord_y));
res0 += luma4 * w0 + luma44 * w1.wzyx;
res1 += luma4 * w1 + luma44 * w0.wzyx;
w0 = texture(ravu_3x_lut4, vec2(0.21, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.23, coord_y));
res0 += luma5 * w0 + luma43 * w1.wzyx;
res1 += luma5 * w1 + luma43 * w0.wzyx;
w0 = texture(ravu_3x_lut4, vec2(0.25, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.27, coord_y));
res0 += luma6 * w0 + luma42 * w1.wzyx;
res1 += luma6 * w1 + luma42 * w0.wzyx;
w0 = texture(ravu_3x_lut4, vec2(0.29, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.31, coord_y));
res0 += luma7 * w0 + luma41 * w1.wzyx;
res1 += luma7 * w1 + luma41 * w0.wzyx;
w0 = texture(ravu_3x_lut4, vec2(0.33, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.35, coord_y));
res0 += luma8 * w0 + luma40 * w1.wzyx;
res1 += luma8 * w1 + luma40 * w0.wzyx;
w0 = texture(ravu_3x_lut4, vec2(0.37, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.39, coord_y));
res0 += luma9 * w0 + luma39 * w1.wzyx;
res1 += luma9 * w1 + luma39 * w0.wzyx;
w0 = texture(ravu_3x_lut4, vec2(0.41, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.43, coord_y));
res0 += luma10 * w0 + luma38 * w1.wzyx;
res1 += luma10 * w1 + luma38 * w0.wzyx;
w0 = texture(ravu_3x_lut4, vec2(0.45, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.47, coord_y));
res0 += luma11 * w0 + luma37 * w1.wzyx;
res1 += luma11 * w1 + luma37 * w0.wzyx;
w0 = texture(ravu_3x_lut4, vec2(0.49, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.51, coord_y));
res0 += luma12 * w0 + luma36 * w1.wzyx;
res1 += luma12 * w1 + luma36 * w0.wzyx;
w0 = texture(ravu_3x_lut4, vec2(0.53, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.55, coord_y));
res0 += luma13 * w0 + luma35 * w1.wzyx;
res1 += luma13 * w1 + luma35 * w0.wzyx;
w0 = texture(ravu_3x_lut4, vec2(0.57, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.59, coord_y));
res0 += luma14 * w0 + luma34 * w1.wzyx;
res1 += luma14 * w1 + luma34 * w0.wzyx;
w0 = texture(ravu_3x_lut4, vec2(0.61, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.63, coord_y));
res0 += luma15 * w0 + luma33 * w1.wzyx;
res1 += luma15 * w1 + luma33 * w0.wzyx;
w0 = texture(ravu_3x_lut4, vec2(0.65, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.67, coord_y));
res0 += luma16 * w0 + luma32 * w1.wzyx;
res1 += luma16 * w1 + luma32 * w0.wzyx;
w0 = texture(ravu_3x_lut4, vec2(0.69, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.71, coord_y));
res0 += luma17 * w0 + luma31 * w1.wzyx;
res1 += luma17 * w1 + luma31 * w0.wzyx;
w0 = texture(ravu_3x_lut4, vec2(0.73, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.75, coord_y));
res0 += luma18 * w0 + luma30 * w1.wzyx;
res1 += luma18 * w1 + luma30 * w0.wzyx;
w0 = texture(ravu_3x_lut4, vec2(0.77, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.79, coord_y));
res0 += luma19 * w0 + luma29 * w1.wzyx;
res1 += luma19 * w1 + luma29 * w0.wzyx;
w0 = texture(ravu_3x_lut4, vec2(0.81, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.83, coord_y));
res0 += luma20 * w0 + luma28 * w1.wzyx;
res1 += luma20 * w1 + luma28 * w0.wzyx;
w0 = texture(ravu_3x_lut4, vec2(0.85, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.87, coord_y));
res0 += luma21 * w0 + luma27 * w1.wzyx;
res1 += luma21 * w1 + luma27 * w0.wzyx;
w0 = texture(ravu_3x_lut4, vec2(0.89, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.91, coord_y));
res0 += luma22 * w0 + luma26 * w1.wzyx;
res1 += luma22 * w1 + luma26 * w0.wzyx;
w0 = texture(ravu_3x_lut4, vec2(0.93, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.95, coord_y));
res0 += luma23 * w0 + luma25 * w1.wzyx;
res1 += luma23 * w1 + luma25 * w0.wzyx;
// The centre sample is its own mirror partner, so it contributes only once.
w0 = texture(ravu_3x_lut4, vec2(0.97, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.99, coord_y));
res0 += luma24 * w0;
res1 += luma24 * w1;
res0 = clamp(res0, 0.0, 1.0);
res1 = clamp(res1, 0.0, 1.0);
// Write the 3x3 output tile. The imageStore macro defined in this file ignores
// `out_image` and forwards to imageStoreOverride(). Tile position (1, 1) receives
// the unfiltered centre sample.
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 0), res0[0]);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 1), res0[1]);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 2), res0[2]);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 0), res0[3]);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 1), luma24);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 2), res1[0]);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 0), res1[1]);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 1), res1[2]);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 2), res1[3]);
}

View file

@ -0,0 +1,344 @@
// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers
// Please don't edit this file directly.
// Generated by: ravu-3x.py --target rgb --weights-file weights\ravu-3x_weights-r4.py --float-format float16dx --use-magpie --overwrite
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// Effect manifest for the RGB variant of RAVU-3x (r4).
// NOTE(review): the `//!` lines look like loader directives rather than ordinary
// comments - keep them byte-identical when editing.
//!MAGPIE EFFECT
//!VERSION 4
// Source image.
//!TEXTURE
Texture2D INPUT;
// Point-filtered sampler; presumably what the `texture(INPUT, ...)` macro uses - confirm in prescalers.hlsli.
//!SAMPLER
//!FILTER POINT
SamplerState sam_INPUT;
// Destination image: three times the input size in each dimension.
//!TEXTURE
//!WIDTH INPUT_WIDTH * 3
//!HEIGHT INPUT_HEIGHT * 3
Texture2D OUTPUT;
// Weight lookup table loaded from a .dds file, stored as four 16-bit floats per texel.
//!TEXTURE
//!SOURCE ravu_3x_lut4_f16.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_3x_lut4;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_3x_lut4;
// Shared section: helper definitions come from prescalers.hlsli (not visible here).
//!COMMON
#include "prescalers.hlsli"
// This effect has a single pass, so pass 1 is also the last one.
#define LAST_PASS 1
// With 32x8 threads and a 3x3 output tile per thread, one group covers 96x24 output pixels.
//!PASS 1
//!DESC RAVU-3x (rgb, r4)
//!IN INPUT, ravu_3x_lut4
//!OUT OUTPUT
//!BLOCK_SIZE 96, 24
//!NUM_THREADS 32, 8
// Per-channel weights that Pass1 dots with each RGB sample to obtain the
// scalar used for gradient analysis.
static const vec3 color_primary = vec3(0.2126, 0.7152, 0.0722);
// Stand-in for GLSL's outerProduct(), which HLSL lacks.
// Multiplies `r` as a 4x1 column by `l` as a 1x3 row, so row i of the
// 4x3 result equals r[i] * l.
float4x3 outerProduct(float3 l, float4 r) {
    const float4x1 asColumn = float4x1(r);
    const float1x3 asRow = float1x3(l);
    return mul(asColumn, asRow);
}
// Group-shared caches filled by Pass1: `inp` holds RGB samples and `inp_luma`
// the matching scalar values. Both are addressed as x * 14 + y (38 * 14 = 532 entries).
shared vec3 inp[532];
shared float inp_luma[532];
#define CURRENT_PASS 1
// In the RGB variant a fetched sample is used unchanged.
#define GET_SAMPLE(x) x
// Redirects GLSL-style imageStore() calls: the image argument is dropped and
// the full value is forwarded to imageStoreOverride().
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val)
// Target of the imageStore() macro in the RGB variant: stores `value`
// unchanged at `pos` in OUTPUT.
void imageStoreOverride(uint2 pos, float4 value) {
    OUTPUT[pos] = value;
}
// Accessors for the input texture. INPUT_tex() fetches a texel and passes it
// through GET_SAMPLE; `texture` and `vec4` are GLSL-style names that are not
// defined in this file (presumably supplied by prescalers.hlsli).
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
// Values returned by GetInputSize() / GetInputPt(), converted to float2.
// NOTE(review): INPUT_pt is presumably the size of one input texel in UV units - confirm.
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
// Accessor for the weight lookup table.
#define ravu_3x_lut4_tex(pos) (vec4(texture(ravu_3x_lut4, pos)))
// The HOOKED_* names used by the pass body are plain aliases for the INPUT_* ones.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// Pass 1 of RAVU-3x (rgb, r4).
// Each thread analyses a neighbourhood of scalar values derived from the RGB
// samples, then filters the RGB samples of a 7x7 neighbourhood to write a 3x3
// tile of output pixels: eight filtered colours plus the unfiltered centre sample.
//   blockStart - used together with threadId to compute this thread's output position
//                for the bounds check (presumably the output origin of the thread group).
//   threadId   - thread index; only .xy is read directly in this function.
// The gl_* identifiers, barrier(), texture(), mix(), mod() and the two-argument
// atan() are GLSL-style names not defined in this file (presumably mapped to
// HLSL by prescalers.hlsli).
void Pass1(uint2 blockStart, uint3 threadId) {
ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
// Index of this thread's top-left neighbourhood sample; the caches are laid out as x * 14 + y.
int local_pos = int(gl_LocalInvocationID.x) * 14 + int(gl_LocalInvocationID.y);
// The threads of the group fill all 532 cache entries between them, each thread
// striding by the group's thread count. Each entry stores the RGB sample and
// its dot product with color_primary.
for (int id = int(gl_LocalInvocationIndex); id < 532; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 14, y = (uint)id % 14;
inp[id] = HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-2.5), float(group_base.y + y) + (-2.5))).xyz;
inp_luma[id] = dot(inp[id], color_primary);
}
// All cache writes above must be finished before any thread reads the caches below.
barrier();
#if CURRENT_PASS == LAST_PASS
// Threads whose output tile starts outside the output image have nothing to write.
// This return comes after the barrier, so every thread still took part in the cache fill.
uint2 destPos = blockStart + threadId.xy * 3;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
// Scalar values needed by the gradient analysis: lumaN is at cache offset
// (N / 7) * 14 + (N % 7). The four corners (0, 6, 42, 48) and 42's row-mates that
// the gradients never touch are simply not loaded.
float luma1 = inp_luma[local_pos + 1];
float luma2 = inp_luma[local_pos + 2];
float luma3 = inp_luma[local_pos + 3];
float luma4 = inp_luma[local_pos + 4];
float luma5 = inp_luma[local_pos + 5];
float luma7 = inp_luma[local_pos + 14];
float luma8 = inp_luma[local_pos + 15];
float luma9 = inp_luma[local_pos + 16];
float luma10 = inp_luma[local_pos + 17];
float luma11 = inp_luma[local_pos + 18];
float luma12 = inp_luma[local_pos + 19];
float luma13 = inp_luma[local_pos + 20];
float luma14 = inp_luma[local_pos + 28];
float luma15 = inp_luma[local_pos + 29];
float luma16 = inp_luma[local_pos + 30];
float luma17 = inp_luma[local_pos + 31];
float luma18 = inp_luma[local_pos + 32];
float luma19 = inp_luma[local_pos + 33];
float luma20 = inp_luma[local_pos + 34];
float luma21 = inp_luma[local_pos + 42];
float luma22 = inp_luma[local_pos + 43];
float luma23 = inp_luma[local_pos + 44];
float luma24 = inp_luma[local_pos + 45];
float luma25 = inp_luma[local_pos + 46];
float luma26 = inp_luma[local_pos + 47];
float luma27 = inp_luma[local_pos + 48];
float luma28 = inp_luma[local_pos + 56];
float luma29 = inp_luma[local_pos + 57];
float luma30 = inp_luma[local_pos + 58];
float luma31 = inp_luma[local_pos + 59];
float luma32 = inp_luma[local_pos + 60];
float luma33 = inp_luma[local_pos + 61];
float luma34 = inp_luma[local_pos + 62];
float luma35 = inp_luma[local_pos + 70];
float luma36 = inp_luma[local_pos + 71];
float luma37 = inp_luma[local_pos + 72];
float luma38 = inp_luma[local_pos + 73];
float luma39 = inp_luma[local_pos + 74];
float luma40 = inp_luma[local_pos + 75];
float luma41 = inp_luma[local_pos + 76];
float luma43 = inp_luma[local_pos + 85];
float luma44 = inp_luma[local_pos + 86];
float luma45 = inp_luma[local_pos + 87];
float luma46 = inp_luma[local_pos + 88];
float luma47 = inp_luma[local_pos + 89];
// Accumulate weighted gradient products over the inner 5x5 samples:
// abd = sum of w * (gx*gx, gx*gy, gy*gy), where gx and gy are central differences.
// The weights are symmetric about the centre sample, which has the largest weight.
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (luma15 - luma1) / 2.0;
gy = (luma9 - luma7) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437;
gx = (luma16 - luma2) / 2.0;
gy = (luma10 - luma8) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (luma17 - luma3) / 2.0;
gy = (luma11 - luma9) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906;
gx = (luma18 - luma4) / 2.0;
gy = (luma12 - luma10) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (luma19 - luma5) / 2.0;
gy = (luma13 - luma11) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437;
gx = (luma22 - luma8) / 2.0;
gy = (luma16 - luma14) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (luma23 - luma9) / 2.0;
gy = (luma17 - luma15) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137;
gx = (luma24 - luma10) / 2.0;
gy = (luma18 - luma16) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466;
gx = (luma25 - luma11) / 2.0;
gy = (luma19 - luma17) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137;
gx = (luma26 - luma12) / 2.0;
gy = (luma20 - luma18) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (luma29 - luma15) / 2.0;
gy = (luma23 - luma21) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906;
gx = (luma30 - luma16) / 2.0;
gy = (luma24 - luma22) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466;
gx = (luma31 - luma17) / 2.0;
gy = (luma25 - luma23) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06319146241026467;
gx = (luma32 - luma18) / 2.0;
gy = (luma26 - luma24) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466;
gx = (luma33 - luma19) / 2.0;
gy = (luma27 - luma25) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906;
gx = (luma36 - luma22) / 2.0;
gy = (luma30 - luma28) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (luma37 - luma23) / 2.0;
gy = (luma31 - luma29) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137;
gx = (luma38 - luma24) / 2.0;
gy = (luma32 - luma30) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466;
gx = (luma39 - luma25) / 2.0;
gy = (luma33 - luma31) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137;
gx = (luma40 - luma26) / 2.0;
gy = (luma34 - luma32) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (luma43 - luma29) / 2.0;
gy = (luma37 - luma35) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437;
gx = (luma44 - luma30) / 2.0;
gy = (luma38 - luma36) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (luma45 - luma31) / 2.0;
gy = (luma39 - luma37) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906;
gx = (luma46 - luma32) / 2.0;
gy = (luma40 - luma38) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (luma47 - luma33) / 2.0;
gy = (luma41 - luma39) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437;
// Eigen-analysis of the symmetric 2x2 matrix [[a, b], [b, d]]:
// T is its trace, D its determinant, and L1 >= L2 are its eigenvalues.
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
// theta is set to 0 when |b| is below float epsilon; otherwise it is the atan
// result shifted by pi and wrapped with mod (range [0, pi) assuming GLSL mod semantics).
// NOTE(review): mix() is called with a boolean third argument; this presumably
// selects the second argument when the condition is true - confirm in prescalers.hlsli.
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
// mu is a normalised difference of the two square roots, forced to 0 when their sum is near zero.
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
// Quantise into 24 angle buckets and three levels each of strength and coherence.
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = mix(mix(0.0, 1.0, lambda >= 0.005), 2.0, lambda >= 0.02);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
// Row of the lookup table: one of 24 * 3 * 3 = 216 rows; the +0.5 addresses the row centre.
float coord_y = ((angle * 3.0 + strength) * 3.0 + coherence + 0.5) / 216.0;
// res0 and res1 hold the eight filtered output colours (four rows of RGB each).
// Every step fetches two weight vectors from adjacent columns of the table
// (x advances by 0.02 per column) and applies them, via outerProduct(), to the
// RGB sample at one cache offset and to its point-mirrored partner (the two
// offsets always sum to 90), the latter with the weights swapped and reversed (.wzyx).
mat4x3 res0 = 0.0, res1 = 0.0;
vec4 w0, w1;
w0 = texture(ravu_3x_lut4, vec2(0.01, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.03, coord_y));
res0 += outerProduct(inp[local_pos + 0], w0) + outerProduct(inp[local_pos + 90], w1.wzyx);
res1 += outerProduct(inp[local_pos + 0], w1) + outerProduct(inp[local_pos + 90], w0.wzyx);
w0 = texture(ravu_3x_lut4, vec2(0.05, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.07, coord_y));
res0 += outerProduct(inp[local_pos + 1], w0) + outerProduct(inp[local_pos + 89], w1.wzyx);
res1 += outerProduct(inp[local_pos + 1], w1) + outerProduct(inp[local_pos + 89], w0.wzyx);
w0 = texture(ravu_3x_lut4, vec2(0.09, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.11, coord_y));
res0 += outerProduct(inp[local_pos + 2], w0) + outerProduct(inp[local_pos + 88], w1.wzyx);
res1 += outerProduct(inp[local_pos + 2], w1) + outerProduct(inp[local_pos + 88], w0.wzyx);
w0 = texture(ravu_3x_lut4, vec2(0.13, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.15, coord_y));
res0 += outerProduct(inp[local_pos + 3], w0) + outerProduct(inp[local_pos + 87], w1.wzyx);
res1 += outerProduct(inp[local_pos + 3], w1) + outerProduct(inp[local_pos + 87], w0.wzyx);
w0 = texture(ravu_3x_lut4, vec2(0.17, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.19, coord_y));
res0 += outerProduct(inp[local_pos + 4], w0) + outerProduct(inp[local_pos + 86], w1.wzyx);
res1 += outerProduct(inp[local_pos + 4], w1) + outerProduct(inp[local_pos + 86], w0.wzyx);
w0 = texture(ravu_3x_lut4, vec2(0.21, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.23, coord_y));
res0 += outerProduct(inp[local_pos + 5], w0) + outerProduct(inp[local_pos + 85], w1.wzyx);
res1 += outerProduct(inp[local_pos + 5], w1) + outerProduct(inp[local_pos + 85], w0.wzyx);
w0 = texture(ravu_3x_lut4, vec2(0.25, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.27, coord_y));
res0 += outerProduct(inp[local_pos + 6], w0) + outerProduct(inp[local_pos + 84], w1.wzyx);
res1 += outerProduct(inp[local_pos + 6], w1) + outerProduct(inp[local_pos + 84], w0.wzyx);
w0 = texture(ravu_3x_lut4, vec2(0.29, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.31, coord_y));
res0 += outerProduct(inp[local_pos + 14], w0) + outerProduct(inp[local_pos + 76], w1.wzyx);
res1 += outerProduct(inp[local_pos + 14], w1) + outerProduct(inp[local_pos + 76], w0.wzyx);
w0 = texture(ravu_3x_lut4, vec2(0.33, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.35, coord_y));
res0 += outerProduct(inp[local_pos + 15], w0) + outerProduct(inp[local_pos + 75], w1.wzyx);
res1 += outerProduct(inp[local_pos + 15], w1) + outerProduct(inp[local_pos + 75], w0.wzyx);
w0 = texture(ravu_3x_lut4, vec2(0.37, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.39, coord_y));
res0 += outerProduct(inp[local_pos + 16], w0) + outerProduct(inp[local_pos + 74], w1.wzyx);
res1 += outerProduct(inp[local_pos + 16], w1) + outerProduct(inp[local_pos + 74], w0.wzyx);
w0 = texture(ravu_3x_lut4, vec2(0.41, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.43, coord_y));
res0 += outerProduct(inp[local_pos + 17], w0) + outerProduct(inp[local_pos + 73], w1.wzyx);
res1 += outerProduct(inp[local_pos + 17], w1) + outerProduct(inp[local_pos + 73], w0.wzyx);
w0 = texture(ravu_3x_lut4, vec2(0.45, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.47, coord_y));
res0 += outerProduct(inp[local_pos + 18], w0) + outerProduct(inp[local_pos + 72], w1.wzyx);
res1 += outerProduct(inp[local_pos + 18], w1) + outerProduct(inp[local_pos + 72], w0.wzyx);
w0 = texture(ravu_3x_lut4, vec2(0.49, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.51, coord_y));
res0 += outerProduct(inp[local_pos + 19], w0) + outerProduct(inp[local_pos + 71], w1.wzyx);
res1 += outerProduct(inp[local_pos + 19], w1) + outerProduct(inp[local_pos + 71], w0.wzyx);
w0 = texture(ravu_3x_lut4, vec2(0.53, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.55, coord_y));
res0 += outerProduct(inp[local_pos + 20], w0) + outerProduct(inp[local_pos + 70], w1.wzyx);
res1 += outerProduct(inp[local_pos + 20], w1) + outerProduct(inp[local_pos + 70], w0.wzyx);
w0 = texture(ravu_3x_lut4, vec2(0.57, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.59, coord_y));
res0 += outerProduct(inp[local_pos + 28], w0) + outerProduct(inp[local_pos + 62], w1.wzyx);
res1 += outerProduct(inp[local_pos + 28], w1) + outerProduct(inp[local_pos + 62], w0.wzyx);
w0 = texture(ravu_3x_lut4, vec2(0.61, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.63, coord_y));
res0 += outerProduct(inp[local_pos + 29], w0) + outerProduct(inp[local_pos + 61], w1.wzyx);
res1 += outerProduct(inp[local_pos + 29], w1) + outerProduct(inp[local_pos + 61], w0.wzyx);
w0 = texture(ravu_3x_lut4, vec2(0.65, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.67, coord_y));
res0 += outerProduct(inp[local_pos + 30], w0) + outerProduct(inp[local_pos + 60], w1.wzyx);
res1 += outerProduct(inp[local_pos + 30], w1) + outerProduct(inp[local_pos + 60], w0.wzyx);
w0 = texture(ravu_3x_lut4, vec2(0.69, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.71, coord_y));
res0 += outerProduct(inp[local_pos + 31], w0) + outerProduct(inp[local_pos + 59], w1.wzyx);
res1 += outerProduct(inp[local_pos + 31], w1) + outerProduct(inp[local_pos + 59], w0.wzyx);
w0 = texture(ravu_3x_lut4, vec2(0.73, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.75, coord_y));
res0 += outerProduct(inp[local_pos + 32], w0) + outerProduct(inp[local_pos + 58], w1.wzyx);
res1 += outerProduct(inp[local_pos + 32], w1) + outerProduct(inp[local_pos + 58], w0.wzyx);
w0 = texture(ravu_3x_lut4, vec2(0.77, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.79, coord_y));
res0 += outerProduct(inp[local_pos + 33], w0) + outerProduct(inp[local_pos + 57], w1.wzyx);
res1 += outerProduct(inp[local_pos + 33], w1) + outerProduct(inp[local_pos + 57], w0.wzyx);
w0 = texture(ravu_3x_lut4, vec2(0.81, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.83, coord_y));
res0 += outerProduct(inp[local_pos + 34], w0) + outerProduct(inp[local_pos + 56], w1.wzyx);
res1 += outerProduct(inp[local_pos + 34], w1) + outerProduct(inp[local_pos + 56], w0.wzyx);
w0 = texture(ravu_3x_lut4, vec2(0.85, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.87, coord_y));
res0 += outerProduct(inp[local_pos + 42], w0) + outerProduct(inp[local_pos + 48], w1.wzyx);
res1 += outerProduct(inp[local_pos + 42], w1) + outerProduct(inp[local_pos + 48], w0.wzyx);
w0 = texture(ravu_3x_lut4, vec2(0.89, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.91, coord_y));
res0 += outerProduct(inp[local_pos + 43], w0) + outerProduct(inp[local_pos + 47], w1.wzyx);
res1 += outerProduct(inp[local_pos + 43], w1) + outerProduct(inp[local_pos + 47], w0.wzyx);
w0 = texture(ravu_3x_lut4, vec2(0.93, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.95, coord_y));
res0 += outerProduct(inp[local_pos + 44], w0) + outerProduct(inp[local_pos + 46], w1.wzyx);
res1 += outerProduct(inp[local_pos + 44], w1) + outerProduct(inp[local_pos + 46], w0.wzyx);
// The centre sample (offset 45) is its own mirror partner, so it contributes only once.
w0 = texture(ravu_3x_lut4, vec2(0.97, coord_y));
w1 = texture(ravu_3x_lut4, vec2(0.99, coord_y));
res0 += outerProduct(inp[local_pos + 45], w0);
res1 += outerProduct(inp[local_pos + 45], w1);
// Clamp every output colour to [0, 1], one matrix row at a time.
res0[0] = clamp(res0[0], 0.0, 1.0);
res0[1] = clamp(res0[1], 0.0, 1.0);
res0[2] = clamp(res0[2], 0.0, 1.0);
res0[3] = clamp(res0[3], 0.0, 1.0);
res1[0] = clamp(res1[0], 0.0, 1.0);
res1[1] = clamp(res1[1], 0.0, 1.0);
res1[2] = clamp(res1[2], 0.0, 1.0);
res1[3] = clamp(res1[3], 0.0, 1.0);
// Write the 3x3 output tile with alpha 1. The imageStore macro defined in this
// file ignores `out_image` and forwards to imageStoreOverride(). Tile position
// (1, 1) receives the unfiltered centre sample.
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 0), vec4(res0[0], 1.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 1), vec4(res0[1], 1.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(0, 2), vec4(res0[2], 1.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 0), vec4(res0[3], 1.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 1), vec4(inp[local_pos + 45], 1.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(1, 2), vec4(res1[0], 1.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 0), vec4(res1[1], 1.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 1), vec4(res1[2], 1.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 3 + ivec2(2, 2), vec4(res1[3], 1.0));
}

View file

@ -0,0 +1,224 @@
// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers
// Please don't edit this file directly.
// Generated by: ravu-lite.py --weights-file weights\ravu-lite_weights-r2.py --float-format float16dx --use-compute-shader --anti-ringing 0.8 --use-magpie --overwrite
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// Effect manifest for RAVU-Lite-AR (r2, compute).
// NOTE(review): the `//!` lines look like loader directives rather than ordinary
// comments - keep them byte-identical when editing.
//!MAGPIE EFFECT
//!VERSION 4
// Source image.
//!TEXTURE
Texture2D INPUT;
// Point-filtered sampler; presumably what the `texture(INPUT, ...)` macro uses - confirm in prescalers.hlsli.
//!SAMPLER
//!FILTER POINT
SamplerState sam_INPUT;
// Destination image: twice the input size in each dimension.
//!TEXTURE
//!WIDTH INPUT_WIDTH * 2
//!HEIGHT INPUT_HEIGHT * 2
Texture2D OUTPUT;
// Linear sampler used by imageStoreOverride() to fetch the source colour at each output position.
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_INPUT_LINEAR;
// Weight lookup table loaded from a .dds file, stored as four 16-bit floats per texel.
//!TEXTURE
//!SOURCE ravu_lite_lut2_f16.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_lite_lut2;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_lite_lut2;
// Shared section: helper definitions come from prescalers.hlsli (not visible here).
//!COMMON
#include "prescalers.hlsli"
// This effect has a single pass, so pass 1 is also the last one.
#define LAST_PASS 1
// 32x8 threads per group covering 64x16 output pixels, i.e. a 2x2 output tile per thread.
//!PASS 1
//!DESC RAVU-Lite-AR (r2, compute)
//!IN INPUT, ravu_lite_lut2
//!OUT OUTPUT
//!BLOCK_SIZE 64, 16
//!NUM_THREADS 32, 8
// Group-shared cache of input samples; Pass1 addresses it as x * 10 + y (34 * 10 = 340 entries).
shared float inp[340];
#define CURRENT_PASS 1
// Reduces a fetched colour to one scalar via a dot product with rgb2y
// (rgb2y is not defined in this file; presumably it comes from prescalers.hlsli).
#define GET_SAMPLE(x) dot(x.rgb, rgb2y)
// Redirects GLSL-style imageStore() calls: the image argument is dropped and only
// the first component of the value is forwarded to imageStoreOverride().
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x)
// Writes one output pixel from a filtered scalar `value`.
// The other two channels are derived from the source image: INPUT is sampled
// with the linear sampler at HOOKED_map(pos) and transformed by rgb2uv. The
// resulting triple (value, UV) is transformed by yuv2rgb and stored with alpha 1.
// rgb2uv, yuv2rgb and HOOKED_map are not defined in this file (presumably
// provided by prescalers.hlsli).
void imageStoreOverride(uint2 pos, float value) {
    const float3 sourceRgb = INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb;
    const float2 chroma = mul(rgb2uv, sourceRgb);
    const float3 yuv = float3(value.x, chroma);
    OUTPUT[pos] = float4(mul(yuv2rgb, yuv), 1.0);
}
// Accessors for the input texture. INPUT_tex() fetches a texel and reduces it
// with GET_SAMPLE; `texture` and `vec4` are GLSL-style names that are not
// defined in this file (presumably supplied by prescalers.hlsli).
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
// Values returned by GetInputSize() / GetInputPt(), converted to float2.
// NOTE(review): INPUT_pt is presumably the size of one input texel in UV units - confirm.
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
// Accessor for the weight lookup table.
#define ravu_lite_lut2_tex(pos) (vec4(texture(ravu_lite_lut2, pos)))
// The HOOKED_* names used by the pass body are plain aliases for the INPUT_* ones.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// Pass 1 entry point (RAVU-Lite r2 with anti-ringing).
// Each thread reads a 3x3 window from the tile `inp`, derives a lookup-table row from
// the window's gradient statistics, filters the window with the weights of that row and
// stores a 2x2 block of results through imageStore.
//   blockStart - added to threadId.xy * 2 to form the thread's destination position
//                (presumably the output origin of the work group's block).
//   threadId   - thread index inside the work group.
// NOTE(review): the gl_* identifiers, vec/ivec types, texture(), mix(), mod(), atan()
// and barrier() are GLSL spellings; presumably prescalers.hlsli maps them to HLSL.
void Pass1(uint2 blockStart, uint3 threadId) {
ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
// Tile index of the first window entry of this thread; the tile is indexed as x * 10 + y.
int local_pos = int(gl_LocalInvocationID.x) * 10 + int(gl_LocalInvocationID.y);
#pragma warning(disable : 3557)
// Cooperative fill of the 340-entry tile: each thread loads entries id, id + group size, ...
// Entry (x, y) is fetched at HOOKED_pt * (group_base + (x, y) - 0.5).
for (int id = int(gl_LocalInvocationIndex); id < 340; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 10, y = (uint)id % 10;
inp[id] = HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-0.5), float(group_base.y + y) + (-0.5))).x;
}
barrier();
#if CURRENT_PASS == LAST_PASS
// Bounds check against the output size. It comes after the barrier, so threads that
// return here have already contributed their tile entries.
uint2 destPos = blockStart + threadId.xy * 2;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
// Accumulate weighted gradient products (gx*gx, gx*gy, gy*gy) over the 3x3 window.
// gx differences run along x (stride 10), gy along y (stride 1). Differences spanning
// two entries are halved; window-edge points use a one-entry difference instead.
// The nine weights are symmetric about the centre and sum to 1.
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (inp[local_pos + 10] - inp[local_pos + 0]);
gy = (inp[local_pos + 1] - inp[local_pos + 0]);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
gx = (inp[local_pos + 11] - inp[local_pos + 1]);
gy = (inp[local_pos + 2] - inp[local_pos + 0]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (inp[local_pos + 12] - inp[local_pos + 2]);
gy = (inp[local_pos + 2] - inp[local_pos + 1]);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
gx = (inp[local_pos + 20] - inp[local_pos + 0]) / 2.0;
gy = (inp[local_pos + 11] - inp[local_pos + 10]);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (inp[local_pos + 21] - inp[local_pos + 1]) / 2.0;
gy = (inp[local_pos + 12] - inp[local_pos + 10]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.13080118386382833;
gx = (inp[local_pos + 22] - inp[local_pos + 2]) / 2.0;
gy = (inp[local_pos + 12] - inp[local_pos + 11]);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (inp[local_pos + 20] - inp[local_pos + 10]);
gy = (inp[local_pos + 21] - inp[local_pos + 20]);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
gx = (inp[local_pos + 21] - inp[local_pos + 11]);
gy = (inp[local_pos + 22] - inp[local_pos + 20]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (inp[local_pos + 22] - inp[local_pos + 12]);
gy = (inp[local_pos + 22] - inp[local_pos + 21]);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
// Eigen-analysis of the symmetric 2x2 matrix [a b; b d]: T is its trace, D its
// determinant, and L1 >= L2 are its eigenvalues T/2 +/- delta.
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
// Orientation wrapped into [0, pi); forced to 0 when |b| is below 1.192092896e-7.
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
// Relative difference of the eigenvalue square roots; 0 when their sum is negligible.
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
// Quantise into 24 angle bins, 4 strength bins (thresholds 0.004 / 0.016 / 0.05) and
// 3 coherence bins (thresholds 0.25 / 0.5).
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = mix(mix(0.0, 1.0, lambda >= 0.004), mix(2.0, 3.0, lambda >= 0.05), lambda >= 0.016);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
// Row coordinate in the lookup table: 24 * 4 * 3 = 288 rows, +0.5 addresses the row centre.
float coord_y = ((angle * 4.0 + strength) * 3.0 + coherence + 0.5) / 288.0;
vec4 res = vec4(0.0, 0.0, 0.0, 0.0), w;
vec4 lo = vec4(0.0, 0.0, 0.0, 0.0), hi = vec4(0.0, 0.0, 0.0, 0.0), lo2 = vec4(0.0, 0.0, 0.0, 0.0),
hi2 = vec4(0.0, 0.0, 0.0, 0.0), wg, cg4, cg4_1;
// Filtering. The table is fetched at five x positions ((2k + 1) / 10). Each fetch
// returns four weights, one per result component. Window offset k and its mirror
// 22 - k share a fetch, the mirror using the reversed weights (w.wzyx); offset 11
// is the unpaired centre.
// Anti-ringing bookkeeping for each sample s, with wg = max(0, w):
//   hi  += wg * (0.1 + s)^32,  hi2 += wg * (0.1 + s)^33
//   lo  += wg * (1.1 - s)^32,  lo2 += wg * (1.1 - s)^33
// (five squarings give the 32nd power; multiplying by cg4_1 gives the 33rd).
w = texture(ravu_lite_lut2, vec2(0.1, coord_y));
wg = max(vec4(0.0, 0.0, 0.0, 0.0), w);
res += inp[local_pos + 0] * w + inp[local_pos + 22] * w.wzyx;
cg4 =
vec4(0.1 + inp[local_pos + 0], 1.1 - inp[local_pos + 0], 0.1 + inp[local_pos + 22], 1.1 - inp[local_pos + 22]);
cg4_1 = cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
hi += cg4.x * wg + cg4.z * wg.wzyx;
lo += cg4.y * wg + cg4.w * wg.wzyx;
cg4 *= cg4_1;
hi2 += cg4.x * wg + cg4.z * wg.wzyx;
lo2 += cg4.y * wg + cg4.w * wg.wzyx;
// Offsets 1 and 21.
w = texture(ravu_lite_lut2, vec2(0.3, coord_y));
wg = max(vec4(0.0, 0.0, 0.0, 0.0), w);
res += inp[local_pos + 1] * w + inp[local_pos + 21] * w.wzyx;
cg4 =
vec4(0.1 + inp[local_pos + 1], 1.1 - inp[local_pos + 1], 0.1 + inp[local_pos + 21], 1.1 - inp[local_pos + 21]);
cg4_1 = cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
hi += cg4.x * wg + cg4.z * wg.wzyx;
lo += cg4.y * wg + cg4.w * wg.wzyx;
cg4 *= cg4_1;
hi2 += cg4.x * wg + cg4.z * wg.wzyx;
lo2 += cg4.y * wg + cg4.w * wg.wzyx;
// Offsets 2 and 20.
w = texture(ravu_lite_lut2, vec2(0.5, coord_y));
wg = max(vec4(0.0, 0.0, 0.0, 0.0), w);
res += inp[local_pos + 2] * w + inp[local_pos + 20] * w.wzyx;
cg4 =
vec4(0.1 + inp[local_pos + 2], 1.1 - inp[local_pos + 2], 0.1 + inp[local_pos + 20], 1.1 - inp[local_pos + 20]);
cg4_1 = cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
hi += cg4.x * wg + cg4.z * wg.wzyx;
lo += cg4.y * wg + cg4.w * wg.wzyx;
cg4 *= cg4_1;
hi2 += cg4.x * wg + cg4.z * wg.wzyx;
lo2 += cg4.y * wg + cg4.w * wg.wzyx;
// Offsets 10 and 12.
w = texture(ravu_lite_lut2, vec2(0.7, coord_y));
wg = max(vec4(0.0, 0.0, 0.0, 0.0), w);
res += inp[local_pos + 10] * w + inp[local_pos + 12] * w.wzyx;
cg4 = vec4(0.1 + inp[local_pos + 10], 1.1 - inp[local_pos + 10], 0.1 + inp[local_pos + 12],
1.1 - inp[local_pos + 12]);
cg4_1 = cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
hi += cg4.x * wg + cg4.z * wg.wzyx;
lo += cg4.y * wg + cg4.w * wg.wzyx;
cg4 *= cg4_1;
hi2 += cg4.x * wg + cg4.z * wg.wzyx;
lo2 += cg4.y * wg + cg4.w * wg.wzyx;
// Centre sample (offset 11): no mirror partner, so only two power terms are needed.
w = texture(ravu_lite_lut2, vec2(0.9, coord_y));
wg = max(vec4(0.0, 0.0, 0.0, 0.0), w);
res += inp[local_pos + 11] * w;
vec2 cg2 = vec2(0.1 + inp[local_pos + 11], 1.1 - inp[local_pos + 11]);
vec2 cg2_1 = cg2;
cg2 *= cg2;
cg2 *= cg2;
cg2 *= cg2;
cg2 *= cg2;
cg2 *= cg2;
hi += cg2.x * wg;
lo += cg2.y * wg;
cg2 *= cg2_1;
hi2 += cg2.x * wg;
lo2 += cg2.y * wg;
// The ratios of the 33rd-power sums to the 32nd-power sums are dominated by the
// largest terms, so hi acts as a smooth upper bound and lo as a smooth lower bound
// on the samples. The result is blended 80% towards its clamped value.
lo = 1.1 - lo2 / lo;
hi = hi2 / hi - 0.1;
res = mix(res, clamp(res, lo, hi), 0.800000);
// Store the four components at offsets (0,0), (0,1), (1,0), (1,1) of the thread's
// 2x2 block; the imageStore macro forwards the position and the .x value to imageStoreOverride.
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 0), vec4(res[0], 0.0, 0.0, 0.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 1), vec4(res[1], 0.0, 0.0, 0.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 0), vec4(res[2], 0.0, 0.0, 0.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 1), vec4(res[3], 0.0, 0.0, 0.0));
}

View file

@ -0,0 +1,268 @@
// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers
// Please don't edit this file directly.
// Generated by: ravu-lite.py --weights-file weights\ravu-lite_weights-r3.py --float-format float16dx --use-compute-shader --anti-ringing 0.8 --use-magpie --overwrite
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//!MAGPIE EFFECT
//!VERSION 4
// Image the pass reads from.
//!TEXTURE
Texture2D INPUT;
// Point-filtering sampler.
//!SAMPLER
//!FILTER POINT
SamplerState sam_INPUT;
// Result image, declared at twice the INPUT width and height.
//!TEXTURE
//!WIDTH INPUT_WIDTH * 2
//!HEIGHT INPUT_HEIGHT * 2
Texture2D OUTPUT;
// Linear-filtering sampler; imageStoreOverride samples INPUT through it.
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_INPUT_LINEAR;
// Kernel lookup table loaded from ravu_lite_lut3_f16.dds (four 16-bit float channels per texel).
//!TEXTURE
//!SOURCE ravu_lite_lut3_f16.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_lite_lut3;
// Linear-filtering sampler named after the lookup table.
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_lite_lut3;
// Shared section: helper header plus the index of the last pass, which Pass1 compares
// against CURRENT_PASS to enable its output bounds check.
//!COMMON
#include "prescalers.hlsli"
#define LAST_PASS 1
// Single compute pass: 32 x 8 threads per group, each group covering a 64 x 16 block of OUTPUT.
//!PASS 1
//!DESC RAVU-Lite-AR (r3, compute)
//!IN INPUT, ravu_lite_lut3
//!OUT OUTPUT
//!BLOCK_SIZE 64, 16
//!NUM_THREADS 32, 8
// Luma tile filled at the start of Pass1: 36 x 12 entries, indexed as x * 12 + y.
// NOTE(review): `shared` is presumably mapped to groupshared by prescalers.hlsli - confirm.
shared float inp[432];
#define CURRENT_PASS 1
// Reduces an RGB sample to one value using the rgb2y weights.
#define GET_SAMPLE(x) dot(x.rgb, rgb2y)
// Redirects GLSL-style imageStore calls to imageStoreOverride; the image argument is
// dropped and only the .x component of the value is forwarded.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x)
// Stores one pixel of OUTPUT at `pos`. `value` supplies the first channel of a
// three-channel vector whose other two channels come from a linear-filtered sample
// of INPUT transformed by rgb2uv; the vector is then transformed by yuv2rgb and
// written with alpha 1.0.
// NOTE(review): HOOKED_map, rgb2uv and yuv2rgb are not declared in this file;
// presumably they come from prescalers.hlsli - confirm there.
void imageStoreOverride(uint2 pos, float value) {
	const float3 srcRgb = INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb;
	const float2 chroma = mul(rgb2uv, srcRgb);
	const float3 yuv = float3(value, chroma);
	OUTPUT[pos] = float4(mul(yuv2rgb, yuv), 1.0);
}
// GLSL-style accessors used by the generated pass body.
// INPUT_tex fetches INPUT at `pos` and reduces the result with GET_SAMPLE.
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
// Values returned by GetInputSize() / GetInputPt(), converted to float2.
// NOTE(review): presumably the input size in pixels and the size of one pixel in
// texture coordinates - confirm against the helper definitions.
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
// Fetches the kernel lookup table at `pos`.
#define ravu_lite_lut3_tex(pos) (vec4(texture(ravu_lite_lut3, pos)))
// The HOOKED_* names are aliases of the INPUT_* accessors above.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// Pass 1 entry point (RAVU-Lite r3 with anti-ringing).
// Each thread reads a 5x5 window from the tile `inp`, derives a lookup-table row from
// the gradient statistics of the inner 3x3 points, filters the window with the weights
// of that row and stores a 2x2 block of results through imageStore.
//   blockStart - added to threadId.xy * 2 to form the thread's destination position
//                (presumably the output origin of the work group's block).
//   threadId   - thread index inside the work group.
// NOTE(review): the gl_* identifiers, vec/ivec types, texture(), mix(), mod(), atan()
// and barrier() are GLSL spellings; presumably prescalers.hlsli maps them to HLSL.
void Pass1(uint2 blockStart, uint3 threadId) {
ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
// Tile index of the first window entry of this thread; the tile is indexed as x * 12 + y.
int local_pos = int(gl_LocalInvocationID.x) * 12 + int(gl_LocalInvocationID.y);
#pragma warning(disable : 3557)
// Cooperative fill of the 432-entry tile: each thread loads entries id, id + group size, ...
// Entry (x, y) is fetched at HOOKED_pt * (group_base + (x, y) - 1.5).
for (int id = int(gl_LocalInvocationIndex); id < 432; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 12, y = (uint)id % 12;
inp[id] = HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-1.5), float(group_base.y + y) + (-1.5))).x;
}
barrier();
#if CURRENT_PASS == LAST_PASS
// Bounds check against the output size. It comes after the barrier, so threads that
// return here have already contributed their tile entries.
uint2 destPos = blockStart + threadId.xy * 2;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
// Accumulate weighted gradient products (gx*gx, gx*gy, gy*gy) at the nine points of
// the inner 3x3 (offsets 13-15, 25-27, 37-39). gx is a halved two-entry difference
// along x (stride 12), gy the same along y (stride 1). The nine weights are symmetric
// about the centre and sum to 1.
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (inp[local_pos + 25] - inp[local_pos + 1]) / 2.0;
gy = (inp[local_pos + 14] - inp[local_pos + 12]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
gx = (inp[local_pos + 26] - inp[local_pos + 2]) / 2.0;
gy = (inp[local_pos + 15] - inp[local_pos + 13]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (inp[local_pos + 27] - inp[local_pos + 3]) / 2.0;
gy = (inp[local_pos + 16] - inp[local_pos + 14]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
gx = (inp[local_pos + 37] - inp[local_pos + 13]) / 2.0;
gy = (inp[local_pos + 26] - inp[local_pos + 24]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (inp[local_pos + 38] - inp[local_pos + 14]) / 2.0;
gy = (inp[local_pos + 27] - inp[local_pos + 25]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.13080118386382833;
gx = (inp[local_pos + 39] - inp[local_pos + 15]) / 2.0;
gy = (inp[local_pos + 28] - inp[local_pos + 26]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (inp[local_pos + 49] - inp[local_pos + 25]) / 2.0;
gy = (inp[local_pos + 38] - inp[local_pos + 36]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
gx = (inp[local_pos + 50] - inp[local_pos + 26]) / 2.0;
gy = (inp[local_pos + 39] - inp[local_pos + 37]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (inp[local_pos + 51] - inp[local_pos + 27]) / 2.0;
gy = (inp[local_pos + 40] - inp[local_pos + 38]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
// Eigen-analysis of the symmetric 2x2 matrix [a b; b d]: T is its trace, D its
// determinant, and L1 >= L2 are its eigenvalues T/2 +/- delta.
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
// Orientation wrapped into [0, pi); forced to 0 when |b| is below 1.192092896e-7.
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
// Relative difference of the eigenvalue square roots; 0 when their sum is negligible.
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
// Quantise into 24 angle bins, 4 strength bins (thresholds 0.004 / 0.016 / 0.05) and
// 3 coherence bins (thresholds 0.25 / 0.5).
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = mix(mix(0.0, 1.0, lambda >= 0.004), mix(2.0, 3.0, lambda >= 0.05), lambda >= 0.016);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
// Row coordinate in the lookup table: 24 * 4 * 3 = 288 rows, +0.5 addresses the row centre.
float coord_y = ((angle * 4.0 + strength) * 3.0 + coherence + 0.5) / 288.0;
vec4 res = vec4(0.0, 0.0, 0.0, 0.0), w;
vec4 lo = vec4(0.0, 0.0, 0.0, 0.0), hi = vec4(0.0, 0.0, 0.0, 0.0), lo2 = vec4(0.0, 0.0, 0.0, 0.0),
hi2 = vec4(0.0, 0.0, 0.0, 0.0), wg, cg4, cg4_1;
// Filtering. The table is fetched at thirteen x positions ((2k + 1) / 26). Each fetch
// returns four weights, one per result component. Window offset k and its mirror
// 52 - k share a fetch, the mirror using the reversed weights (w.wzyx); offset 26
// is the unpaired centre.
// Anti-ringing bookkeeping is done only for the 13 samples within Manhattan distance 2
// of the centre. For each such sample s, with wg = max(0, w):
//   hi  += wg * (0.1 + s)^32,  hi2 += wg * (0.1 + s)^33
//   lo  += wg * (1.1 - s)^32,  lo2 += wg * (1.1 - s)^33
// (five squarings give the 32nd power; multiplying by cg4_1 gives the 33rd).
w = texture(ravu_lite_lut3, vec2(0.038461538461538464, coord_y));
res += inp[local_pos + 0] * w + inp[local_pos + 52] * w.wzyx;
w = texture(ravu_lite_lut3, vec2(0.11538461538461539, coord_y));
res += inp[local_pos + 1] * w + inp[local_pos + 51] * w.wzyx;
// Offsets 2 and 50: first pair that also feeds the anti-ringing sums.
w = texture(ravu_lite_lut3, vec2(0.19230769230769232, coord_y));
wg = max(vec4(0.0, 0.0, 0.0, 0.0), w);
res += inp[local_pos + 2] * w + inp[local_pos + 50] * w.wzyx;
cg4 =
vec4(0.1 + inp[local_pos + 2], 1.1 - inp[local_pos + 2], 0.1 + inp[local_pos + 50], 1.1 - inp[local_pos + 50]);
cg4_1 = cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
hi += cg4.x * wg + cg4.z * wg.wzyx;
lo += cg4.y * wg + cg4.w * wg.wzyx;
cg4 *= cg4_1;
hi2 += cg4.x * wg + cg4.z * wg.wzyx;
lo2 += cg4.y * wg + cg4.w * wg.wzyx;
w = texture(ravu_lite_lut3, vec2(0.2692307692307692, coord_y));
res += inp[local_pos + 3] * w + inp[local_pos + 49] * w.wzyx;
w = texture(ravu_lite_lut3, vec2(0.34615384615384615, coord_y));
res += inp[local_pos + 4] * w + inp[local_pos + 48] * w.wzyx;
w = texture(ravu_lite_lut3, vec2(0.4230769230769231, coord_y));
res += inp[local_pos + 12] * w + inp[local_pos + 40] * w.wzyx;
// Offsets 13 and 39 (with anti-ringing sums).
w = texture(ravu_lite_lut3, vec2(0.5, coord_y));
wg = max(vec4(0.0, 0.0, 0.0, 0.0), w);
res += inp[local_pos + 13] * w + inp[local_pos + 39] * w.wzyx;
cg4 = vec4(0.1 + inp[local_pos + 13], 1.1 - inp[local_pos + 13], 0.1 + inp[local_pos + 39],
1.1 - inp[local_pos + 39]);
cg4_1 = cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
hi += cg4.x * wg + cg4.z * wg.wzyx;
lo += cg4.y * wg + cg4.w * wg.wzyx;
cg4 *= cg4_1;
hi2 += cg4.x * wg + cg4.z * wg.wzyx;
lo2 += cg4.y * wg + cg4.w * wg.wzyx;
// Offsets 14 and 38 (with anti-ringing sums).
w = texture(ravu_lite_lut3, vec2(0.5769230769230769, coord_y));
wg = max(vec4(0.0, 0.0, 0.0, 0.0), w);
res += inp[local_pos + 14] * w + inp[local_pos + 38] * w.wzyx;
cg4 = vec4(0.1 + inp[local_pos + 14], 1.1 - inp[local_pos + 14], 0.1 + inp[local_pos + 38],
1.1 - inp[local_pos + 38]);
cg4_1 = cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
hi += cg4.x * wg + cg4.z * wg.wzyx;
lo += cg4.y * wg + cg4.w * wg.wzyx;
cg4 *= cg4_1;
hi2 += cg4.x * wg + cg4.z * wg.wzyx;
lo2 += cg4.y * wg + cg4.w * wg.wzyx;
// Offsets 15 and 37 (with anti-ringing sums).
w = texture(ravu_lite_lut3, vec2(0.6538461538461539, coord_y));
wg = max(vec4(0.0, 0.0, 0.0, 0.0), w);
res += inp[local_pos + 15] * w + inp[local_pos + 37] * w.wzyx;
cg4 = vec4(0.1 + inp[local_pos + 15], 1.1 - inp[local_pos + 15], 0.1 + inp[local_pos + 37],
1.1 - inp[local_pos + 37]);
cg4_1 = cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
hi += cg4.x * wg + cg4.z * wg.wzyx;
lo += cg4.y * wg + cg4.w * wg.wzyx;
cg4 *= cg4_1;
hi2 += cg4.x * wg + cg4.z * wg.wzyx;
lo2 += cg4.y * wg + cg4.w * wg.wzyx;
w = texture(ravu_lite_lut3, vec2(0.7307692307692307, coord_y));
res += inp[local_pos + 16] * w + inp[local_pos + 36] * w.wzyx;
// Offsets 24 and 28 (with anti-ringing sums).
w = texture(ravu_lite_lut3, vec2(0.8076923076923077, coord_y));
wg = max(vec4(0.0, 0.0, 0.0, 0.0), w);
res += inp[local_pos + 24] * w + inp[local_pos + 28] * w.wzyx;
cg4 = vec4(0.1 + inp[local_pos + 24], 1.1 - inp[local_pos + 24], 0.1 + inp[local_pos + 28],
1.1 - inp[local_pos + 28]);
cg4_1 = cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
hi += cg4.x * wg + cg4.z * wg.wzyx;
lo += cg4.y * wg + cg4.w * wg.wzyx;
cg4 *= cg4_1;
hi2 += cg4.x * wg + cg4.z * wg.wzyx;
lo2 += cg4.y * wg + cg4.w * wg.wzyx;
// Offsets 25 and 27 (with anti-ringing sums).
w = texture(ravu_lite_lut3, vec2(0.8846153846153846, coord_y));
wg = max(vec4(0.0, 0.0, 0.0, 0.0), w);
res += inp[local_pos + 25] * w + inp[local_pos + 27] * w.wzyx;
cg4 = vec4(0.1 + inp[local_pos + 25], 1.1 - inp[local_pos + 25], 0.1 + inp[local_pos + 27],
1.1 - inp[local_pos + 27]);
cg4_1 = cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
hi += cg4.x * wg + cg4.z * wg.wzyx;
lo += cg4.y * wg + cg4.w * wg.wzyx;
cg4 *= cg4_1;
hi2 += cg4.x * wg + cg4.z * wg.wzyx;
lo2 += cg4.y * wg + cg4.w * wg.wzyx;
// Centre sample (offset 26): no mirror partner, so only two power terms are needed.
w = texture(ravu_lite_lut3, vec2(0.9615384615384616, coord_y));
wg = max(vec4(0.0, 0.0, 0.0, 0.0), w);
res += inp[local_pos + 26] * w;
vec2 cg2 = vec2(0.1 + inp[local_pos + 26], 1.1 - inp[local_pos + 26]);
vec2 cg2_1 = cg2;
cg2 *= cg2;
cg2 *= cg2;
cg2 *= cg2;
cg2 *= cg2;
cg2 *= cg2;
hi += cg2.x * wg;
lo += cg2.y * wg;
cg2 *= cg2_1;
hi2 += cg2.x * wg;
lo2 += cg2.y * wg;
// The ratios of the 33rd-power sums to the 32nd-power sums are dominated by the
// largest terms, so hi acts as a smooth upper bound and lo as a smooth lower bound
// on the contributing samples. The result is blended 80% towards its clamped value.
lo = 1.1 - lo2 / lo;
hi = hi2 / hi - 0.1;
res = mix(res, clamp(res, lo, hi), 0.800000);
// Store the four components at offsets (0,0), (0,1), (1,0), (1,1) of the thread's
// 2x2 block; the imageStore macro forwards the position and the .x value to imageStoreOverride.
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 0), vec4(res[0], 0.0, 0.0, 0.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 1), vec4(res[1], 0.0, 0.0, 0.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 0), vec4(res[2], 0.0, 0.0, 0.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 1), vec4(res[3], 0.0, 0.0, 0.0));
}

View file

@ -0,0 +1,340 @@
// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers
// Please don't edit this file directly.
// Generated by: ravu-lite.py --weights-file weights\ravu-lite_weights-r4.py --float-format float16dx --use-compute-shader --anti-ringing 0.8 --use-magpie --overwrite
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//!MAGPIE EFFECT
//!VERSION 4
// Image the pass reads from.
//!TEXTURE
Texture2D INPUT;
// Point-filtering sampler.
//!SAMPLER
//!FILTER POINT
SamplerState sam_INPUT;
// Result image, declared at twice the INPUT width and height.
//!TEXTURE
//!WIDTH INPUT_WIDTH * 2
//!HEIGHT INPUT_HEIGHT * 2
Texture2D OUTPUT;
// Linear-filtering sampler; imageStoreOverride samples INPUT through it.
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_INPUT_LINEAR;
// Kernel lookup table loaded from ravu_lite_lut4_f16.dds (four 16-bit float channels per texel).
//!TEXTURE
//!SOURCE ravu_lite_lut4_f16.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_lite_lut4;
// Linear-filtering sampler named after the lookup table.
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_lite_lut4;
// Shared section: helper header plus the index of the last pass, which Pass1 compares
// against CURRENT_PASS to enable its output bounds check.
//!COMMON
#include "prescalers.hlsli"
#define LAST_PASS 1
// Single compute pass: 32 x 8 threads per group, each group covering a 64 x 16 block of OUTPUT.
//!PASS 1
//!DESC RAVU-Lite-AR (r4, compute)
//!IN INPUT, ravu_lite_lut4
//!OUT OUTPUT
//!BLOCK_SIZE 64, 16
//!NUM_THREADS 32, 8
// Luma tile filled at the start of Pass1: 38 x 14 entries, indexed as x * 14 + y.
// NOTE(review): `shared` is presumably mapped to groupshared by prescalers.hlsli - confirm.
shared float inp[532];
#define CURRENT_PASS 1
// Reduces an RGB sample to one value using the rgb2y weights.
#define GET_SAMPLE(x) dot(x.rgb, rgb2y)
// Redirects GLSL-style imageStore calls to imageStoreOverride; the image argument is
// dropped and only the .x component of the value is forwarded.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x)
// Stores one pixel of OUTPUT at `pos`. `value` supplies the first channel of a
// three-channel vector whose other two channels come from a linear-filtered sample
// of INPUT transformed by rgb2uv; the vector is then transformed by yuv2rgb and
// written with alpha 1.0.
// NOTE(review): HOOKED_map, rgb2uv and yuv2rgb are not declared in this file;
// presumably they come from prescalers.hlsli - confirm there.
void imageStoreOverride(uint2 pos, float value) {
	const float3 srcRgb = INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb;
	const float2 chroma = mul(rgb2uv, srcRgb);
	const float3 yuv = float3(value, chroma);
	OUTPUT[pos] = float4(mul(yuv2rgb, yuv), 1.0);
}
// GLSL-style accessors used by the generated pass body.
// INPUT_tex fetches INPUT at `pos` and reduces the result with GET_SAMPLE.
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
// Values returned by GetInputSize() / GetInputPt(), converted to float2.
// NOTE(review): presumably the input size in pixels and the size of one pixel in
// texture coordinates - confirm against the helper definitions.
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
// Fetches the kernel lookup table at `pos`.
#define ravu_lite_lut4_tex(pos) (vec4(texture(ravu_lite_lut4, pos)))
// The HOOKED_* names are aliases of the INPUT_* accessors above.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// Pass 1 entry point (RAVU-Lite r4 with anti-ringing).
// Each thread reads a 7x7 window from the tile `inp`, derives a lookup-table row from
// the gradient statistics of the inner 5x5 points, filters the window with the weights
// of that row and stores a 2x2 block of results through imageStore.
//   blockStart - added to threadId.xy * 2 to form the thread's destination position
//                (presumably the output origin of the work group's block).
//   threadId   - thread index inside the work group.
// NOTE(review): the gl_* identifiers, vec/ivec types, texture(), mix(), mod(), atan()
// and barrier() are GLSL spellings; presumably prescalers.hlsli maps them to HLSL.
void Pass1(uint2 blockStart, uint3 threadId) {
ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
// Tile index of the first window entry of this thread; the tile is indexed as x * 14 + y.
int local_pos = int(gl_LocalInvocationID.x) * 14 + int(gl_LocalInvocationID.y);
#pragma warning(disable : 3557)
// Cooperative fill of the 532-entry tile: each thread loads entries id, id + group size, ...
// Entry (x, y) is fetched at HOOKED_pt * (group_base + (x, y) - 2.5).
for (int id = int(gl_LocalInvocationIndex); id < 532; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 14, y = (uint)id % 14;
inp[id] = HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-2.5), float(group_base.y + y) + (-2.5))).x;
}
barrier();
#if CURRENT_PASS == LAST_PASS
// Bounds check against the output size. It comes after the barrier, so threads that
// return here have already contributed their tile entries.
uint2 destPos = blockStart + threadId.xy * 2;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
// Accumulate weighted gradient products (gx*gx, gx*gy, gy*gy) at the 25 points of
// the inner 5x5 (offsets 15-19, 29-33, 43-47, 57-61, 71-75). gx is a halved two-entry
// difference along x (stride 14), gy the same along y (stride 1). The 25 weights are
// symmetric about the centre and sum to 1.
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (inp[local_pos + 29] - inp[local_pos + 1]) / 2.0;
gy = (inp[local_pos + 16] - inp[local_pos + 14]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437;
gx = (inp[local_pos + 30] - inp[local_pos + 2]) / 2.0;
gy = (inp[local_pos + 17] - inp[local_pos + 15]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (inp[local_pos + 31] - inp[local_pos + 3]) / 2.0;
gy = (inp[local_pos + 18] - inp[local_pos + 16]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906;
gx = (inp[local_pos + 32] - inp[local_pos + 4]) / 2.0;
gy = (inp[local_pos + 19] - inp[local_pos + 17]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (inp[local_pos + 33] - inp[local_pos + 5]) / 2.0;
gy = (inp[local_pos + 20] - inp[local_pos + 18]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437;
gx = (inp[local_pos + 43] - inp[local_pos + 15]) / 2.0;
gy = (inp[local_pos + 30] - inp[local_pos + 28]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (inp[local_pos + 44] - inp[local_pos + 16]) / 2.0;
gy = (inp[local_pos + 31] - inp[local_pos + 29]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137;
gx = (inp[local_pos + 45] - inp[local_pos + 17]) / 2.0;
gy = (inp[local_pos + 32] - inp[local_pos + 30]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466;
gx = (inp[local_pos + 46] - inp[local_pos + 18]) / 2.0;
gy = (inp[local_pos + 33] - inp[local_pos + 31]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137;
gx = (inp[local_pos + 47] - inp[local_pos + 19]) / 2.0;
gy = (inp[local_pos + 34] - inp[local_pos + 32]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (inp[local_pos + 57] - inp[local_pos + 29]) / 2.0;
gy = (inp[local_pos + 44] - inp[local_pos + 42]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906;
gx = (inp[local_pos + 58] - inp[local_pos + 30]) / 2.0;
gy = (inp[local_pos + 45] - inp[local_pos + 43]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466;
gx = (inp[local_pos + 59] - inp[local_pos + 31]) / 2.0;
gy = (inp[local_pos + 46] - inp[local_pos + 44]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06319146241026467;
gx = (inp[local_pos + 60] - inp[local_pos + 32]) / 2.0;
gy = (inp[local_pos + 47] - inp[local_pos + 45]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466;
gx = (inp[local_pos + 61] - inp[local_pos + 33]) / 2.0;
gy = (inp[local_pos + 48] - inp[local_pos + 46]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906;
gx = (inp[local_pos + 71] - inp[local_pos + 43]) / 2.0;
gy = (inp[local_pos + 58] - inp[local_pos + 56]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (inp[local_pos + 72] - inp[local_pos + 44]) / 2.0;
gy = (inp[local_pos + 59] - inp[local_pos + 57]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137;
gx = (inp[local_pos + 73] - inp[local_pos + 45]) / 2.0;
gy = (inp[local_pos + 60] - inp[local_pos + 58]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466;
gx = (inp[local_pos + 74] - inp[local_pos + 46]) / 2.0;
gy = (inp[local_pos + 61] - inp[local_pos + 59]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137;
gx = (inp[local_pos + 75] - inp[local_pos + 47]) / 2.0;
gy = (inp[local_pos + 62] - inp[local_pos + 60]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (inp[local_pos + 85] - inp[local_pos + 57]) / 2.0;
gy = (inp[local_pos + 72] - inp[local_pos + 70]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437;
gx = (inp[local_pos + 86] - inp[local_pos + 58]) / 2.0;
gy = (inp[local_pos + 73] - inp[local_pos + 71]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (inp[local_pos + 87] - inp[local_pos + 59]) / 2.0;
gy = (inp[local_pos + 74] - inp[local_pos + 72]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906;
gx = (inp[local_pos + 88] - inp[local_pos + 60]) / 2.0;
gy = (inp[local_pos + 75] - inp[local_pos + 73]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (inp[local_pos + 89] - inp[local_pos + 61]) / 2.0;
gy = (inp[local_pos + 76] - inp[local_pos + 74]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437;
// Eigen-analysis of the symmetric 2x2 matrix [a b; b d]: T is its trace, D its
// determinant, and L1 >= L2 are its eigenvalues T/2 +/- delta.
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
// Orientation wrapped into [0, pi); forced to 0 when |b| is below 1.192092896e-7.
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
// Relative difference of the eigenvalue square roots; 0 when their sum is negligible.
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
// Quantise into 24 angle bins, 4 strength bins (thresholds 0.004 / 0.016 / 0.05) and
// 3 coherence bins (thresholds 0.25 / 0.5).
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = mix(mix(0.0, 1.0, lambda >= 0.004), mix(2.0, 3.0, lambda >= 0.05), lambda >= 0.016);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
// Row coordinate in the lookup table: 24 * 4 * 3 = 288 rows, +0.5 addresses the row centre.
float coord_y = ((angle * 4.0 + strength) * 3.0 + coherence + 0.5) / 288.0;
vec4 res = vec4(0.0, 0.0, 0.0, 0.0), w;
vec4 lo = vec4(0.0, 0.0, 0.0, 0.0), hi = vec4(0.0, 0.0, 0.0, 0.0), lo2 = vec4(0.0, 0.0, 0.0, 0.0),
hi2 = vec4(0.0, 0.0, 0.0, 0.0), wg, cg4, cg4_1;
// Filtering. The table is fetched at 25 x positions ((2k + 1) / 50). Each fetch
// returns four weights, one per result component. Window offset k and its mirror
// 90 - k share a fetch, the mirror using the reversed weights (w.wzyx); offset 45
// is the unpaired centre.
// Anti-ringing bookkeeping is done only for the 13 samples within Manhattan distance 2
// of the centre. For each such sample s, with wg = max(0, w):
//   hi  += wg * (0.1 + s)^32,  hi2 += wg * (0.1 + s)^33
//   lo  += wg * (1.1 - s)^32,  lo2 += wg * (1.1 - s)^33
// (five squarings give the 32nd power; multiplying by cg4_1 gives the 33rd).
w = texture(ravu_lite_lut4, vec2(0.02, coord_y));
res += inp[local_pos + 0] * w + inp[local_pos + 90] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.06, coord_y));
res += inp[local_pos + 1] * w + inp[local_pos + 89] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.1, coord_y));
res += inp[local_pos + 2] * w + inp[local_pos + 88] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.14, coord_y));
res += inp[local_pos + 3] * w + inp[local_pos + 87] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.18, coord_y));
res += inp[local_pos + 4] * w + inp[local_pos + 86] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.22, coord_y));
res += inp[local_pos + 5] * w + inp[local_pos + 85] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.26, coord_y));
res += inp[local_pos + 6] * w + inp[local_pos + 84] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.3, coord_y));
res += inp[local_pos + 14] * w + inp[local_pos + 76] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.34, coord_y));
res += inp[local_pos + 15] * w + inp[local_pos + 75] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.38, coord_y));
res += inp[local_pos + 16] * w + inp[local_pos + 74] * w.wzyx;
// Offsets 17 and 73: first pair that also feeds the anti-ringing sums.
w = texture(ravu_lite_lut4, vec2(0.42, coord_y));
wg = max(vec4(0.0, 0.0, 0.0, 0.0), w);
res += inp[local_pos + 17] * w + inp[local_pos + 73] * w.wzyx;
cg4 = vec4(0.1 + inp[local_pos + 17], 1.1 - inp[local_pos + 17], 0.1 + inp[local_pos + 73],
1.1 - inp[local_pos + 73]);
cg4_1 = cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
hi += cg4.x * wg + cg4.z * wg.wzyx;
lo += cg4.y * wg + cg4.w * wg.wzyx;
cg4 *= cg4_1;
hi2 += cg4.x * wg + cg4.z * wg.wzyx;
lo2 += cg4.y * wg + cg4.w * wg.wzyx;
w = texture(ravu_lite_lut4, vec2(0.46, coord_y));
res += inp[local_pos + 18] * w + inp[local_pos + 72] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.5, coord_y));
res += inp[local_pos + 19] * w + inp[local_pos + 71] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.54, coord_y));
res += inp[local_pos + 20] * w + inp[local_pos + 70] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.58, coord_y));
res += inp[local_pos + 28] * w + inp[local_pos + 62] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.62, coord_y));
res += inp[local_pos + 29] * w + inp[local_pos + 61] * w.wzyx;
// Offsets 30 and 60 (with anti-ringing sums).
w = texture(ravu_lite_lut4, vec2(0.66, coord_y));
wg = max(vec4(0.0, 0.0, 0.0, 0.0), w);
res += inp[local_pos + 30] * w + inp[local_pos + 60] * w.wzyx;
cg4 = vec4(0.1 + inp[local_pos + 30], 1.1 - inp[local_pos + 30], 0.1 + inp[local_pos + 60],
1.1 - inp[local_pos + 60]);
cg4_1 = cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
hi += cg4.x * wg + cg4.z * wg.wzyx;
lo += cg4.y * wg + cg4.w * wg.wzyx;
cg4 *= cg4_1;
hi2 += cg4.x * wg + cg4.z * wg.wzyx;
lo2 += cg4.y * wg + cg4.w * wg.wzyx;
// Offsets 31 and 59 (with anti-ringing sums).
w = texture(ravu_lite_lut4, vec2(0.7, coord_y));
wg = max(vec4(0.0, 0.0, 0.0, 0.0), w);
res += inp[local_pos + 31] * w + inp[local_pos + 59] * w.wzyx;
cg4 = vec4(0.1 + inp[local_pos + 31], 1.1 - inp[local_pos + 31], 0.1 + inp[local_pos + 59],
1.1 - inp[local_pos + 59]);
cg4_1 = cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
hi += cg4.x * wg + cg4.z * wg.wzyx;
lo += cg4.y * wg + cg4.w * wg.wzyx;
cg4 *= cg4_1;
hi2 += cg4.x * wg + cg4.z * wg.wzyx;
lo2 += cg4.y * wg + cg4.w * wg.wzyx;
// Offsets 32 and 58 (with anti-ringing sums).
w = texture(ravu_lite_lut4, vec2(0.74, coord_y));
wg = max(vec4(0.0, 0.0, 0.0, 0.0), w);
res += inp[local_pos + 32] * w + inp[local_pos + 58] * w.wzyx;
cg4 = vec4(0.1 + inp[local_pos + 32], 1.1 - inp[local_pos + 32], 0.1 + inp[local_pos + 58],
1.1 - inp[local_pos + 58]);
cg4_1 = cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
hi += cg4.x * wg + cg4.z * wg.wzyx;
lo += cg4.y * wg + cg4.w * wg.wzyx;
cg4 *= cg4_1;
hi2 += cg4.x * wg + cg4.z * wg.wzyx;
lo2 += cg4.y * wg + cg4.w * wg.wzyx;
w = texture(ravu_lite_lut4, vec2(0.78, coord_y));
res += inp[local_pos + 33] * w + inp[local_pos + 57] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.82, coord_y));
res += inp[local_pos + 34] * w + inp[local_pos + 56] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.86, coord_y));
res += inp[local_pos + 42] * w + inp[local_pos + 48] * w.wzyx;
// Offsets 43 and 47 (with anti-ringing sums).
w = texture(ravu_lite_lut4, vec2(0.9, coord_y));
wg = max(vec4(0.0, 0.0, 0.0, 0.0), w);
res += inp[local_pos + 43] * w + inp[local_pos + 47] * w.wzyx;
cg4 = vec4(0.1 + inp[local_pos + 43], 1.1 - inp[local_pos + 43], 0.1 + inp[local_pos + 47],
1.1 - inp[local_pos + 47]);
cg4_1 = cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
hi += cg4.x * wg + cg4.z * wg.wzyx;
lo += cg4.y * wg + cg4.w * wg.wzyx;
cg4 *= cg4_1;
hi2 += cg4.x * wg + cg4.z * wg.wzyx;
lo2 += cg4.y * wg + cg4.w * wg.wzyx;
// Offsets 44 and 46 (with anti-ringing sums).
w = texture(ravu_lite_lut4, vec2(0.94, coord_y));
wg = max(vec4(0.0, 0.0, 0.0, 0.0), w);
res += inp[local_pos + 44] * w + inp[local_pos + 46] * w.wzyx;
cg4 = vec4(0.1 + inp[local_pos + 44], 1.1 - inp[local_pos + 44], 0.1 + inp[local_pos + 46],
1.1 - inp[local_pos + 46]);
cg4_1 = cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
cg4 *= cg4;
hi += cg4.x * wg + cg4.z * wg.wzyx;
lo += cg4.y * wg + cg4.w * wg.wzyx;
cg4 *= cg4_1;
hi2 += cg4.x * wg + cg4.z * wg.wzyx;
lo2 += cg4.y * wg + cg4.w * wg.wzyx;
// Centre sample (offset 45): no mirror partner, so only two power terms are needed.
w = texture(ravu_lite_lut4, vec2(0.98, coord_y));
wg = max(vec4(0.0, 0.0, 0.0, 0.0), w);
res += inp[local_pos + 45] * w;
vec2 cg2 = vec2(0.1 + inp[local_pos + 45], 1.1 - inp[local_pos + 45]);
vec2 cg2_1 = cg2;
cg2 *= cg2;
cg2 *= cg2;
cg2 *= cg2;
cg2 *= cg2;
cg2 *= cg2;
hi += cg2.x * wg;
lo += cg2.y * wg;
cg2 *= cg2_1;
hi2 += cg2.x * wg;
lo2 += cg2.y * wg;
// The ratios of the 33rd-power sums to the 32nd-power sums are dominated by the
// largest terms, so hi acts as a smooth upper bound and lo as a smooth lower bound
// on the contributing samples. The result is blended 80% towards its clamped value.
lo = 1.1 - lo2 / lo;
hi = hi2 / hi - 0.1;
res = mix(res, clamp(res, lo, hi), 0.800000);
// Store the four components at offsets (0,0), (0,1), (1,0), (1,1) of the thread's
// 2x2 block; the imageStore macro forwards the position and the .x value to imageStoreOverride.
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 0), vec4(res[0], 0.0, 0.0, 0.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 1), vec4(res[1], 0.0, 0.0, 0.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 0), vec4(res[2], 0.0, 0.0, 0.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 1), vec4(res[3], 0.0, 0.0, 0.0));
}

View file

@ -0,0 +1,151 @@
// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers
// Please don't edit this file directly.
// Generated by: ravu-lite.py --weights-file weights\ravu-lite_weights-r2.py --float-format float16dx --use-compute-shader --use-magpie --overwrite
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//!MAGPIE EFFECT
//!VERSION 4
// Image the pass reads from.
//!TEXTURE
Texture2D INPUT;
// Point-filtering sampler.
//!SAMPLER
//!FILTER POINT
SamplerState sam_INPUT;
// Result image, declared at twice the INPUT width and height.
//!TEXTURE
//!WIDTH INPUT_WIDTH * 2
//!HEIGHT INPUT_HEIGHT * 2
Texture2D OUTPUT;
// Linear-filtering sampler; imageStoreOverride samples INPUT through it.
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_INPUT_LINEAR;
// Kernel lookup table loaded from ravu_lite_lut2_f16.dds (four 16-bit float channels per texel).
//!TEXTURE
//!SOURCE ravu_lite_lut2_f16.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_lite_lut2;
// Linear-filtering sampler named after the lookup table.
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_lite_lut2;
// Shared section: helper header plus the index of the last pass, which Pass1 compares
// against CURRENT_PASS to enable its output bounds check.
//!COMMON
#include "prescalers.hlsli"
#define LAST_PASS 1
// Single compute pass: 32 x 8 threads per group, each group covering a 64 x 16 block of OUTPUT.
//!PASS 1
//!DESC RAVU-Lite (r2, compute)
//!IN INPUT, ravu_lite_lut2
//!OUT OUTPUT
//!BLOCK_SIZE 64, 16
//!NUM_THREADS 32, 8
// Luma tile filled at the start of Pass1: 34 x 10 entries, indexed as x * 10 + y.
// NOTE(review): `shared` is presumably mapped to groupshared by prescalers.hlsli - confirm.
shared float inp[340];
#define CURRENT_PASS 1
// Reduces an RGB sample to one value using the rgb2y weights.
#define GET_SAMPLE(x) dot(x.rgb, rgb2y)
// Redirects GLSL-style imageStore calls to imageStoreOverride; the image argument is
// dropped and only the .x component of the value is forwarded.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x)
// Writes one output pixel. `value` supplies the first (luma) channel; the two
// chroma channels are taken from INPUT, sampled with the linear sampler at the
// coordinate HOOKED_map(pos) yields, then the triple is converted back to RGB.
// NOTE(review): HOOKED_map, rgb2uv and yuv2rgb are not defined in this file -
// presumably they come from prescalers.hlsli; confirm.
void imageStoreOverride(uint2 pos, float value) {
    const float3 sourceRgb = INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb;
    const float2 chroma = mul(rgb2uv, sourceRgb);
    const float3 yuv = float3(value, chroma);
    OUTPUT[pos] = float4(mul(yuv2rgb, yuv), 1.0);
}
// GLSL-style accessors used by the generated pass body.
// NOTE(review): `texture` and `vec4` are not defined in this file - presumably
// prescalers.hlsli provides them; confirm.

// Samples INPUT at `pos` and reduces the result to a scalar through GET_SAMPLE.
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
// Values reported by GetInputSize() / GetInputPt(), converted to float2.
// Pass1 multiplies pixel-centre coordinates by INPUT_pt before sampling, so it
// is presumably the reciprocal of the input size - confirm.
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
// Samples the weight lookup texture at `pos`.
#define ravu_lite_lut2_tex(pos) (vec4(texture(ravu_lite_lut2, pos)))
// HOOKED_* aliases: in this effect the hooked texture is INPUT.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// Pass 1 entry point (RAVU-Lite r2): each thread produces a 2x2 tile of the
// doubled-size output from a 3x3 window of cached input samples.
//
// blockStart - start of the block handled by this thread group; used here only
//              to build destPos for the output bounds check.
// threadId   - thread index inside the group; .xy selects the 2x2 tile.
//
// NOTE(review): the gl_* identifiers, ivec2/vec2/vec3/vec4, mix, mod, atan,
// texture and barrier are GLSL spellings that are not defined in this file;
// presumably prescalers.hlsli maps them onto blockStart/threadId and HLSL
// intrinsics - confirm.
void Pass1(uint2 blockStart, uint3 threadId) {
// Input-space coordinate of the first pixel owned by this group.
ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
// Index in inp of the top-left sample of this thread's 3x3 window.
// Layout is x * 10 + y: +1 steps one sample in y, +10 steps one sample in x.
int local_pos = int(gl_LocalInvocationID.x) * 10 + int(gl_LocalInvocationID.y);
// 3557 is the "loop only executes for 1 iteration(s), forcing loop to unroll" warning.
#pragma warning(disable : 3557)
// Cooperative fill of the cache: the threads of the group stride over all 340
// entries (34 x 10, i.e. a 32 x 8 footprint plus a one-sample border on every
// side, assuming gl_WorkGroupSize matches NUM_THREADS 32, 8).
for (int id = int(gl_LocalInvocationIndex); id < 340; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 10, y = (uint)id % 10;
// The -0.5 offset addresses the centre of pixel (group_base + (x, y) - 1),
// assuming HOOKED_pt is the size of one input texel in UV space.
inp[id] = HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-0.5), float(group_base.y + y) + (-0.5))).x;
}
barrier();
// The early-out sits after the barrier, so threads whose tile is out of range
// still take part in the cache fill above.
#if CURRENT_PASS == LAST_PASS
uint2 destPos = blockStart + threadId.xy * 2;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
// abd accumulates weighted (gx*gx, gx*gy, gy*gy) over the nine window samples.
// gx differences samples 10 apart (x direction), gy samples 1 apart (y direction).
// Differences spanning two samples are halved (central difference); at the
// window border a one-sided difference between adjacent samples is used.
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (inp[local_pos + 10] - inp[local_pos + 0]);
gy = (inp[local_pos + 1] - inp[local_pos + 0]);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
gx = (inp[local_pos + 11] - inp[local_pos + 1]);
gy = (inp[local_pos + 2] - inp[local_pos + 0]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (inp[local_pos + 12] - inp[local_pos + 2]);
gy = (inp[local_pos + 2] - inp[local_pos + 1]);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
gx = (inp[local_pos + 20] - inp[local_pos + 0]) / 2.0;
gy = (inp[local_pos + 11] - inp[local_pos + 10]);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (inp[local_pos + 21] - inp[local_pos + 1]) / 2.0;
gy = (inp[local_pos + 12] - inp[local_pos + 10]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.13080118386382833;
gx = (inp[local_pos + 22] - inp[local_pos + 2]) / 2.0;
gy = (inp[local_pos + 12] - inp[local_pos + 11]);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (inp[local_pos + 20] - inp[local_pos + 10]);
gy = (inp[local_pos + 21] - inp[local_pos + 20]);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
gx = (inp[local_pos + 21] - inp[local_pos + 11]);
gy = (inp[local_pos + 22] - inp[local_pos + 20]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (inp[local_pos + 22] - inp[local_pos + 12]);
gy = (inp[local_pos + 22] - inp[local_pos + 21]);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
// Treat (a, b; b, d) as a symmetric 2x2 matrix: T is its trace, D its
// determinant, and L1 >= L2 are its eigenvalues T/2 +- delta.
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
// mix(x, y, cond) is used as a select here: the result is y when cond holds.
// theta is folded by mod(.. + pi, pi) and forced to 0 when |b| is below
// 1.192092896e-7 (2^-23).
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
// mu is the relative eigenvalue spread; forced to 0 when the denominator is ~0.
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
// Quantise into lookup-table buckets: theta in steps of pi/24, 4 strength levels
// (thresholds 0.004 / 0.016 / 0.05 on lambda), 3 coherence levels (0.25 / 0.5 on mu).
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = mix(mix(0.0, 1.0, lambda >= 0.004), mix(2.0, 3.0, lambda >= 0.05), lambda >= 0.016);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
// Row of the weight texture: 24 * 4 * 3 = 288 rows, +0.5 to hit the row centre.
float coord_y = ((angle * 4.0 + strength) * 3.0 + coherence + 0.5) / 288.0;
// Weighted sum over the 3x3 window. The texture has 5 columns, read at
// (k + 0.5) / 5. Each weight vector serves a sample and its point-mirrored
// partner (offsets o and 22 - o) with the components reversed (w.wzyx); the
// centre sample (offset 11) has no partner. The four components of res are
// the four output pixels of the tile.
vec4 res = vec4(0.0, 0.0, 0.0, 0.0), w;
w = texture(ravu_lite_lut2, vec2(0.1, coord_y));
res += inp[local_pos + 0] * w + inp[local_pos + 22] * w.wzyx;
w = texture(ravu_lite_lut2, vec2(0.3, coord_y));
res += inp[local_pos + 1] * w + inp[local_pos + 21] * w.wzyx;
w = texture(ravu_lite_lut2, vec2(0.5, coord_y));
res += inp[local_pos + 2] * w + inp[local_pos + 20] * w.wzyx;
w = texture(ravu_lite_lut2, vec2(0.7, coord_y));
res += inp[local_pos + 10] * w + inp[local_pos + 12] * w.wzyx;
w = texture(ravu_lite_lut2, vec2(0.9, coord_y));
res += inp[local_pos + 11] * w;
res = clamp(res, 0.0, 1.0);
// res[0..3] go to tile offsets (0,0), (0,1), (1,0), (1,1); the imageStore
// macro forwards only the .x of each vec4 to imageStoreOverride.
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 0), vec4(res[0], 0.0, 0.0, 0.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 1), vec4(res[1], 0.0, 0.0, 0.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 0), vec4(res[2], 0.0, 0.0, 0.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 1), vec4(res[3], 0.0, 0.0, 0.0));
}

View file

@ -1,174 +1,167 @@
// ravu-lite-r3
// 移植自 https://github.com/bjin/mpv-prescalers/blob/cc02ed95c1fe05b72bc21d41257c4c085e6e409b/ravu-lite-r3.hook
// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers
// Please don't edit this file directly.
// Generated by: ravu-lite.py --weights-file weights\ravu-lite_weights-r3.py --float-format float16dx --use-compute-shader --use-magpie --overwrite
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//!MAGPIE EFFECT
//!VERSION 3
//!OUTPUT_WIDTH INPUT_WIDTH * 2
//!OUTPUT_HEIGHT INPUT_HEIGHT * 2
//!VERSION 4
//!TEXTURE
Texture2D INPUT;
//!SAMPLER
//!FILTER POINT
SamplerState sam_INPUT;
//!TEXTURE
//!SOURCE RAVU_Lite_R3_Weights.dds
//!WIDTH INPUT_WIDTH * 2
//!HEIGHT INPUT_HEIGHT * 2
Texture2D OUTPUT;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_INPUT_LINEAR;
//!TEXTURE
//!SOURCE ravu_lite_lut3_f16.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_lite_lut3;
//!SAMPLER
//!FILTER POINT
SamplerState sam;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam1;
SamplerState sam_ravu_lite_lut3;
//!COMMON
#include "prescalers.hlsli"
#define LAST_PASS 1
//!PASS 1
//!DESC RAVU-Lite (r3, compute)
//!IN INPUT, ravu_lite_lut3
//!BLOCK_SIZE 32, 16
//!NUM_THREADS 16, 8
//!OUT OUTPUT
//!BLOCK_SIZE 64, 16
//!NUM_THREADS 32, 8
shared float inp[432];
#pragma warning(disable: 3557) // X3557: loop only executes for 1 iteration(s), forcing loop to unroll
#define CURRENT_PASS 1
#define NUM_PIXELS_X (MP_BLOCK_WIDTH + 4)
#define NUM_PIXELS_Y (MP_BLOCK_HEIGHT + 4)
groupshared float inp[NUM_PIXELS_Y][NUM_PIXELS_X];
#define PI 3.1415926535897932384626433832795
float GetLuma(float3 color) {
return dot(float3(0.299f, 0.587f, 0.114f), color);
#define GET_SAMPLE(x) dot(x.rgb, rgb2y)
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x)
void imageStoreOverride(uint2 pos, float value) {
float2 UV = mul(rgb2uv, INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb);
OUTPUT[pos] = float4(mul(yuv2rgb, float3(value.x, UV)), 1.0);
}
const static float2x3 rgb2uv = {
-0.169, -0.331, 0.5,
0.5, -0.419, -0.081
};
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
const static float3x3 yuv2rgb = {
1, -0.00093, 1.401687,
1, -0.3437, -0.71417,
1, 1.77216, 0.00099
};
#define ravu_lite_lut3_tex(pos) (vec4(texture(ravu_lite_lut3, pos)))
float mod(float x, float y) {
return x - y * floor(x / y);
}
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
void Pass1(uint2 blockStart, uint3 threadId) {
const float2 inputPt = GetInputPt();
for (int id = threadId.y * MP_NUM_THREADS_X + threadId.x; id < NUM_PIXELS_X * NUM_PIXELS_Y; id += MP_NUM_THREADS_X * MP_NUM_THREADS_Y) {
uint2 pos = { (uint)id % NUM_PIXELS_X, (uint)id / NUM_PIXELS_X };
inp[pos.y][pos.x] = GetLuma(INPUT.SampleLevel(sam, inputPt * ((blockStart / 2) + pos - 1.5f), 0).rgb);
ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
int local_pos = int(gl_LocalInvocationID.x) * 12 + int(gl_LocalInvocationID.y);
#pragma warning(disable : 3557)
for (int id = int(gl_LocalInvocationIndex); id < 432; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 12, y = (uint)id % 12;
inp[id] = HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-1.5), float(group_base.y + y) + (-1.5))).x;
}
GroupMemoryBarrierWithGroupSync();
barrier();
#if CURRENT_PASS == LAST_PASS
uint2 destPos = blockStart + threadId.xy * 2;
if (!CheckViewport(destPos)) {
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
float src[5][5];
[unroll]
for (uint i = 0; i < 5; ++i) {
[unroll]
for (uint j = 0; j < 5; ++j) {
src[j][i] = inp[threadId.y + i][threadId.x + j];
}
}
float3 abd = 0;
#endif
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (src[2][1] - src[0][1]) / 2.0;
gy = (src[1][2] - src[1][0]) / 2.0;
abd += float3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
gx = (src[2][2] - src[0][2]) / 2.0;
gy = (src[1][3] - src[1][1]) / 2.0;
abd += float3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (src[2][3] - src[0][3]) / 2.0;
gy = (src[1][4] - src[1][2]) / 2.0;
abd += float3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
gx = (src[3][1] - src[1][1]) / 2.0;
gy = (src[2][2] - src[2][0]) / 2.0;
abd += float3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (src[3][2] - src[1][2]) / 2.0;
gy = (src[2][3] - src[2][1]) / 2.0;
abd += float3(gx * gx, gx * gy, gy * gy) * 0.13080118386382833;
gx = (src[3][3] - src[1][3]) / 2.0;
gy = (src[2][4] - src[2][2]) / 2.0;
abd += float3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (src[4][1] - src[2][1]) / 2.0;
gy = (src[3][2] - src[3][0]) / 2.0;
abd += float3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
gx = (src[4][2] - src[2][2]) / 2.0;
gy = (src[3][3] - src[3][1]) / 2.0;
abd += float3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (src[4][3] - src[2][3]) / 2.0;
gy = (src[3][4] - src[3][2]) / 2.0;
abd += float3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
gx = (inp[local_pos + 25] - inp[local_pos + 1]) / 2.0;
gy = (inp[local_pos + 14] - inp[local_pos + 12]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
gx = (inp[local_pos + 26] - inp[local_pos + 2]) / 2.0;
gy = (inp[local_pos + 15] - inp[local_pos + 13]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (inp[local_pos + 27] - inp[local_pos + 3]) / 2.0;
gy = (inp[local_pos + 16] - inp[local_pos + 14]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
gx = (inp[local_pos + 37] - inp[local_pos + 13]) / 2.0;
gy = (inp[local_pos + 26] - inp[local_pos + 24]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (inp[local_pos + 38] - inp[local_pos + 14]) / 2.0;
gy = (inp[local_pos + 27] - inp[local_pos + 25]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.13080118386382833;
gx = (inp[local_pos + 39] - inp[local_pos + 15]) / 2.0;
gy = (inp[local_pos + 28] - inp[local_pos + 26]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (inp[local_pos + 49] - inp[local_pos + 25]) / 2.0;
gy = (inp[local_pos + 38] - inp[local_pos + 36]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
gx = (inp[local_pos + 50] - inp[local_pos + 26]) / 2.0;
gy = (inp[local_pos + 39] - inp[local_pos + 37]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.11543163961422666;
gx = (inp[local_pos + 51] - inp[local_pos + 27]) / 2.0;
gy = (inp[local_pos + 40] - inp[local_pos + 38]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.1018680644198163;
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
float theta = lerp(mod(atan2(b, L1 - a) + PI, PI), 0.0, abs(b) < 1.192092896e-7);
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
float mu = lerp((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
float angle = floor(theta * 24.0 / PI);
float strength = lerp(lerp(0.0, 1.0, lambda >= 0.004), lerp(2.0, 3.0, lambda >= 0.05), lambda >= 0.016);
float coherence = lerp(lerp(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
float coord_y = ((angle * 4.0f + strength) * 3.0f + coherence + 0.5f) / 288.0f;
float4 res = 0, w;
w = ravu_lite_lut3.SampleLevel(sam, float2(0.038461538461538464, coord_y), 0);
res += src[0][0] * w + src[4][4] * w.wzyx;
w = ravu_lite_lut3.SampleLevel(sam, float2(0.11538461538461539, coord_y), 0);
res += src[0][1] * w + src[4][3] * w.wzyx;
w = ravu_lite_lut3.SampleLevel(sam, float2(0.19230769230769232, coord_y), 0);
res += src[0][2] * w + src[4][2] * w.wzyx;
w = ravu_lite_lut3.SampleLevel(sam, float2(0.2692307692307692, coord_y), 0);
res += src[0][3] * w + src[4][1] * w.wzyx;
w = ravu_lite_lut3.SampleLevel(sam, float2(0.34615384615384615, coord_y), 0);
res += src[0][4] * w + src[4][0] * w.wzyx;
w = ravu_lite_lut3.SampleLevel(sam, float2(0.4230769230769231, coord_y), 0);
res += src[1][0] * w + src[3][4] * w.wzyx;
w = ravu_lite_lut3.SampleLevel(sam, float2(0.5, coord_y), 0);
res += src[1][1] * w + src[3][3] * w.wzyx;
w = ravu_lite_lut3.SampleLevel(sam, float2(0.5769230769230769, coord_y), 0);
res += src[1][2] * w + src[3][2] * w.wzyx;
w = ravu_lite_lut3.SampleLevel(sam, float2(0.6538461538461539, coord_y), 0);
res += src[1][3] * w + src[3][1] * w.wzyx;
w = ravu_lite_lut3.SampleLevel(sam, float2(0.7307692307692307, coord_y), 0);
res += src[1][4] * w + src[3][0] * w.wzyx;
w = ravu_lite_lut3.SampleLevel(sam, float2(0.8076923076923077, coord_y), 0);
res += src[2][0] * w + src[2][4] * w.wzyx;
w = ravu_lite_lut3.SampleLevel(sam, float2(0.8846153846153846, coord_y), 0);
res += src[2][1] * w + src[2][3] * w.wzyx;
w = ravu_lite_lut3.SampleLevel(sam, float2(0.9615384615384616, coord_y), 0);
res += src[2][2] * w;
res = saturate(res);
const float2 outputPt = GetOutputPt();
float2 originUV = mul(rgb2uv, INPUT.SampleLevel(sam1, (destPos + 0.5f) * outputPt, 0).rgb);
WriteToOutput(destPos, mul(yuv2rgb, float3(res.x, originUV)));
++destPos.y;
if (CheckViewport(destPos)) {
originUV = mul(rgb2uv, INPUT.SampleLevel(sam1, (destPos + 0.5f) * outputPt, 0).rgb);
WriteToOutput(destPos, mul(yuv2rgb, float3(res.y, originUV)));
}
++destPos.x;
if (CheckViewport(destPos)) {
originUV = mul(rgb2uv, INPUT.SampleLevel(sam1, (destPos + 0.5f) * outputPt, 0).rgb);
WriteToOutput(destPos, mul(yuv2rgb, float3(res.w, originUV)));
}
--destPos.y;
if (CheckViewport(destPos)) {
originUV = mul(rgb2uv, INPUT.SampleLevel(sam1, (destPos + 0.5f) * outputPt, 0).rgb);
WriteToOutput(destPos, mul(yuv2rgb, float3(res.z, originUV)));
}
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = mix(mix(0.0, 1.0, lambda >= 0.004), mix(2.0, 3.0, lambda >= 0.05), lambda >= 0.016);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
float coord_y = ((angle * 4.0 + strength) * 3.0 + coherence + 0.5) / 288.0;
vec4 res = vec4(0.0, 0.0, 0.0, 0.0), w;
w = texture(ravu_lite_lut3, vec2(0.038461538461538464, coord_y));
res += inp[local_pos + 0] * w + inp[local_pos + 52] * w.wzyx;
w = texture(ravu_lite_lut3, vec2(0.11538461538461539, coord_y));
res += inp[local_pos + 1] * w + inp[local_pos + 51] * w.wzyx;
w = texture(ravu_lite_lut3, vec2(0.19230769230769232, coord_y));
res += inp[local_pos + 2] * w + inp[local_pos + 50] * w.wzyx;
w = texture(ravu_lite_lut3, vec2(0.2692307692307692, coord_y));
res += inp[local_pos + 3] * w + inp[local_pos + 49] * w.wzyx;
w = texture(ravu_lite_lut3, vec2(0.34615384615384615, coord_y));
res += inp[local_pos + 4] * w + inp[local_pos + 48] * w.wzyx;
w = texture(ravu_lite_lut3, vec2(0.4230769230769231, coord_y));
res += inp[local_pos + 12] * w + inp[local_pos + 40] * w.wzyx;
w = texture(ravu_lite_lut3, vec2(0.5, coord_y));
res += inp[local_pos + 13] * w + inp[local_pos + 39] * w.wzyx;
w = texture(ravu_lite_lut3, vec2(0.5769230769230769, coord_y));
res += inp[local_pos + 14] * w + inp[local_pos + 38] * w.wzyx;
w = texture(ravu_lite_lut3, vec2(0.6538461538461539, coord_y));
res += inp[local_pos + 15] * w + inp[local_pos + 37] * w.wzyx;
w = texture(ravu_lite_lut3, vec2(0.7307692307692307, coord_y));
res += inp[local_pos + 16] * w + inp[local_pos + 36] * w.wzyx;
w = texture(ravu_lite_lut3, vec2(0.8076923076923077, coord_y));
res += inp[local_pos + 24] * w + inp[local_pos + 28] * w.wzyx;
w = texture(ravu_lite_lut3, vec2(0.8846153846153846, coord_y));
res += inp[local_pos + 25] * w + inp[local_pos + 27] * w.wzyx;
w = texture(ravu_lite_lut3, vec2(0.9615384615384616, coord_y));
res += inp[local_pos + 26] * w;
res = clamp(res, 0.0, 1.0);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 0), vec4(res[0], 0.0, 0.0, 0.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 1), vec4(res[1], 0.0, 0.0, 0.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 0), vec4(res[2], 0.0, 0.0, 0.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 1), vec4(res[3], 0.0, 0.0, 0.0));
}

View file

@ -0,0 +1,239 @@
// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers
// Please don't edit this file directly.
// Generated by: ravu-lite.py --weights-file weights\ravu-lite_weights-r4.py --float-format float16dx --use-compute-shader --use-magpie --overwrite
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//!MAGPIE EFFECT
//!VERSION 4
//!TEXTURE
Texture2D INPUT;
//!SAMPLER
//!FILTER POINT
SamplerState sam_INPUT;
//!TEXTURE
//!WIDTH INPUT_WIDTH * 2
//!HEIGHT INPUT_HEIGHT * 2
Texture2D OUTPUT;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_INPUT_LINEAR;
//!TEXTURE
//!SOURCE ravu_lite_lut4_f16.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_lite_lut4;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_lite_lut4;
//!COMMON
#include "prescalers.hlsli"
#define LAST_PASS 1
//!PASS 1
//!DESC RAVU-Lite (r4, compute)
//!IN INPUT, ravu_lite_lut4
//!OUT OUTPUT
//!BLOCK_SIZE 64, 16
//!NUM_THREADS 32, 8
// Per-group cache of input samples filled and read by Pass1: 532 entries,
// addressed as x * 14 + y.
// NOTE(review): `shared` is not an HLSL keyword; presumably prescalers.hlsli
// maps it to groupshared - confirm.
shared float inp[532];
// Index of this pass; Pass1 compares it with LAST_PASS to enable the output
// bounds check.
#define CURRENT_PASS 1
// Collapses an RGB sample to one scalar via a dot product with rgb2y.
// NOTE(review): rgb2y is not defined in this file - presumably luma weights
// from prescalers.hlsli; confirm.
#define GET_SAMPLE(x) dot(x.rgb, rgb2y)
// GLSL-style imageStore shim: the image argument is dropped and only val.x is
// forwarded to imageStoreOverride.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x)
// Writes one output pixel. `value` supplies the first (luma) channel; the two
// chroma channels are taken from INPUT, sampled with the linear sampler at the
// coordinate HOOKED_map(pos) yields, then the triple is converted back to RGB.
// NOTE(review): HOOKED_map, rgb2uv and yuv2rgb are not defined in this file -
// presumably they come from prescalers.hlsli; confirm.
void imageStoreOverride(uint2 pos, float value) {
    const float3 sourceRgb = INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb;
    const float2 chroma = mul(rgb2uv, sourceRgb);
    const float3 yuv = float3(value, chroma);
    OUTPUT[pos] = float4(mul(yuv2rgb, yuv), 1.0);
}
// GLSL-style accessors used by the generated pass body.
// NOTE(review): `texture` and `vec4` are not defined in this file - presumably
// prescalers.hlsli provides them; confirm.

// Samples INPUT at `pos` and reduces the result to a scalar through GET_SAMPLE.
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
// Values reported by GetInputSize() / GetInputPt(), converted to float2.
// Pass1 multiplies pixel-centre coordinates by INPUT_pt before sampling, so it
// is presumably the reciprocal of the input size - confirm.
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
// Samples the weight lookup texture at `pos`.
#define ravu_lite_lut4_tex(pos) (vec4(texture(ravu_lite_lut4, pos)))
// HOOKED_* aliases: in this effect the hooked texture is INPUT.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// Pass 1 entry point (RAVU-Lite r4): each thread produces a 2x2 tile of the
// doubled-size output from a 7x7 window of cached input samples.
//
// blockStart - start of the block handled by this thread group; used here only
//              to build destPos for the output bounds check.
// threadId   - thread index inside the group; .xy selects the 2x2 tile.
//
// NOTE(review): the gl_* identifiers, ivec2/vec2/vec3/vec4, mix, mod, atan,
// texture and barrier are GLSL spellings that are not defined in this file;
// presumably prescalers.hlsli maps them onto blockStart/threadId and HLSL
// intrinsics - confirm.
void Pass1(uint2 blockStart, uint3 threadId) {
// Input-space coordinate of the first pixel owned by this group.
ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
// Index in inp of the top-left sample of this thread's 7x7 window.
// Layout is x * 14 + y: +1 steps one sample in y, +14 steps one sample in x.
int local_pos = int(gl_LocalInvocationID.x) * 14 + int(gl_LocalInvocationID.y);
#pragma warning(disable : 3557)
// Cooperative fill of the cache: the threads of the group stride over all 532
// entries (38 x 14, i.e. a 32 x 8 footprint plus a three-sample border on
// every side, assuming gl_WorkGroupSize matches NUM_THREADS 32, 8).
for (int id = int(gl_LocalInvocationIndex); id < 532; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 14, y = (uint)id % 14;
// The -2.5 offset addresses the centre of pixel (group_base + (x, y) - 3),
// assuming HOOKED_pt is the size of one input texel in UV space.
inp[id] = HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-2.5), float(group_base.y + y) + (-2.5))).x;
}
barrier();
// The early-out sits after the barrier, so threads whose tile is out of range
// still take part in the cache fill above.
#if CURRENT_PASS == LAST_PASS
uint2 destPos = blockStart + threadId.xy * 2;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
// abd accumulates weighted (gx*gx, gx*gy, gy*gy) over the inner 5x5 samples of
// the window (offsets 15..19, 29..33, 43..47, 57..61, 71..75). Every gradient
// is a halved central difference: gx uses the samples 14 before and after
// (x direction), gy the samples 1 before and after (y direction).
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (inp[local_pos + 29] - inp[local_pos + 1]) / 2.0;
gy = (inp[local_pos + 16] - inp[local_pos + 14]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437;
gx = (inp[local_pos + 30] - inp[local_pos + 2]) / 2.0;
gy = (inp[local_pos + 17] - inp[local_pos + 15]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (inp[local_pos + 31] - inp[local_pos + 3]) / 2.0;
gy = (inp[local_pos + 18] - inp[local_pos + 16]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906;
gx = (inp[local_pos + 32] - inp[local_pos + 4]) / 2.0;
gy = (inp[local_pos + 19] - inp[local_pos + 17]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (inp[local_pos + 33] - inp[local_pos + 5]) / 2.0;
gy = (inp[local_pos + 20] - inp[local_pos + 18]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437;
gx = (inp[local_pos + 43] - inp[local_pos + 15]) / 2.0;
gy = (inp[local_pos + 30] - inp[local_pos + 28]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (inp[local_pos + 44] - inp[local_pos + 16]) / 2.0;
gy = (inp[local_pos + 31] - inp[local_pos + 29]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137;
gx = (inp[local_pos + 45] - inp[local_pos + 17]) / 2.0;
gy = (inp[local_pos + 32] - inp[local_pos + 30]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466;
gx = (inp[local_pos + 46] - inp[local_pos + 18]) / 2.0;
gy = (inp[local_pos + 33] - inp[local_pos + 31]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137;
gx = (inp[local_pos + 47] - inp[local_pos + 19]) / 2.0;
gy = (inp[local_pos + 34] - inp[local_pos + 32]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (inp[local_pos + 57] - inp[local_pos + 29]) / 2.0;
gy = (inp[local_pos + 44] - inp[local_pos + 42]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906;
gx = (inp[local_pos + 58] - inp[local_pos + 30]) / 2.0;
gy = (inp[local_pos + 45] - inp[local_pos + 43]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466;
gx = (inp[local_pos + 59] - inp[local_pos + 31]) / 2.0;
gy = (inp[local_pos + 46] - inp[local_pos + 44]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06319146241026467;
gx = (inp[local_pos + 60] - inp[local_pos + 32]) / 2.0;
gy = (inp[local_pos + 47] - inp[local_pos + 45]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466;
gx = (inp[local_pos + 61] - inp[local_pos + 33]) / 2.0;
gy = (inp[local_pos + 48] - inp[local_pos + 46]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906;
gx = (inp[local_pos + 71] - inp[local_pos + 43]) / 2.0;
gy = (inp[local_pos + 58] - inp[local_pos + 56]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (inp[local_pos + 72] - inp[local_pos + 44]) / 2.0;
gy = (inp[local_pos + 59] - inp[local_pos + 57]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137;
gx = (inp[local_pos + 73] - inp[local_pos + 45]) / 2.0;
gy = (inp[local_pos + 60] - inp[local_pos + 58]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.055766269846849466;
gx = (inp[local_pos + 74] - inp[local_pos + 46]) / 2.0;
gy = (inp[local_pos + 61] - inp[local_pos + 59]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04921356040854137;
gx = (inp[local_pos + 75] - inp[local_pos + 47]) / 2.0;
gy = (inp[local_pos + 62] - inp[local_pos + 60]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (inp[local_pos + 85] - inp[local_pos + 57]) / 2.0;
gy = (inp[local_pos + 72] - inp[local_pos + 70]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437;
gx = (inp[local_pos + 86] - inp[local_pos + 58]) / 2.0;
gy = (inp[local_pos + 73] - inp[local_pos + 71]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (inp[local_pos + 87] - inp[local_pos + 59]) / 2.0;
gy = (inp[local_pos + 74] - inp[local_pos + 72]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.038327559383903906;
gx = (inp[local_pos + 88] - inp[local_pos + 60]) / 2.0;
gy = (inp[local_pos + 75] - inp[local_pos + 73]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.033823952439922346;
gx = (inp[local_pos + 89] - inp[local_pos + 61]) / 2.0;
gy = (inp[local_pos + 76] - inp[local_pos + 74]) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02324683987829437;
// Treat (a, b; b, d) as a symmetric 2x2 matrix: T is its trace, D its
// determinant, and L1 >= L2 are its eigenvalues T/2 +- delta.
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
// mix(x, y, cond) is used as a select here: the result is y when cond holds.
// theta is folded by mod(.. + pi, pi) and forced to 0 when |b| is below
// 1.192092896e-7 (2^-23).
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
// mu is the relative eigenvalue spread; forced to 0 when the denominator is ~0.
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
// Quantise into lookup-table buckets: theta in steps of pi/24, 4 strength levels
// (thresholds 0.004 / 0.016 / 0.05 on lambda), 3 coherence levels (0.25 / 0.5 on mu).
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = mix(mix(0.0, 1.0, lambda >= 0.004), mix(2.0, 3.0, lambda >= 0.05), lambda >= 0.016);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
// Row of the weight texture: 24 * 4 * 3 = 288 rows, +0.5 to hit the row centre.
float coord_y = ((angle * 4.0 + strength) * 3.0 + coherence + 0.5) / 288.0;
// Weighted sum over the 7x7 window. The texture has 25 columns, read at
// (k + 0.5) / 25. Each weight vector serves a sample and its point-mirrored
// partner (offsets o and 90 - o) with the components reversed (w.wzyx); the
// centre sample (offset 45) has no partner. The four components of res are
// the four output pixels of the tile.
vec4 res = vec4(0.0, 0.0, 0.0, 0.0), w;
w = texture(ravu_lite_lut4, vec2(0.02, coord_y));
res += inp[local_pos + 0] * w + inp[local_pos + 90] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.06, coord_y));
res += inp[local_pos + 1] * w + inp[local_pos + 89] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.1, coord_y));
res += inp[local_pos + 2] * w + inp[local_pos + 88] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.14, coord_y));
res += inp[local_pos + 3] * w + inp[local_pos + 87] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.18, coord_y));
res += inp[local_pos + 4] * w + inp[local_pos + 86] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.22, coord_y));
res += inp[local_pos + 5] * w + inp[local_pos + 85] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.26, coord_y));
res += inp[local_pos + 6] * w + inp[local_pos + 84] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.3, coord_y));
res += inp[local_pos + 14] * w + inp[local_pos + 76] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.34, coord_y));
res += inp[local_pos + 15] * w + inp[local_pos + 75] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.38, coord_y));
res += inp[local_pos + 16] * w + inp[local_pos + 74] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.42, coord_y));
res += inp[local_pos + 17] * w + inp[local_pos + 73] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.46, coord_y));
res += inp[local_pos + 18] * w + inp[local_pos + 72] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.5, coord_y));
res += inp[local_pos + 19] * w + inp[local_pos + 71] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.54, coord_y));
res += inp[local_pos + 20] * w + inp[local_pos + 70] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.58, coord_y));
res += inp[local_pos + 28] * w + inp[local_pos + 62] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.62, coord_y));
res += inp[local_pos + 29] * w + inp[local_pos + 61] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.66, coord_y));
res += inp[local_pos + 30] * w + inp[local_pos + 60] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.7, coord_y));
res += inp[local_pos + 31] * w + inp[local_pos + 59] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.74, coord_y));
res += inp[local_pos + 32] * w + inp[local_pos + 58] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.78, coord_y));
res += inp[local_pos + 33] * w + inp[local_pos + 57] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.82, coord_y));
res += inp[local_pos + 34] * w + inp[local_pos + 56] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.86, coord_y));
res += inp[local_pos + 42] * w + inp[local_pos + 48] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.9, coord_y));
res += inp[local_pos + 43] * w + inp[local_pos + 47] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.94, coord_y));
res += inp[local_pos + 44] * w + inp[local_pos + 46] * w.wzyx;
w = texture(ravu_lite_lut4, vec2(0.98, coord_y));
res += inp[local_pos + 45] * w;
res = clamp(res, 0.0, 1.0);
// res[0..3] go to tile offsets (0,0), (0,1), (1,0), (1,1); the imageStore
// macro forwards only the .x of each vec4 to imageStoreOverride.
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 0), vec4(res[0], 0.0, 0.0, 0.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 1), vec4(res[1], 0.0, 0.0, 0.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 0), vec4(res[2], 0.0, 0.0, 0.0));
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 1), vec4(res[3], 0.0, 0.0, 0.0));
}

View file

@ -0,0 +1,449 @@
// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers
// Please don't edit this file directly.
// Generated by: ravu.py --target luma --weights-file weights\ravu_weights-r2.py --float-format float16dx --use-compute-shader --use-magpie --overwrite
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//!MAGPIE EFFECT
//!VERSION 4
//!TEXTURE
Texture2D INPUT;
//!SAMPLER
//!FILTER POINT
SamplerState sam_INPUT;
//!TEXTURE
//!WIDTH INPUT_WIDTH * 2
//!HEIGHT INPUT_HEIGHT * 2
Texture2D OUTPUT;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_INPUT_LINEAR;
//!TEXTURE
//!SOURCE ravu_lut2_f16.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_lut2;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_lut2;
//!TEXTURE
//!FORMAT R16_FLOAT
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
Texture2D ravu_int11;
//!SAMPLER
//!FILTER POINT
SamplerState sam_ravu_int11;
//!COMMON
#include "prescalers.hlsli"
#define LAST_PASS 2
//!PASS 1
//!DESC RAVU (step1, luma, r2, compute)
//!IN INPUT, ravu_lut2
//!OUT ravu_int11
//!BLOCK_SIZE 32, 8
//!NUM_THREADS 32, 8
// Per-group cache of input samples filled and read by Pass1: 385 entries,
// addressed as x * 11 + y.
// NOTE(review): `shared` is not an HLSL keyword; presumably prescalers.hlsli
// maps it to groupshared - confirm.
shared float inp0[385];
// Index of this pass; Pass1 compares it with LAST_PASS (2 for this effect), so
// the output bounds check is compiled out of this pass.
#define CURRENT_PASS 1
// Collapses an RGB sample to one scalar via a dot product with rgb2y.
// NOTE(review): rgb2y is not defined in this file - presumably luma weights
// from prescalers.hlsli; confirm.
#define GET_SAMPLE(x) dot(x.rgb, rgb2y)
// GLSL-style imageStore shim: the image argument is dropped and only val.x is
// forwarded to imageStoreOverride.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x)
// Store target of the imageStore shim for pass 1: writes the scalar result
// into the intermediate ravu_int11 texture at pixel `pos`.
void imageStoreOverride(uint2 pos, float value) {
    ravu_int11[pos] = value;
}
// GLSL-style accessors used by the generated pass body.
// NOTE(review): `texture` and `vec4` are not defined in this file - presumably
// prescalers.hlsli provides them; confirm.

// Samples INPUT at `pos` and reduces the result to a scalar through GET_SAMPLE.
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
// Values reported by GetInputSize() / GetInputPt(), converted to float2.
// Pass1 multiplies pixel-centre coordinates by INPUT_pt before sampling, so it
// is presumably the reciprocal of the input size - confirm.
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
// Samples the weight lookup texture at `pos`.
#define ravu_lut2_tex(pos) (vec4(texture(ravu_lut2, pos)))
// HOOKED_* aliases: in this pass the hooked texture is INPUT.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// Pass 1 (luma, r2): computes one new luma value per invocation from a 4x4 window of input luma and
// stores it to ravu_int11 at ivec2(gl_GlobalInvocationID).
// blockStart/threadId are only referenced by the bounds check, which is compiled out for this pass.
// NOTE(review): the gl_* identifiers, barrier(), mix(), mod() and the vec*/ivec* types are not declared
// in this file; presumably prescalers.hlsli provides GLSL-compatible definitions -- confirm.
void Pass1(uint2 blockStart, uint3 threadId) {
ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
// Tile index of this invocation's window origin: +11 moves one step in x, +1 moves one step in y.
int local_pos = int(gl_LocalInvocationID.x) * 11 + int(gl_LocalInvocationID.y);
{
// Fill the 385-entry tile; each invocation loads entries id, id + group size, ... until the tile is full.
for (int id = int(gl_LocalInvocationIndex); id < 385; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 11, y = (uint)id % 11;
// Tile entry (x, y) is sampled at (group_base + (x, y) - 0.5) input texels.
inp0[id] =
HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-0.5), float(group_base.y + y) + (-0.5))).x;
}
}
// The tile must be completely written before any invocation reads from it.
barrier();
// CURRENT_PASS is 1 and LAST_PASS is 2 here, so this bounds check is not compiled for pass 1.
#if CURRENT_PASS == LAST_PASS
uint2 destPos = blockStart + threadId.xy * 2;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
{
// Gather the 4x4 window. lumaN sits at offset dx * 11 + dy with N = 4 * dx + dy.
float luma0 = inp0[local_pos + 0];
float luma4 = inp0[local_pos + 11];
float luma5 = inp0[local_pos + 12];
float luma6 = inp0[local_pos + 13];
float luma7 = inp0[local_pos + 14];
float luma1 = inp0[local_pos + 1];
float luma8 = inp0[local_pos + 22];
float luma9 = inp0[local_pos + 23];
float luma10 = inp0[local_pos + 24];
float luma11 = inp0[local_pos + 25];
float luma2 = inp0[local_pos + 2];
float luma12 = inp0[local_pos + 33];
float luma13 = inp0[local_pos + 34];
float luma14 = inp0[local_pos + 35];
float luma15 = inp0[local_pos + 36];
float luma3 = inp0[local_pos + 3];
// Accumulate abd = sum of w * (gx*gx, gx*gy, gy*gy) over the 16 taps. Differences are one-sided at the
// window border and central (halved) elsewhere; the weight is largest for the four middle taps.
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (luma4 - luma0);
gy = (luma1 - luma0);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma5 - luma1);
gy = (luma2 - luma0) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma6 - luma2);
gy = (luma3 - luma1) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma7 - luma3);
gy = (luma3 - luma2);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma8 - luma0) / 2.0;
gy = (luma5 - luma4);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma9 - luma1) / 2.0;
gy = (luma6 - luma4) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma10 - luma2) / 2.0;
gy = (luma7 - luma5) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma11 - luma3) / 2.0;
gy = (luma7 - luma6);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma12 - luma4) / 2.0;
gy = (luma9 - luma8);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma13 - luma5) / 2.0;
gy = (luma10 - luma8) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma14 - luma6) / 2.0;
gy = (luma11 - luma9) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma15 - luma7) / 2.0;
gy = (luma11 - luma10);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma12 - luma8);
gy = (luma13 - luma12);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma13 - luma9);
gy = (luma14 - luma12) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma14 - luma10);
gy = (luma15 - luma13) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma15 - luma11);
gy = (luma15 - luma14);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
// Eigenvalues L1 >= L2 of the symmetric matrix [[a, b], [b, d]], from its trace T and determinant D.
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
// theta is wrapped into [0, pi) and forced to 0 when |b| is below 1.192092896e-7 (float epsilon).
// mu is forced to 0 when its denominator is below the same threshold.
// (Assumes GLSL mix() semantics with a boolean selector: the second value is taken when it is true.)
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
// Quantise to 24 angle steps, strength 0..8 and coherence 0..2; coord_y is the centre of the selected
// row out of 24 * 9 * 3 = 648.
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0;
float res = 0.0;
vec4 w;
// Two fetches (x = 0.25 and x = 0.75) yield 8 weights. Each weight multiplies a pair of taps whose tile
// offsets sum to 36, i.e. taps mirrored through the centre of the 4x4 window.
w = texture(ravu_lut2, vec2(0.25, coord_y));
res += (inp0[local_pos + 0] + inp0[local_pos + 36]) * w[0];
res += (inp0[local_pos + 1] + inp0[local_pos + 35]) * w[1];
res += (inp0[local_pos + 2] + inp0[local_pos + 34]) * w[2];
res += (inp0[local_pos + 3] + inp0[local_pos + 33]) * w[3];
w = texture(ravu_lut2, vec2(0.75, coord_y));
res += (inp0[local_pos + 11] + inp0[local_pos + 25]) * w[0];
res += (inp0[local_pos + 12] + inp0[local_pos + 24]) * w[1];
res += (inp0[local_pos + 13] + inp0[local_pos + 23]) * w[2];
res += (inp0[local_pos + 14] + inp0[local_pos + 22]) * w[3];
res = clamp(res, 0.0, 1.0);
// imageStore is a macro in this pass: it expands to a write into ravu_int11.
imageStore(out_image, ivec2(gl_GlobalInvocationID), res);
}
}
//!PASS 2
//!DESC RAVU (step2, luma, r2, compute)
//!IN INPUT, ravu_lut2, ravu_int11
//!OUT OUTPUT
//!BLOCK_SIZE 64, 16
//!NUM_THREADS 32, 8
// BLOCK_SIZE is twice NUM_THREADS in each dimension, matching the 2x2 output quad each invocation of
// Pass2 writes.
// Two per-group tiles of 385 = 35 * 11 entries each: Pass2 fills inp0 from ravu_int11 and inp1 from INPUT.
shared float inp0[385];
shared float inp1[385];
#define CURRENT_PASS 2
// Reduces a sampled colour to a single value with a dot product against rgb2y
// (rgb2y is not defined in this file; presumably supplied by prescalers.hlsli).
#define GET_SAMPLE(x) dot(x.rgb, rgb2y)
// Route the GLSL-style imageStore() of the pass body to the helper below (out_image is dropped).
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x)
// Writes the final colour for output texel `pos`: the new luma `value` is combined with two chroma
// components derived from a linearly filtered INPUT sample, converted through yuv2rgb, and stored with
// alpha 1.0.
// NOTE(review): HOOKED_map, rgb2uv, yuv2rgb and sam_INPUT_LINEAR are not defined in the visible part of
// this file; HOOKED_map presumably maps an output texel to an INPUT coordinate -- confirm.
void imageStoreOverride(uint2 pos, float value) {
float2 UV = mul(rgb2uv, INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb);
OUTPUT[pos] = float4(mul(yuv2rgb, float3(value.x, UV)), 1.0);
}
// Samples INPUT at a normalised position and converts the result with GET_SAMPLE.
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
#define ravu_lut2_tex(pos) (vec4(texture(ravu_lut2, pos)))
// Reads the pass-1 result; only the .x component is used.
#define ravu_int11_tex(pos) (float(texture(ravu_int11, pos).x))
// ravu_int11 is declared with INPUT_WIDTH x INPUT_HEIGHT, so its size is taken from GetInputSize().
static const float2 ravu_int11_size = float2(GetInputSize().x, GetInputSize().y);
static const float2 ravu_int11_pt = float2(1.0 / (ravu_int11_size.x), 1.0 / (ravu_int11_size.y));
// The pass body is written against HOOKED_* names; alias them to INPUT.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// Pass 2 (luma, r2): each invocation writes a 2x2 quad of output pixels at
// ivec2(gl_GlobalInvocationID) * 2. Pixels (0, 1) and (1, 0) are computed from 16 taps drawn from the
// pass-1 tile (inp0) and the input tile (inp1); pixels (1, 1) and (0, 0) are copied from those tiles.
// NOTE(review): the gl_* identifiers, barrier(), mix(), mod() and the vec*/ivec* types are not declared
// in this file; presumably prescalers.hlsli provides GLSL-compatible definitions -- confirm.
void Pass2(uint2 blockStart, uint3 threadId) {
ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
// Tile index of this invocation's window origin: +11 moves one step in x, +1 moves one step in y.
int local_pos = int(gl_LocalInvocationID.x) * 11 + int(gl_LocalInvocationID.y);
{
// Fill inp0 from the pass-1 texture; tile entry (x, y) is sampled at (group_base + (x, y) - 1.5) texels.
for (int id = int(gl_LocalInvocationIndex); id < 385; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 11, y = (uint)id % 11;
inp0[id] =
ravu_int11_tex(ravu_int11_pt * vec2(float(group_base.x + x) + (-1.5), float(group_base.y + y) + (-1.5)))
.x;
}
}
{
// Fill inp1 from the input luma; tile entry (x, y) is sampled at (group_base + (x, y) - 0.5) texels.
for (int id = int(gl_LocalInvocationIndex); id < 385; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 11, y = (uint)id % 11;
inp1[id] =
HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-0.5), float(group_base.y + y) + (-0.5))).x;
}
}
// Both tiles must be completely written before any invocation reads from them.
barrier();
// CURRENT_PASS equals LAST_PASS (both 2), so the check below is compiled in: invocations whose 2x2 quad
// starts outside the output return without writing. The return sits after the barrier.
#if CURRENT_PASS == LAST_PASS
uint2 destPos = blockStart + threadId.xy * 2;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
{
// Output pixel (0, 1) of the quad: 8 taps from the pass-1 tile and 8 taps from the input tile.
float luma8 = inp0[local_pos + 12];
float luma5 = inp0[local_pos + 13];
float luma2 = inp0[local_pos + 14];
float luma13 = inp0[local_pos + 23];
float luma10 = inp0[local_pos + 24];
float luma7 = inp0[local_pos + 25];
float luma0 = inp0[local_pos + 2];
float luma15 = inp0[local_pos + 35];
float luma12 = inp1[local_pos + 11];
float luma9 = inp1[local_pos + 12];
float luma6 = inp1[local_pos + 13];
float luma3 = inp1[local_pos + 14];
float luma4 = inp1[local_pos + 1];
float luma14 = inp1[local_pos + 23];
float luma11 = inp1[local_pos + 24];
float luma1 = inp1[local_pos + 2];
// Accumulate abd = sum of w * (gx*gx, gx*gy, gy*gy) over the 16 taps. Differences are one-sided at the
// window border and central (halved) elsewhere; the weight is largest for the four middle taps.
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (luma4 - luma0);
gy = (luma1 - luma0);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma5 - luma1);
gy = (luma2 - luma0) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma6 - luma2);
gy = (luma3 - luma1) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma7 - luma3);
gy = (luma3 - luma2);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma8 - luma0) / 2.0;
gy = (luma5 - luma4);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma9 - luma1) / 2.0;
gy = (luma6 - luma4) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma10 - luma2) / 2.0;
gy = (luma7 - luma5) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma11 - luma3) / 2.0;
gy = (luma7 - luma6);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma12 - luma4) / 2.0;
gy = (luma9 - luma8);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma13 - luma5) / 2.0;
gy = (luma10 - luma8) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma14 - luma6) / 2.0;
gy = (luma11 - luma9) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma15 - luma7) / 2.0;
gy = (luma11 - luma10);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma12 - luma8);
gy = (luma13 - luma12);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma13 - luma9);
gy = (luma14 - luma12) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma14 - luma10);
gy = (luma15 - luma13) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma15 - luma11);
gy = (luma15 - luma14);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
// Eigenvalues L1 >= L2 of the symmetric matrix [[a, b], [b, d]], from its trace T and determinant D.
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
// theta is wrapped into [0, pi) and forced to 0 when |b| is below 1.192092896e-7 (float epsilon).
// mu is forced to 0 when its denominator is below the same threshold.
// (Assumes GLSL mix() semantics with a boolean selector: the second value is taken when it is true.)
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
// Quantise to 24 angle steps, strength 0..8 and coherence 0..2; coord_y is the centre of the selected
// row out of 24 * 9 * 3 = 648.
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0;
float res = 0.0;
vec4 w;
// Two fetches (x = 0.25 and x = 0.75) yield 8 weights; each weight multiplies a pair of taps.
w = texture(ravu_lut2, vec2(0.25, coord_y));
res += (inp0[local_pos + 2] + inp0[local_pos + 35]) * w[0];
res += (inp1[local_pos + 2] + inp1[local_pos + 23]) * w[1];
res += (inp0[local_pos + 14] + inp0[local_pos + 23]) * w[2];
res += (inp1[local_pos + 14] + inp1[local_pos + 11]) * w[3];
w = texture(ravu_lut2, vec2(0.75, coord_y));
res += (inp1[local_pos + 1] + inp1[local_pos + 24]) * w[0];
res += (inp0[local_pos + 13] + inp0[local_pos + 24]) * w[1];
res += (inp1[local_pos + 13] + inp1[local_pos + 12]) * w[2];
res += (inp0[local_pos + 25] + inp0[local_pos + 12]) * w[3];
res = clamp(res, 0.0, 1.0);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 1), res);
}
{
// Output pixel (1, 0) of the quad: same computation with a different selection of 16 taps.
float luma4 = inp0[local_pos + 12];
float luma1 = inp0[local_pos + 13];
float luma12 = inp0[local_pos + 22];
float luma9 = inp0[local_pos + 23];
float luma6 = inp0[local_pos + 24];
float luma3 = inp0[local_pos + 25];
float luma14 = inp0[local_pos + 34];
float luma11 = inp0[local_pos + 35];
float luma8 = inp1[local_pos + 11];
float luma5 = inp1[local_pos + 12];
float luma2 = inp1[local_pos + 13];
float luma0 = inp1[local_pos + 1];
float luma13 = inp1[local_pos + 22];
float luma10 = inp1[local_pos + 23];
float luma7 = inp1[local_pos + 24];
float luma15 = inp1[local_pos + 34];
// Accumulate abd = sum of w * (gx*gx, gx*gy, gy*gy) over the 16 taps (same scheme as above).
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (luma4 - luma0);
gy = (luma1 - luma0);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma5 - luma1);
gy = (luma2 - luma0) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma6 - luma2);
gy = (luma3 - luma1) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma7 - luma3);
gy = (luma3 - luma2);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma8 - luma0) / 2.0;
gy = (luma5 - luma4);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma9 - luma1) / 2.0;
gy = (luma6 - luma4) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma10 - luma2) / 2.0;
gy = (luma7 - luma5) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma11 - luma3) / 2.0;
gy = (luma7 - luma6);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma12 - luma4) / 2.0;
gy = (luma9 - luma8);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma13 - luma5) / 2.0;
gy = (luma10 - luma8) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma14 - luma6) / 2.0;
gy = (luma11 - luma9) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma15 - luma7) / 2.0;
gy = (luma11 - luma10);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma12 - luma8);
gy = (luma13 - luma12);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma13 - luma9);
gy = (luma14 - luma12) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma14 - luma10);
gy = (luma15 - luma13) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma15 - luma11);
gy = (luma15 - luma14);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
// Eigenvalues, angle/strength/coherence quantisation and row selection, exactly as for pixel (0, 1).
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0;
float res = 0.0;
vec4 w;
w = texture(ravu_lut2, vec2(0.25, coord_y));
res += (inp1[local_pos + 1] + inp1[local_pos + 34]) * w[0];
res += (inp0[local_pos + 13] + inp0[local_pos + 34]) * w[1];
res += (inp1[local_pos + 13] + inp1[local_pos + 22]) * w[2];
res += (inp0[local_pos + 25] + inp0[local_pos + 22]) * w[3];
w = texture(ravu_lut2, vec2(0.75, coord_y));
res += (inp0[local_pos + 12] + inp0[local_pos + 35]) * w[0];
res += (inp1[local_pos + 12] + inp1[local_pos + 23]) * w[1];
res += (inp0[local_pos + 24] + inp0[local_pos + 23]) * w[2];
res += (inp1[local_pos + 24] + inp1[local_pos + 11]) * w[3];
res = clamp(res, 0.0, 1.0);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 0), res);
}
// The other two pixels of the quad are copied unchanged: (1, 1) takes the pass-1 value from inp0 and
// (0, 0) takes the input luma from inp1.
float res;
res = inp0[local_pos + 24];
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 1), res);
res = inp1[local_pos + 12];
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 0), res);
}

View file

@ -0,0 +1,450 @@
// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers
// Please don't edit this file directly.
// Generated by: ravu.py --target rgb --weights-file weights\ravu_weights-r2.py --float-format float16dx --use-compute-shader --use-magpie --overwrite
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//!MAGPIE EFFECT
//!VERSION 4
// Source image.
//!TEXTURE
Texture2D INPUT;
// Point-filtering sampler; by its name presumably paired with INPUT.
//!SAMPLER
//!FILTER POINT
SamplerState sam_INPUT;
// Result image: twice the input size in each dimension.
//!TEXTURE
//!WIDTH INPUT_WIDTH * 2
//!HEIGHT INPUT_HEIGHT * 2
Texture2D OUTPUT;
// Weight table loaded from ravu_lut2_f16.dds; both passes fetch their tap weights from it.
//!TEXTURE
//!SOURCE ravu_lut2_f16.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_lut2;
// Linear-filtering sampler; by its name presumably paired with ravu_lut2.
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_lut2;
// Four-channel intermediate texture at input resolution: pass 1 lists it under OUT and pass 2 under IN.
//!TEXTURE
//!FORMAT R16G16B16A16_FLOAT
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
Texture2D ravu_int11;
// Point-filtering sampler; by its name presumably paired with ravu_int11.
//!SAMPLER
//!FILTER POINT
SamplerState sam_ravu_int11;
// Shared section: pulls in the helper header and records the index of the final pass. Each pass compares
// its own CURRENT_PASS against LAST_PASS to decide whether the output bounds check is compiled in.
//!COMMON
#include "prescalers.hlsli"
#define LAST_PASS 2
//!PASS 1
//!DESC RAVU (step1, rgb, r2, compute)
//!IN INPUT, ravu_lut2
//!OUT ravu_int11
//!BLOCK_SIZE 32, 8
//!NUM_THREADS 32, 8
// RGB-to-luma weights (these are the Rec. 709 luma coefficients); Pass1 uses them to fill inp_luma0.
static const vec3 color_primary = vec3(0.2126, 0.7152, 0.0722);
// Per-group tiles of 385 = 35 * 11 entries, addressed as x * 11 + y by Pass1: the RGB samples and the
// luma derived from each of them.
// NOTE(review): `shared` is not an HLSL keyword; presumably prescalers.hlsli maps it to groupshared.
shared vec3 inp0[385];
shared float inp_luma0[385];
#define CURRENT_PASS 1
// In the rgb variant the sampled colour is passed through unchanged.
#define GET_SAMPLE(x) x
// Route the GLSL-style imageStore() of the pass body to the helper below. The out_image argument is
// dropped and only the .xyz components of val are forwarded.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.xyz)
// Writes an RGB value to texel `pos` of the pass-1 output texture; the fourth channel is stored as 0.0.
void imageStoreOverride(uint2 pos, vec3 value) { ravu_int11[pos] = vec4(value, 0.0); }
// Samples INPUT at a normalised position and converts the result with GET_SAMPLE.
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
// Input dimensions and the per-texel step, as reported by GetInputSize()/GetInputPt().
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
#define ravu_lut2_tex(pos) (vec4(texture(ravu_lut2, pos)))
// The pass body is written against HOOKED_* names; alias them to INPUT.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// Pass 1 (rgb, r2): computes one new RGB value per invocation from a 4x4 window of input pixels and
// stores it to ravu_int11 at ivec2(gl_GlobalInvocationID). The tap weights are selected from the luma
// of the window and then applied to the RGB samples.
// blockStart/threadId are only referenced by the bounds check, which is compiled out for this pass.
// NOTE(review): the gl_* identifiers, barrier(), mix(), mod() and the vec*/ivec* types are not declared
// in this file; presumably prescalers.hlsli provides GLSL-compatible definitions -- confirm.
void Pass1(uint2 blockStart, uint3 threadId) {
ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
// Tile index of this invocation's window origin: +11 moves one step in x, +1 moves one step in y.
int local_pos = int(gl_LocalInvocationID.x) * 11 + int(gl_LocalInvocationID.y);
{
// Fill the 385-entry tiles; each invocation loads entries id, id + group size, ... until they are full.
for (int id = int(gl_LocalInvocationIndex); id < 385; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 11, y = (uint)id % 11;
// Tile entry (x, y) is sampled at (group_base + (x, y) - 0.5) input texels.
inp0[id] =
HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-0.5), float(group_base.y + y) + (-0.5))).xyz;
inp_luma0[id] = dot(inp0[id], color_primary);
}
}
// The tiles must be completely written before any invocation reads from them.
barrier();
// CURRENT_PASS is 1 and LAST_PASS is 2 here, so this bounds check is not compiled for pass 1.
#if CURRENT_PASS == LAST_PASS
uint2 destPos = blockStart + threadId.xy * 2;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
{
// Gather the luma of the 4x4 window. lumaN sits at offset dx * 11 + dy with N = 4 * dx + dy.
float luma0 = inp_luma0[local_pos + 0];
float luma4 = inp_luma0[local_pos + 11];
float luma5 = inp_luma0[local_pos + 12];
float luma6 = inp_luma0[local_pos + 13];
float luma7 = inp_luma0[local_pos + 14];
float luma1 = inp_luma0[local_pos + 1];
float luma8 = inp_luma0[local_pos + 22];
float luma9 = inp_luma0[local_pos + 23];
float luma10 = inp_luma0[local_pos + 24];
float luma11 = inp_luma0[local_pos + 25];
float luma2 = inp_luma0[local_pos + 2];
float luma12 = inp_luma0[local_pos + 33];
float luma13 = inp_luma0[local_pos + 34];
float luma14 = inp_luma0[local_pos + 35];
float luma15 = inp_luma0[local_pos + 36];
float luma3 = inp_luma0[local_pos + 3];
// Accumulate abd = sum of w * (gx*gx, gx*gy, gy*gy) over the 16 taps. Differences are one-sided at the
// window border and central (halved) elsewhere; the weight is largest for the four middle taps.
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (luma4 - luma0);
gy = (luma1 - luma0);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma5 - luma1);
gy = (luma2 - luma0) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma6 - luma2);
gy = (luma3 - luma1) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma7 - luma3);
gy = (luma3 - luma2);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma8 - luma0) / 2.0;
gy = (luma5 - luma4);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma9 - luma1) / 2.0;
gy = (luma6 - luma4) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma10 - luma2) / 2.0;
gy = (luma7 - luma5) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma11 - luma3) / 2.0;
gy = (luma7 - luma6);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma12 - luma4) / 2.0;
gy = (luma9 - luma8);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma13 - luma5) / 2.0;
gy = (luma10 - luma8) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma14 - luma6) / 2.0;
gy = (luma11 - luma9) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma15 - luma7) / 2.0;
gy = (luma11 - luma10);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma12 - luma8);
gy = (luma13 - luma12);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma13 - luma9);
gy = (luma14 - luma12) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma14 - luma10);
gy = (luma15 - luma13) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma15 - luma11);
gy = (luma15 - luma14);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
// Eigenvalues L1 >= L2 of the symmetric matrix [[a, b], [b, d]], from its trace T and determinant D.
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
// theta is wrapped into [0, pi) and forced to 0 when |b| is below 1.192092896e-7 (float epsilon).
// mu is forced to 0 when its denominator is below the same threshold.
// (Assumes GLSL mix() semantics with a boolean selector: the second value is taken when it is true.)
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
// Quantise to 24 angle steps, strength 0..8 and coherence 0..2; coord_y is the centre of the selected
// row out of 24 * 9 * 3 = 648.
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0;
vec3 res = vec3(0.0, 0.0, 0.0);
vec4 w;
// Two fetches (x = 0.25 and x = 0.75) yield 8 weights. Each weight multiplies a pair of RGB taps whose
// tile offsets sum to 36, i.e. taps mirrored through the centre of the 4x4 window.
w = texture(ravu_lut2, vec2(0.25, coord_y));
res += (inp0[local_pos + 0] + inp0[local_pos + 36]) * w[0];
res += (inp0[local_pos + 1] + inp0[local_pos + 35]) * w[1];
res += (inp0[local_pos + 2] + inp0[local_pos + 34]) * w[2];
res += (inp0[local_pos + 3] + inp0[local_pos + 33]) * w[3];
w = texture(ravu_lut2, vec2(0.75, coord_y));
res += (inp0[local_pos + 11] + inp0[local_pos + 25]) * w[0];
res += (inp0[local_pos + 12] + inp0[local_pos + 24]) * w[1];
res += (inp0[local_pos + 13] + inp0[local_pos + 23]) * w[2];
res += (inp0[local_pos + 14] + inp0[local_pos + 22]) * w[3];
res = clamp(res, 0.0, 1.0);
// imageStore is a macro in this pass: it forwards only .xyz, so the 1.0 passed here is discarded and
// the helper stores 0.0 in the fourth channel.
imageStore(out_image, ivec2(gl_GlobalInvocationID), vec4(res, 1.0));
}
}
//!PASS 2
//!DESC RAVU (step2, rgb, r2, compute)
//!IN INPUT, ravu_lut2, ravu_int11
//!OUT OUTPUT
//!BLOCK_SIZE 64, 16
//!NUM_THREADS 32, 8
// BLOCK_SIZE is twice NUM_THREADS in each dimension, matching the 2x2 output quad each invocation of
// Pass2 writes.
// RGB-to-luma weights (these are the Rec. 709 luma coefficients); Pass2 uses them to fill the luma tiles.
static const vec3 color_primary = vec3(0.2126, 0.7152, 0.0722);
// Two pairs of per-group tiles (385 = 35 * 11 entries each): inp0/inp_luma0 are filled from ravu_int11
// and inp1/inp_luma1 from INPUT.
shared vec3 inp0[385];
shared float inp_luma0[385];
shared vec3 inp1[385];
shared float inp_luma1[385];
#define CURRENT_PASS 2
// In the rgb variant the sampled colour is passed through unchanged.
#define GET_SAMPLE(x) x
// Route the GLSL-style imageStore() of the pass body to the helper below (out_image is dropped).
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val)
// Writes the four-component value unchanged to texel `pos` of OUTPUT.
void imageStoreOverride(uint2 pos, float4 value) { OUTPUT[pos] = value; }
// Samples INPUT at a normalised position and converts the result with GET_SAMPLE.
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
#define ravu_lut2_tex(pos) (vec4(texture(ravu_lut2, pos)))
// Reads the pass-1 result; only the .xyz components are used.
#define ravu_int11_tex(pos) (vec3(texture(ravu_int11, pos).xyz))
// ravu_int11 is declared with INPUT_WIDTH x INPUT_HEIGHT, so its size is taken from GetInputSize().
static const float2 ravu_int11_size = float2(GetInputSize().x, GetInputSize().y);
static const float2 ravu_int11_pt = float2(1.0 / (ravu_int11_size.x), 1.0 / (ravu_int11_size.y));
// The pass body is written against HOOKED_* names; alias them to INPUT.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// Pass 2 (rgb, r2): each invocation writes a 2x2 quad of output pixels at
// ivec2(gl_GlobalInvocationID) * 2. Pixels (0, 1) and (1, 0) are computed from 16 taps drawn from the
// pass-1 tile (inp0) and the input tile (inp1), with weights selected from the taps' luma; pixels (1, 1)
// and (0, 0) are copied from those tiles. Every stored pixel gets alpha 1.0.
// NOTE(review): the gl_* identifiers, barrier(), mix(), mod() and the vec*/ivec* types are not declared
// in this file; presumably prescalers.hlsli provides GLSL-compatible definitions -- confirm.
void Pass2(uint2 blockStart, uint3 threadId) {
ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
// Tile index of this invocation's window origin: +11 moves one step in x, +1 moves one step in y.
int local_pos = int(gl_LocalInvocationID.x) * 11 + int(gl_LocalInvocationID.y);
{
// Fill inp0/inp_luma0 from the pass-1 texture; tile entry (x, y) is sampled at
// (group_base + (x, y) - 1.5) texels.
for (int id = int(gl_LocalInvocationIndex); id < 385; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 11, y = (uint)id % 11;
inp0[id] =
ravu_int11_tex(ravu_int11_pt * vec2(float(group_base.x + x) + (-1.5), float(group_base.y + y) + (-1.5)))
.xyz;
inp_luma0[id] = dot(inp0[id], color_primary);
}
}
{
// Fill inp1/inp_luma1 from the input image; tile entry (x, y) is sampled at
// (group_base + (x, y) - 0.5) texels.
for (int id = int(gl_LocalInvocationIndex); id < 385; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 11, y = (uint)id % 11;
inp1[id] =
HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-0.5), float(group_base.y + y) + (-0.5))).xyz;
inp_luma1[id] = dot(inp1[id], color_primary);
}
}
// All tiles must be completely written before any invocation reads from them.
barrier();
// CURRENT_PASS equals LAST_PASS (both 2), so the check below is compiled in: invocations whose 2x2 quad
// starts outside the output return without writing. The return sits after the barrier.
#if CURRENT_PASS == LAST_PASS
uint2 destPos = blockStart + threadId.xy * 2;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
{
// Output pixel (0, 1) of the quad: luma of 8 taps from the pass-1 tile and 8 taps from the input tile.
float luma8 = inp_luma0[local_pos + 12];
float luma5 = inp_luma0[local_pos + 13];
float luma2 = inp_luma0[local_pos + 14];
float luma13 = inp_luma0[local_pos + 23];
float luma10 = inp_luma0[local_pos + 24];
float luma7 = inp_luma0[local_pos + 25];
float luma0 = inp_luma0[local_pos + 2];
float luma15 = inp_luma0[local_pos + 35];
float luma12 = inp_luma1[local_pos + 11];
float luma9 = inp_luma1[local_pos + 12];
float luma6 = inp_luma1[local_pos + 13];
float luma3 = inp_luma1[local_pos + 14];
float luma4 = inp_luma1[local_pos + 1];
float luma14 = inp_luma1[local_pos + 23];
float luma11 = inp_luma1[local_pos + 24];
float luma1 = inp_luma1[local_pos + 2];
// Accumulate abd = sum of w * (gx*gx, gx*gy, gy*gy) over the 16 taps. Differences are one-sided at the
// window border and central (halved) elsewhere; the weight is largest for the four middle taps.
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (luma4 - luma0);
gy = (luma1 - luma0);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma5 - luma1);
gy = (luma2 - luma0) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma6 - luma2);
gy = (luma3 - luma1) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma7 - luma3);
gy = (luma3 - luma2);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma8 - luma0) / 2.0;
gy = (luma5 - luma4);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma9 - luma1) / 2.0;
gy = (luma6 - luma4) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma10 - luma2) / 2.0;
gy = (luma7 - luma5) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma11 - luma3) / 2.0;
gy = (luma7 - luma6);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma12 - luma4) / 2.0;
gy = (luma9 - luma8);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma13 - luma5) / 2.0;
gy = (luma10 - luma8) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma14 - luma6) / 2.0;
gy = (luma11 - luma9) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma15 - luma7) / 2.0;
gy = (luma11 - luma10);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma12 - luma8);
gy = (luma13 - luma12);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma13 - luma9);
gy = (luma14 - luma12) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma14 - luma10);
gy = (luma15 - luma13) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma15 - luma11);
gy = (luma15 - luma14);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
// Eigenvalues L1 >= L2 of the symmetric matrix [[a, b], [b, d]], from its trace T and determinant D.
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
// theta is wrapped into [0, pi) and forced to 0 when |b| is below 1.192092896e-7 (float epsilon).
// mu is forced to 0 when its denominator is below the same threshold.
// (Assumes GLSL mix() semantics with a boolean selector: the second value is taken when it is true.)
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
// Quantise to 24 angle steps, strength 0..8 and coherence 0..2; coord_y is the centre of the selected
// row out of 24 * 9 * 3 = 648.
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0;
vec3 res = vec3(0.0, 0.0, 0.0);
vec4 w;
// Two fetches (x = 0.25 and x = 0.75) yield 8 weights; each weight multiplies a pair of RGB taps.
w = texture(ravu_lut2, vec2(0.25, coord_y));
res += (inp0[local_pos + 2] + inp0[local_pos + 35]) * w[0];
res += (inp1[local_pos + 2] + inp1[local_pos + 23]) * w[1];
res += (inp0[local_pos + 14] + inp0[local_pos + 23]) * w[2];
res += (inp1[local_pos + 14] + inp1[local_pos + 11]) * w[3];
w = texture(ravu_lut2, vec2(0.75, coord_y));
res += (inp1[local_pos + 1] + inp1[local_pos + 24]) * w[0];
res += (inp0[local_pos + 13] + inp0[local_pos + 24]) * w[1];
res += (inp1[local_pos + 13] + inp1[local_pos + 12]) * w[2];
res += (inp0[local_pos + 25] + inp0[local_pos + 12]) * w[3];
res = clamp(res, 0.0, 1.0);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 1), vec4(res, 1.0));
}
{
// Output pixel (1, 0) of the quad: same computation with a different selection of 16 taps.
float luma4 = inp_luma0[local_pos + 12];
float luma1 = inp_luma0[local_pos + 13];
float luma12 = inp_luma0[local_pos + 22];
float luma9 = inp_luma0[local_pos + 23];
float luma6 = inp_luma0[local_pos + 24];
float luma3 = inp_luma0[local_pos + 25];
float luma14 = inp_luma0[local_pos + 34];
float luma11 = inp_luma0[local_pos + 35];
float luma8 = inp_luma1[local_pos + 11];
float luma5 = inp_luma1[local_pos + 12];
float luma2 = inp_luma1[local_pos + 13];
float luma0 = inp_luma1[local_pos + 1];
float luma13 = inp_luma1[local_pos + 22];
float luma10 = inp_luma1[local_pos + 23];
float luma7 = inp_luma1[local_pos + 24];
float luma15 = inp_luma1[local_pos + 34];
// Accumulate abd = sum of w * (gx*gx, gx*gy, gy*gy) over the 16 taps (same scheme as above).
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (luma4 - luma0);
gy = (luma1 - luma0);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma5 - luma1);
gy = (luma2 - luma0) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma6 - luma2);
gy = (luma3 - luma1) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma7 - luma3);
gy = (luma3 - luma2);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma8 - luma0) / 2.0;
gy = (luma5 - luma4);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma9 - luma1) / 2.0;
gy = (luma6 - luma4) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma10 - luma2) / 2.0;
gy = (luma7 - luma5) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma11 - luma3) / 2.0;
gy = (luma7 - luma6);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma12 - luma4) / 2.0;
gy = (luma9 - luma8);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma13 - luma5) / 2.0;
gy = (luma10 - luma8) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma14 - luma6) / 2.0;
gy = (luma11 - luma9) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma15 - luma7) / 2.0;
gy = (luma11 - luma10);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma12 - luma8);
gy = (luma13 - luma12);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma13 - luma9);
gy = (luma14 - luma12) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma14 - luma10);
gy = (luma15 - luma13) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma15 - luma11);
gy = (luma15 - luma14);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
// Eigenvalues, angle/strength/coherence quantisation and row selection, exactly as for pixel (0, 1).
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0;
vec3 res = vec3(0.0, 0.0, 0.0);
vec4 w;
w = texture(ravu_lut2, vec2(0.25, coord_y));
res += (inp1[local_pos + 1] + inp1[local_pos + 34]) * w[0];
res += (inp0[local_pos + 13] + inp0[local_pos + 34]) * w[1];
res += (inp1[local_pos + 13] + inp1[local_pos + 22]) * w[2];
res += (inp0[local_pos + 25] + inp0[local_pos + 22]) * w[3];
w = texture(ravu_lut2, vec2(0.75, coord_y));
res += (inp0[local_pos + 12] + inp0[local_pos + 35]) * w[0];
res += (inp1[local_pos + 12] + inp1[local_pos + 23]) * w[1];
res += (inp0[local_pos + 24] + inp0[local_pos + 23]) * w[2];
res += (inp1[local_pos + 24] + inp1[local_pos + 11]) * w[3];
res = clamp(res, 0.0, 1.0);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 0), vec4(res, 1.0));
}
// The other two pixels of the quad are copied unchanged: (1, 1) takes the pass-1 value from inp0 and
// (0, 0) takes the input pixel from inp1.
vec3 res;
res = inp0[local_pos + 24];
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 1), vec4(res, 1.0));
res = inp1[local_pos + 12];
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 0), vec4(res, 1.0));
}

View file

@ -0,0 +1,536 @@
// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers
// Please don't edit this file directly.
// Generated by: ravu.py --target luma --weights-file weights\ravu_weights-r3.py --float-format float16dx --use-compute-shader --use-magpie --overwrite
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//!MAGPIE EFFECT
//!VERSION 4
// Source image.
//!TEXTURE
Texture2D INPUT;
// Point-filtering sampler; by its name presumably paired with INPUT.
//!SAMPLER
//!FILTER POINT
SamplerState sam_INPUT;
// Result image: twice the input size in each dimension.
//!TEXTURE
//!WIDTH INPUT_WIDTH * 2
//!HEIGHT INPUT_HEIGHT * 2
Texture2D OUTPUT;
// Linear-filtering sampler. NOTE(review): its use is outside the visible part of this file; by its name
// it is presumably used for filtered reads of INPUT -- confirm.
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_INPUT_LINEAR;
// Weight table loaded from ravu_lut3_f16.dds.
//!TEXTURE
//!SOURCE ravu_lut3_f16.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_lut3;
// Linear-filtering sampler; by its name presumably paired with ravu_lut3.
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_lut3;
// Single-channel intermediate texture at input resolution; pass 1 lists it under OUT.
//!TEXTURE
//!FORMAT R16_FLOAT
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
Texture2D ravu_int11;
// Point-filtering sampler; by its name presumably paired with ravu_int11.
//!SAMPLER
//!FILTER POINT
SamplerState sam_ravu_int11;
// Shared section: pulls in the helper header and records the index of the final pass. Each pass compares
// its own CURRENT_PASS against LAST_PASS to decide whether the output bounds check is compiled in.
//!COMMON
#include "prescalers.hlsli"
#define LAST_PASS 2
//!PASS 1
//!DESC RAVU (step1, luma, r3, compute)
//!IN INPUT, ravu_lut3
//!OUT ravu_int11
//!BLOCK_SIZE 32, 8
//!NUM_THREADS 32, 8
// Tile of input luma shared by the thread group: 481 = 37 * 13 entries, i.e. the
// 32x8 thread footprint plus 5 extra texels per axis.
// NOTE(review): `shared` is not an HLSL keyword; presumably prescalers.hlsli maps
// it to groupshared -- confirm.
shared float inp0[481];
#define CURRENT_PASS 1
// Reduces an RGB(A) sample to luma using the rgb2y coefficients (defined outside this file).
#define GET_SAMPLE(x) dot(x.rgb, rgb2y)
// GLSL-style imageStore shim: the image argument is dropped and only the
// x component of the value is forwarded.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x)
// Stores one intermediate luma value into ravu_int11 at texel `pos`.
void imageStoreOverride(uint2 pos, float value) {
	ravu_int11[pos] = value;
}
// Sampling helpers. texture(), vec4 and the Get*() functions are not defined in
// this file; presumably they come from prescalers.hlsli / the Magpie runtime.
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
#define ravu_lut3_tex(pos) (vec4(texture(ravu_lut3, pos)))
// HOOKED_* are aliases of the INPUT_* helpers for this pass.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// RAVU step 1 (luma): writes one filtered luma value per input texel into ravu_int11.
//
// Flow: (1) the thread group cooperatively loads a luma tile into inp0,
// (2) each thread accumulates weighted gradient products over its window,
// (3) angle/strength/coherence are quantized into a LUT row, and (4) the LUT
// weights are applied to the window taps.
//
// NOTE(review): the gl_* built-ins, barrier(), mix(), mod() and atan() are GLSL
// names not defined in this file; presumably prescalers.hlsli maps them onto
// blockStart/threadId and HLSL intrinsics -- confirm there.
void Pass1(uint2 blockStart, uint3 threadId) {
// Coordinate of the group's first thread, in input texels.
ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
// Origin of this thread's window inside the tile; the tile is laid out with
// x as the slow axis (stride 13) and y as the fast axis.
int local_pos = int(gl_LocalInvocationID.x) * 13 + int(gl_LocalInvocationID.y);
{
// Cooperative tile load: every thread fills entries id, id + groupThreads, ...
for (int id = int(gl_LocalInvocationIndex); id < 481; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 13, y = (uint)id % 13;
// Tile origin is group_base - 1.5 in texel units; HOOKED_pt scales texels to UV.
inp0[id] =
HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-1.5), float(group_base.y + y) + (-1.5))).x;
}
}
// The tile must be completely written before any thread reads its neighbours' entries.
barrier();
// CURRENT_PASS is 1 and LAST_PASS is 2 here, so this bounds check is compiled out.
#if CURRENT_PASS == LAST_PASS
uint2 destPos = blockStart + threadId.xy * 2;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
{
// Luma taps used for gradient estimation (offsets are relative to local_pos).
float luma6 = inp0[local_pos + 13];
float luma7 = inp0[local_pos + 14];
float luma8 = inp0[local_pos + 15];
float luma9 = inp0[local_pos + 16];
float luma10 = inp0[local_pos + 17];
float luma11 = inp0[local_pos + 18];
float luma1 = inp0[local_pos + 1];
float luma12 = inp0[local_pos + 26];
float luma13 = inp0[local_pos + 27];
float luma14 = inp0[local_pos + 28];
float luma15 = inp0[local_pos + 29];
float luma2 = inp0[local_pos + 2];
float luma16 = inp0[local_pos + 30];
float luma17 = inp0[local_pos + 31];
float luma18 = inp0[local_pos + 39];
float luma3 = inp0[local_pos + 3];
float luma19 = inp0[local_pos + 40];
float luma20 = inp0[local_pos + 41];
float luma21 = inp0[local_pos + 42];
float luma22 = inp0[local_pos + 43];
float luma23 = inp0[local_pos + 44];
float luma4 = inp0[local_pos + 4];
float luma24 = inp0[local_pos + 52];
float luma25 = inp0[local_pos + 53];
float luma26 = inp0[local_pos + 54];
float luma27 = inp0[local_pos + 55];
float luma28 = inp0[local_pos + 56];
float luma29 = inp0[local_pos + 57];
float luma31 = inp0[local_pos + 66];
float luma32 = inp0[local_pos + 67];
float luma33 = inp0[local_pos + 68];
float luma34 = inp0[local_pos + 69];
// abd accumulates (gx*gx, gx*gy, gy*gy) over 16 positions, each scaled by a
// fixed per-position weight. Per axis, a gradient is either a 2-point
// difference (/2) or a 4-point stencil (/12).
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (luma13 - luma1) / 2.0;
gy = (luma8 - luma6) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma14 - luma2) / 2.0;
gy = (-luma10 + 8.0 * luma9 - 8.0 * luma7 + luma6) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma15 - luma3) / 2.0;
gy = (-luma11 + 8.0 * luma10 - 8.0 * luma8 + luma7) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma16 - luma4) / 2.0;
gy = (luma11 - luma9) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (-luma25 + 8.0 * luma19 - 8.0 * luma7 + luma1) / 12.0;
gy = (luma14 - luma12) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-luma26 + 8.0 * luma20 - 8.0 * luma8 + luma2) / 12.0;
gy = (-luma16 + 8.0 * luma15 - 8.0 * luma13 + luma12) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma27 + 8.0 * luma21 - 8.0 * luma9 + luma3) / 12.0;
gy = (-luma17 + 8.0 * luma16 - 8.0 * luma14 + luma13) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma28 + 8.0 * luma22 - 8.0 * luma10 + luma4) / 12.0;
gy = (luma17 - luma15) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-luma31 + 8.0 * luma25 - 8.0 * luma13 + luma7) / 12.0;
gy = (luma20 - luma18) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-luma32 + 8.0 * luma26 - 8.0 * luma14 + luma8) / 12.0;
gy = (-luma22 + 8.0 * luma21 - 8.0 * luma19 + luma18) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma33 + 8.0 * luma27 - 8.0 * luma15 + luma9) / 12.0;
gy = (-luma23 + 8.0 * luma22 - 8.0 * luma20 + luma19) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma34 + 8.0 * luma28 - 8.0 * luma16 + luma10) / 12.0;
gy = (luma23 - luma21) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma31 - luma19) / 2.0;
gy = (luma26 - luma24) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma32 - luma20) / 2.0;
gy = (-luma28 + 8.0 * luma27 - 8.0 * luma25 + luma24) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma33 - luma21) / 2.0;
gy = (-luma29 + 8.0 * luma28 - 8.0 * luma26 + luma25) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma34 - luma22) / 2.0;
gy = (luma29 - luma27) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
// Closed-form eigenvalues L1 >= L2 of the symmetric matrix [[a, b], [b, d]]
// (T = trace, D = determinant); max() guards the discriminant against
// going slightly negative.
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
// Angle wrapped into [0, pi); forced to 0 when b is below float epsilon.
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
// Relative eigenvalue spread; forced to 0 when both square roots are ~0 to avoid dividing by zero.
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
// Quantize: angle via floor(theta * 24 / pi), 9 strength levels (0..8),
// 3 coherence levels (0..2).
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
// Row index into the LUT, normalized over 648 = 24 * 9 * 3 rows; +0.5 targets the row centre.
float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0;
// Five LUT fetches provide the 18 weights used below. Each weight is applied
// to a pair of taps whose offsets sum to 70, i.e. mirrored around local_pos + 35.
float res = 0.0;
vec4 w;
w = texture(ravu_lut3, vec2(0.1, coord_y));
res += (inp0[local_pos + 0] + inp0[local_pos + 70]) * w[0];
res += (inp0[local_pos + 1] + inp0[local_pos + 69]) * w[1];
res += (inp0[local_pos + 2] + inp0[local_pos + 68]) * w[2];
res += (inp0[local_pos + 3] + inp0[local_pos + 67]) * w[3];
w = texture(ravu_lut3, vec2(0.3, coord_y));
res += (inp0[local_pos + 4] + inp0[local_pos + 66]) * w[0];
res += (inp0[local_pos + 5] + inp0[local_pos + 65]) * w[1];
res += (inp0[local_pos + 13] + inp0[local_pos + 57]) * w[2];
res += (inp0[local_pos + 14] + inp0[local_pos + 56]) * w[3];
w = texture(ravu_lut3, vec2(0.5, coord_y));
res += (inp0[local_pos + 15] + inp0[local_pos + 55]) * w[0];
res += (inp0[local_pos + 16] + inp0[local_pos + 54]) * w[1];
res += (inp0[local_pos + 17] + inp0[local_pos + 53]) * w[2];
res += (inp0[local_pos + 18] + inp0[local_pos + 52]) * w[3];
w = texture(ravu_lut3, vec2(0.7, coord_y));
res += (inp0[local_pos + 26] + inp0[local_pos + 44]) * w[0];
res += (inp0[local_pos + 27] + inp0[local_pos + 43]) * w[1];
res += (inp0[local_pos + 28] + inp0[local_pos + 42]) * w[2];
res += (inp0[local_pos + 29] + inp0[local_pos + 41]) * w[3];
w = texture(ravu_lut3, vec2(0.9, coord_y));
res += (inp0[local_pos + 30] + inp0[local_pos + 40]) * w[0];
res += (inp0[local_pos + 31] + inp0[local_pos + 39]) * w[1];
res = clamp(res, 0.0, 1.0);
// imageStore is a macro that forwards to imageStoreOverride, which writes ravu_int11.
imageStore(out_image, ivec2(gl_GlobalInvocationID), res);
}
}
//!PASS 2
//!DESC RAVU (step2, luma, r3, compute)
//!IN INPUT, ravu_lut3, ravu_int11
//!OUT OUTPUT
//!BLOCK_SIZE 64, 16
//!NUM_THREADS 32, 8
// Two 37 * 13 = 481 entry tiles shared by the thread group: inp0 is filled from
// ravu_int11 (pass 1 result) and inp1 from the luma of INPUT.
// NOTE(review): `shared` is not an HLSL keyword; presumably prescalers.hlsli maps
// it to groupshared -- confirm.
shared float inp0[481];
shared float inp1[481];
// Equal to LAST_PASS, so Pass2 compiles its output bounds check in.
#define CURRENT_PASS 2
// Reduces an RGB(A) sample to luma using the rgb2y coefficients (defined outside this file).
#define GET_SAMPLE(x) dot(x.rgb, rgb2y)
// GLSL-style imageStore shim: the image argument is dropped and only the
// x component of the value is forwarded.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x)
// Writes one output pixel: the upscaled luma `value` is combined with chroma
// taken from a linearly filtered read of INPUT, then converted back to RGB.
// HOOKED_map, rgb2uv and yuv2rgb are defined outside this file.
void imageStoreOverride(uint2 pos, float value) {
	const float3 srcRgb = INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb;
	const float2 chroma = mul(rgb2uv, srcRgb);
	const float3 yuv = float3(value, chroma);
	OUTPUT[pos] = float4(mul(yuv2rgb, yuv), 1.0);
}
// Sampling helpers. texture(), vec3/vec4 and the Get*() functions are not defined
// in this file; presumably they come from prescalers.hlsli / the Magpie runtime.
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
#define ravu_lut3_tex(pos) (vec4(texture(ravu_lut3, pos)))
#define ravu_int11_tex(pos) (float(texture(ravu_int11, pos).x))
// ravu_int11 is declared with the input's width and height, so its size and
// texel pitch are derived from the input size.
static const float2 ravu_int11_size = float2(GetInputSize().x, GetInputSize().y);
static const float2 ravu_int11_pt = float2(1.0 / (ravu_int11_size.x), 1.0 / (ravu_int11_size.y));
// HOOKED_* are aliases of the INPUT_* helpers for this pass.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// RAVU step 2 (luma): each thread writes a 2x2 quad of OUTPUT pixels.
//
// Quad layout relative to gl_GlobalInvocationID * 2:
//   (0, 0) copy of the input luma           (inp1[local_pos + 28])
//   (1, 1) copy of the pass 1 result        (inp0[local_pos + 42])
//   (0, 1) and (1, 0) are computed here from taps interleaved between inp0 and inp1.
//
// NOTE(review): the gl_* built-ins, barrier(), mix(), mod() and atan() are GLSL
// names not defined in this file; presumably prescalers.hlsli maps them onto
// blockStart/threadId and HLSL intrinsics -- confirm there.
void Pass2(uint2 blockStart, uint3 threadId) {
// Coordinate of the group's first thread, in input texels.
ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
// Origin of this thread's window inside the tiles (x is the slow axis, stride 13).
int local_pos = int(gl_LocalInvocationID.x) * 13 + int(gl_LocalInvocationID.y);
{
// Cooperative load of the pass 1 result; tile origin is group_base - 2.5 texels.
for (int id = int(gl_LocalInvocationIndex); id < 481; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 13, y = (uint)id % 13;
inp0[id] =
ravu_int11_tex(ravu_int11_pt * vec2(float(group_base.x + x) + (-2.5), float(group_base.y + y) + (-2.5)))
.x;
}
}
{
// Cooperative load of the input luma; tile origin is group_base - 1.5 texels.
for (int id = int(gl_LocalInvocationIndex); id < 481; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 13, y = (uint)id % 13;
inp1[id] =
HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-1.5), float(group_base.y + y) + (-1.5))).x;
}
}
// Both tiles must be completely written before any thread reads its neighbours' entries.
barrier();
// CURRENT_PASS equals LAST_PASS (2) here, so threads whose 2x2 quad starts
// outside the output return early. This happens after the barrier, so every
// thread still takes part in the tile load.
#if CURRENT_PASS == LAST_PASS
uint2 destPos = blockStart + threadId.xy * 2;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
{
// ---- Output pixel (0, 1) of the quad ----
// Luma taps for gradient estimation, drawn from both tiles.
float luma12 = inp0[local_pos + 15];
float luma7 = inp0[local_pos + 16];
float luma2 = inp0[local_pos + 17];
float luma24 = inp0[local_pos + 27];
float luma19 = inp0[local_pos + 28];
float luma14 = inp0[local_pos + 29];
float luma9 = inp0[local_pos + 30];
float luma4 = inp0[local_pos + 31];
float luma31 = inp0[local_pos + 40];
float luma26 = inp0[local_pos + 41];
float luma21 = inp0[local_pos + 42];
float luma16 = inp0[local_pos + 43];
float luma11 = inp0[local_pos + 44];
float luma33 = inp0[local_pos + 54];
float luma28 = inp0[local_pos + 55];
float luma23 = inp0[local_pos + 56];
float luma18 = inp1[local_pos + 14];
float luma13 = inp1[local_pos + 15];
float luma8 = inp1[local_pos + 16];
float luma3 = inp1[local_pos + 17];
float luma25 = inp1[local_pos + 27];
float luma20 = inp1[local_pos + 28];
float luma15 = inp1[local_pos + 29];
float luma6 = inp1[local_pos + 2];
float luma10 = inp1[local_pos + 30];
float luma1 = inp1[local_pos + 3];
float luma32 = inp1[local_pos + 40];
float luma27 = inp1[local_pos + 41];
float luma22 = inp1[local_pos + 42];
float luma17 = inp1[local_pos + 43];
float luma34 = inp1[local_pos + 54];
float luma29 = inp1[local_pos + 55];
// abd accumulates (gx*gx, gx*gy, gy*gy) over 16 positions, each scaled by a
// fixed per-position weight. Per axis, a gradient is either a 2-point
// difference (/2) or a 4-point stencil (/12).
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (luma13 - luma1) / 2.0;
gy = (luma8 - luma6) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma14 - luma2) / 2.0;
gy = (-luma10 + 8.0 * luma9 - 8.0 * luma7 + luma6) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma15 - luma3) / 2.0;
gy = (-luma11 + 8.0 * luma10 - 8.0 * luma8 + luma7) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma16 - luma4) / 2.0;
gy = (luma11 - luma9) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (-luma25 + 8.0 * luma19 - 8.0 * luma7 + luma1) / 12.0;
gy = (luma14 - luma12) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-luma26 + 8.0 * luma20 - 8.0 * luma8 + luma2) / 12.0;
gy = (-luma16 + 8.0 * luma15 - 8.0 * luma13 + luma12) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma27 + 8.0 * luma21 - 8.0 * luma9 + luma3) / 12.0;
gy = (-luma17 + 8.0 * luma16 - 8.0 * luma14 + luma13) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma28 + 8.0 * luma22 - 8.0 * luma10 + luma4) / 12.0;
gy = (luma17 - luma15) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-luma31 + 8.0 * luma25 - 8.0 * luma13 + luma7) / 12.0;
gy = (luma20 - luma18) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-luma32 + 8.0 * luma26 - 8.0 * luma14 + luma8) / 12.0;
gy = (-luma22 + 8.0 * luma21 - 8.0 * luma19 + luma18) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma33 + 8.0 * luma27 - 8.0 * luma15 + luma9) / 12.0;
gy = (-luma23 + 8.0 * luma22 - 8.0 * luma20 + luma19) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma34 + 8.0 * luma28 - 8.0 * luma16 + luma10) / 12.0;
gy = (luma23 - luma21) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma31 - luma19) / 2.0;
gy = (luma26 - luma24) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma32 - luma20) / 2.0;
gy = (-luma28 + 8.0 * luma27 - 8.0 * luma25 + luma24) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma33 - luma21) / 2.0;
gy = (-luma29 + 8.0 * luma28 - 8.0 * luma26 + luma25) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma34 - luma22) / 2.0;
gy = (luma29 - luma27) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
// Closed-form eigenvalues L1 >= L2 of the symmetric matrix [[a, b], [b, d]]
// (T = trace, D = determinant).
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
// Angle wrapped into [0, pi); forced to 0 when b is below float epsilon.
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
// Relative eigenvalue spread; forced to 0 when both square roots are ~0 to avoid dividing by zero.
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
// Quantize into a LUT row: 648 = 24 * 9 * 3 rows; +0.5 targets the row centre.
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0;
// Five LUT fetches provide the 18 weights used below; each weight is shared by a pair of taps.
float res = 0.0;
vec4 w;
w = texture(ravu_lut3, vec2(0.1, coord_y));
res += (inp0[local_pos + 3] + inp0[local_pos + 68]) * w[0];
res += (inp1[local_pos + 3] + inp1[local_pos + 54]) * w[1];
res += (inp0[local_pos + 17] + inp0[local_pos + 54]) * w[2];
res += (inp1[local_pos + 17] + inp1[local_pos + 40]) * w[3];
w = texture(ravu_lut3, vec2(0.3, coord_y));
res += (inp0[local_pos + 31] + inp0[local_pos + 40]) * w[0];
res += (inp1[local_pos + 31] + inp1[local_pos + 26]) * w[1];
res += (inp1[local_pos + 2] + inp1[local_pos + 55]) * w[2];
res += (inp0[local_pos + 16] + inp0[local_pos + 55]) * w[3];
w = texture(ravu_lut3, vec2(0.5, coord_y));
res += (inp1[local_pos + 16] + inp1[local_pos + 41]) * w[0];
res += (inp0[local_pos + 30] + inp0[local_pos + 41]) * w[1];
res += (inp1[local_pos + 30] + inp1[local_pos + 27]) * w[2];
res += (inp0[local_pos + 44] + inp0[local_pos + 27]) * w[3];
w = texture(ravu_lut3, vec2(0.7, coord_y));
res += (inp0[local_pos + 15] + inp0[local_pos + 56]) * w[0];
res += (inp1[local_pos + 15] + inp1[local_pos + 42]) * w[1];
res += (inp0[local_pos + 29] + inp0[local_pos + 42]) * w[2];
res += (inp1[local_pos + 29] + inp1[local_pos + 28]) * w[3];
w = texture(ravu_lut3, vec2(0.9, coord_y));
res += (inp0[local_pos + 43] + inp0[local_pos + 28]) * w[0];
res += (inp1[local_pos + 43] + inp1[local_pos + 14]) * w[1];
res = clamp(res, 0.0, 1.0);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 1), res);
}
{
// ---- Output pixel (1, 0) of the quad ----
// Same procedure as above with a different assignment of taps to the two tiles.
float luma6 = inp0[local_pos + 15];
float luma1 = inp0[local_pos + 16];
float luma18 = inp0[local_pos + 27];
float luma13 = inp0[local_pos + 28];
float luma8 = inp0[local_pos + 29];
float luma3 = inp0[local_pos + 30];
float luma25 = inp0[local_pos + 40];
float luma20 = inp0[local_pos + 41];
float luma15 = inp0[local_pos + 42];
float luma10 = inp0[local_pos + 43];
float luma32 = inp0[local_pos + 53];
float luma27 = inp0[local_pos + 54];
float luma22 = inp0[local_pos + 55];
float luma17 = inp0[local_pos + 56];
float luma34 = inp0[local_pos + 67];
float luma29 = inp0[local_pos + 68];
float luma12 = inp1[local_pos + 14];
float luma7 = inp1[local_pos + 15];
float luma2 = inp1[local_pos + 16];
float luma24 = inp1[local_pos + 26];
float luma19 = inp1[local_pos + 27];
float luma14 = inp1[local_pos + 28];
float luma9 = inp1[local_pos + 29];
float luma4 = inp1[local_pos + 30];
float luma31 = inp1[local_pos + 39];
float luma26 = inp1[local_pos + 40];
float luma21 = inp1[local_pos + 41];
float luma16 = inp1[local_pos + 42];
float luma11 = inp1[local_pos + 43];
float luma33 = inp1[local_pos + 53];
float luma28 = inp1[local_pos + 54];
float luma23 = inp1[local_pos + 55];
// Weighted accumulation of gradient products, identical in form to the first block.
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (luma13 - luma1) / 2.0;
gy = (luma8 - luma6) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma14 - luma2) / 2.0;
gy = (-luma10 + 8.0 * luma9 - 8.0 * luma7 + luma6) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma15 - luma3) / 2.0;
gy = (-luma11 + 8.0 * luma10 - 8.0 * luma8 + luma7) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma16 - luma4) / 2.0;
gy = (luma11 - luma9) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (-luma25 + 8.0 * luma19 - 8.0 * luma7 + luma1) / 12.0;
gy = (luma14 - luma12) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-luma26 + 8.0 * luma20 - 8.0 * luma8 + luma2) / 12.0;
gy = (-luma16 + 8.0 * luma15 - 8.0 * luma13 + luma12) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma27 + 8.0 * luma21 - 8.0 * luma9 + luma3) / 12.0;
gy = (-luma17 + 8.0 * luma16 - 8.0 * luma14 + luma13) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma28 + 8.0 * luma22 - 8.0 * luma10 + luma4) / 12.0;
gy = (luma17 - luma15) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-luma31 + 8.0 * luma25 - 8.0 * luma13 + luma7) / 12.0;
gy = (luma20 - luma18) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-luma32 + 8.0 * luma26 - 8.0 * luma14 + luma8) / 12.0;
gy = (-luma22 + 8.0 * luma21 - 8.0 * luma19 + luma18) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma33 + 8.0 * luma27 - 8.0 * luma15 + luma9) / 12.0;
gy = (-luma23 + 8.0 * luma22 - 8.0 * luma20 + luma19) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma34 + 8.0 * luma28 - 8.0 * luma16 + luma10) / 12.0;
gy = (luma23 - luma21) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma31 - luma19) / 2.0;
gy = (luma26 - luma24) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma32 - luma20) / 2.0;
gy = (-luma28 + 8.0 * luma27 - 8.0 * luma25 + luma24) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma33 - luma21) / 2.0;
gy = (-luma29 + 8.0 * luma28 - 8.0 * luma26 + luma25) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma34 - luma22) / 2.0;
gy = (luma29 - luma27) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
// Eigen-analysis and LUT row selection, as in the first block.
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0;
// Apply the 18 LUT weights to tap pairs.
float res = 0.0;
vec4 w;
w = texture(ravu_lut3, vec2(0.1, coord_y));
res += (inp1[local_pos + 2] + inp1[local_pos + 67]) * w[0];
res += (inp0[local_pos + 16] + inp0[local_pos + 67]) * w[1];
res += (inp1[local_pos + 16] + inp1[local_pos + 53]) * w[2];
res += (inp0[local_pos + 30] + inp0[local_pos + 53]) * w[3];
w = texture(ravu_lut3, vec2(0.3, coord_y));
res += (inp1[local_pos + 30] + inp1[local_pos + 39]) * w[0];
res += (inp0[local_pos + 44] + inp0[local_pos + 39]) * w[1];
res += (inp0[local_pos + 15] + inp0[local_pos + 68]) * w[2];
res += (inp1[local_pos + 15] + inp1[local_pos + 54]) * w[3];
w = texture(ravu_lut3, vec2(0.5, coord_y));
res += (inp0[local_pos + 29] + inp0[local_pos + 54]) * w[0];
res += (inp1[local_pos + 29] + inp1[local_pos + 40]) * w[1];
res += (inp0[local_pos + 43] + inp0[local_pos + 40]) * w[2];
res += (inp1[local_pos + 43] + inp1[local_pos + 26]) * w[3];
w = texture(ravu_lut3, vec2(0.7, coord_y));
res += (inp1[local_pos + 14] + inp1[local_pos + 55]) * w[0];
res += (inp0[local_pos + 28] + inp0[local_pos + 55]) * w[1];
res += (inp1[local_pos + 28] + inp1[local_pos + 41]) * w[2];
res += (inp0[local_pos + 42] + inp0[local_pos + 41]) * w[3];
w = texture(ravu_lut3, vec2(0.9, coord_y));
res += (inp1[local_pos + 42] + inp1[local_pos + 27]) * w[0];
res += (inp0[local_pos + 56] + inp0[local_pos + 27]) * w[1];
res = clamp(res, 0.0, 1.0);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 0), res);
}
// The remaining two pixels of the quad are straight copies: (1, 1) takes the
// pass 1 result and (0, 0) takes the input luma.
float res;
res = inp0[local_pos + 42];
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 1), res);
res = inp1[local_pos + 28];
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 0), res);
}

View file

@ -0,0 +1,537 @@
// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers
// Please don't edit this file directly.
// Generated by: ravu.py --target rgb --weights-file weights\ravu_weights-r3.py --float-format float16dx --use-compute-shader --use-magpie --overwrite
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//!MAGPIE EFFECT
//!VERSION 4
// Source image read by both passes.
//!TEXTURE
Texture2D INPUT;
// Point-filtered sampler; presumably paired with INPUT by the texture()
// helper from prescalers.hlsli (the pairing is not visible in this file).
//!SAMPLER
//!FILTER POINT
SamplerState sam_INPUT;
// Final image, twice the input size in each dimension.
//!TEXTURE
//!WIDTH INPUT_WIDTH * 2
//!HEIGHT INPUT_HEIGHT * 2
Texture2D OUTPUT;
// Weight lookup table loaded from ravu_lut3_f16.dds (four half floats per texel).
//!TEXTURE
//!SOURCE ravu_lut3_f16.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_lut3;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_lut3;
// Four-channel intermediate at input resolution: written by pass 1, read by pass 2.
//!TEXTURE
//!FORMAT R16G16B16A16_FLOAT
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
Texture2D ravu_int11;
//!SAMPLER
//!FILTER POINT
SamplerState sam_ravu_int11;
//!COMMON
#include "prescalers.hlsli"
// Each pass compares its own CURRENT_PASS against LAST_PASS to decide whether
// the output bounds check is compiled in.
#define LAST_PASS 2
//!PASS 1
//!DESC RAVU (step1, rgb, r3, compute)
//!IN INPUT, ravu_lut3
//!OUT ravu_int11
//!BLOCK_SIZE 32, 8
//!NUM_THREADS 32, 8
// RGB weights used to derive the luma that drives gradient estimation.
static const vec3 color_primary = vec3(0.2126, 0.7152, 0.0722);
// Tiles shared by the thread group (481 = 37 * 13 entries each): the RGB samples
// and their luma.
// NOTE(review): `shared` is not an HLSL keyword; presumably prescalers.hlsli maps
// it to groupshared -- confirm.
shared vec3 inp0[481];
shared float inp_luma0[481];
#define CURRENT_PASS 1
// Samples are used as-is (no luma reduction) in the RGB variant.
#define GET_SAMPLE(x) x
// GLSL-style imageStore shim: the image argument is dropped and only the
// xyz components of the value are forwarded.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.xyz)
// Stores one intermediate RGB value into ravu_int11 at texel `pos`; the fourth
// channel is written as 0.
void imageStoreOverride(uint2 pos, vec3 value) {
	ravu_int11[pos] = vec4(value, 0.0);
}
// Sampling helpers. texture(), vec4 and the Get*() functions are not defined in
// this file; presumably they come from prescalers.hlsli / the Magpie runtime.
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
#define ravu_lut3_tex(pos) (vec4(texture(ravu_lut3, pos)))
// HOOKED_* are aliases of the INPUT_* helpers for this pass.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// RAVU step 1 (rgb): writes one filtered RGB value per input texel into ravu_int11.
//
// Gradients are estimated on luma (inp_luma0), while the LUT weights are applied
// to the full RGB samples (inp0).
//
// NOTE(review): the gl_* built-ins, barrier(), mix(), mod() and atan() are GLSL
// names not defined in this file; presumably prescalers.hlsli maps them onto
// blockStart/threadId and HLSL intrinsics -- confirm there.
void Pass1(uint2 blockStart, uint3 threadId) {
// Coordinate of the group's first thread, in input texels.
ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
// Origin of this thread's window inside the tiles (x is the slow axis, stride 13).
int local_pos = int(gl_LocalInvocationID.x) * 13 + int(gl_LocalInvocationID.y);
{
// Cooperative tile load: every thread fills entries id, id + groupThreads, ...
for (int id = int(gl_LocalInvocationIndex); id < 481; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 13, y = (uint)id % 13;
// Tile origin is group_base - 1.5 in texel units; HOOKED_pt scales texels to UV.
inp0[id] =
HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-1.5), float(group_base.y + y) + (-1.5))).xyz;
// Cache the luma alongside the colour so it is computed once per texel.
inp_luma0[id] = dot(inp0[id], color_primary);
}
}
// The tiles must be completely written before any thread reads its neighbours' entries.
barrier();
// CURRENT_PASS is 1 and LAST_PASS is 2 here, so this bounds check is compiled out.
#if CURRENT_PASS == LAST_PASS
uint2 destPos = blockStart + threadId.xy * 2;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
{
// Luma taps used for gradient estimation (offsets are relative to local_pos).
float luma6 = inp_luma0[local_pos + 13];
float luma7 = inp_luma0[local_pos + 14];
float luma8 = inp_luma0[local_pos + 15];
float luma9 = inp_luma0[local_pos + 16];
float luma10 = inp_luma0[local_pos + 17];
float luma11 = inp_luma0[local_pos + 18];
float luma1 = inp_luma0[local_pos + 1];
float luma12 = inp_luma0[local_pos + 26];
float luma13 = inp_luma0[local_pos + 27];
float luma14 = inp_luma0[local_pos + 28];
float luma15 = inp_luma0[local_pos + 29];
float luma2 = inp_luma0[local_pos + 2];
float luma16 = inp_luma0[local_pos + 30];
float luma17 = inp_luma0[local_pos + 31];
float luma18 = inp_luma0[local_pos + 39];
float luma3 = inp_luma0[local_pos + 3];
float luma19 = inp_luma0[local_pos + 40];
float luma20 = inp_luma0[local_pos + 41];
float luma21 = inp_luma0[local_pos + 42];
float luma22 = inp_luma0[local_pos + 43];
float luma23 = inp_luma0[local_pos + 44];
float luma4 = inp_luma0[local_pos + 4];
float luma24 = inp_luma0[local_pos + 52];
float luma25 = inp_luma0[local_pos + 53];
float luma26 = inp_luma0[local_pos + 54];
float luma27 = inp_luma0[local_pos + 55];
float luma28 = inp_luma0[local_pos + 56];
float luma29 = inp_luma0[local_pos + 57];
float luma31 = inp_luma0[local_pos + 66];
float luma32 = inp_luma0[local_pos + 67];
float luma33 = inp_luma0[local_pos + 68];
float luma34 = inp_luma0[local_pos + 69];
// abd accumulates (gx*gx, gx*gy, gy*gy) over 16 positions, each scaled by a
// fixed per-position weight. Per axis, a gradient is either a 2-point
// difference (/2) or a 4-point stencil (/12).
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (luma13 - luma1) / 2.0;
gy = (luma8 - luma6) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma14 - luma2) / 2.0;
gy = (-luma10 + 8.0 * luma9 - 8.0 * luma7 + luma6) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma15 - luma3) / 2.0;
gy = (-luma11 + 8.0 * luma10 - 8.0 * luma8 + luma7) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma16 - luma4) / 2.0;
gy = (luma11 - luma9) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (-luma25 + 8.0 * luma19 - 8.0 * luma7 + luma1) / 12.0;
gy = (luma14 - luma12) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-luma26 + 8.0 * luma20 - 8.0 * luma8 + luma2) / 12.0;
gy = (-luma16 + 8.0 * luma15 - 8.0 * luma13 + luma12) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma27 + 8.0 * luma21 - 8.0 * luma9 + luma3) / 12.0;
gy = (-luma17 + 8.0 * luma16 - 8.0 * luma14 + luma13) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma28 + 8.0 * luma22 - 8.0 * luma10 + luma4) / 12.0;
gy = (luma17 - luma15) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-luma31 + 8.0 * luma25 - 8.0 * luma13 + luma7) / 12.0;
gy = (luma20 - luma18) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-luma32 + 8.0 * luma26 - 8.0 * luma14 + luma8) / 12.0;
gy = (-luma22 + 8.0 * luma21 - 8.0 * luma19 + luma18) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma33 + 8.0 * luma27 - 8.0 * luma15 + luma9) / 12.0;
gy = (-luma23 + 8.0 * luma22 - 8.0 * luma20 + luma19) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma34 + 8.0 * luma28 - 8.0 * luma16 + luma10) / 12.0;
gy = (luma23 - luma21) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma31 - luma19) / 2.0;
gy = (luma26 - luma24) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma32 - luma20) / 2.0;
gy = (-luma28 + 8.0 * luma27 - 8.0 * luma25 + luma24) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma33 - luma21) / 2.0;
gy = (-luma29 + 8.0 * luma28 - 8.0 * luma26 + luma25) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma34 - luma22) / 2.0;
gy = (luma29 - luma27) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
// Closed-form eigenvalues L1 >= L2 of the symmetric matrix [[a, b], [b, d]]
// (T = trace, D = determinant); max() guards the discriminant against
// going slightly negative.
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
// Angle wrapped into [0, pi); forced to 0 when b is below float epsilon.
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
// Relative eigenvalue spread; forced to 0 when both square roots are ~0 to avoid dividing by zero.
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
// Quantize into a LUT row: 648 = 24 * 9 * 3 rows; +0.5 targets the row centre.
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0;
// Five LUT fetches provide the 18 weights used below. Each weight is applied
// to a pair of RGB taps whose offsets sum to 70, i.e. mirrored around local_pos + 35.
vec3 res = vec3(0.0, 0.0, 0.0);
vec4 w;
w = texture(ravu_lut3, vec2(0.1, coord_y));
res += (inp0[local_pos + 0] + inp0[local_pos + 70]) * w[0];
res += (inp0[local_pos + 1] + inp0[local_pos + 69]) * w[1];
res += (inp0[local_pos + 2] + inp0[local_pos + 68]) * w[2];
res += (inp0[local_pos + 3] + inp0[local_pos + 67]) * w[3];
w = texture(ravu_lut3, vec2(0.3, coord_y));
res += (inp0[local_pos + 4] + inp0[local_pos + 66]) * w[0];
res += (inp0[local_pos + 5] + inp0[local_pos + 65]) * w[1];
res += (inp0[local_pos + 13] + inp0[local_pos + 57]) * w[2];
res += (inp0[local_pos + 14] + inp0[local_pos + 56]) * w[3];
w = texture(ravu_lut3, vec2(0.5, coord_y));
res += (inp0[local_pos + 15] + inp0[local_pos + 55]) * w[0];
res += (inp0[local_pos + 16] + inp0[local_pos + 54]) * w[1];
res += (inp0[local_pos + 17] + inp0[local_pos + 53]) * w[2];
res += (inp0[local_pos + 18] + inp0[local_pos + 52]) * w[3];
w = texture(ravu_lut3, vec2(0.7, coord_y));
res += (inp0[local_pos + 26] + inp0[local_pos + 44]) * w[0];
res += (inp0[local_pos + 27] + inp0[local_pos + 43]) * w[1];
res += (inp0[local_pos + 28] + inp0[local_pos + 42]) * w[2];
res += (inp0[local_pos + 29] + inp0[local_pos + 41]) * w[3];
w = texture(ravu_lut3, vec2(0.9, coord_y));
res += (inp0[local_pos + 30] + inp0[local_pos + 40]) * w[0];
res += (inp0[local_pos + 31] + inp0[local_pos + 39]) * w[1];
res = clamp(res, 0.0, 1.0);
// imageStore is a macro that forwards to imageStoreOverride, which writes ravu_int11.
imageStore(out_image, ivec2(gl_GlobalInvocationID), vec4(res, 1.0));
}
}
//!PASS 2
//!DESC RAVU (step2, rgb, r3, compute)
//!IN INPUT, ravu_lut3, ravu_int11
//!OUT OUTPUT
//!BLOCK_SIZE 64, 16
//!NUM_THREADS 32, 8
// RGB weights used to derive the luma that drives gradient estimation.
static const vec3 color_primary = vec3(0.2126, 0.7152, 0.0722);
// Tiles shared by the thread group (481 = 37 * 13 entries each): inp0/inp_luma0
// hold the pass 1 result and its luma, inp1/inp_luma1 hold INPUT and its luma.
// NOTE(review): `shared` is not an HLSL keyword; presumably prescalers.hlsli maps
// it to groupshared -- confirm.
shared vec3 inp0[481];
shared float inp_luma0[481];
shared vec3 inp1[481];
shared float inp_luma1[481];
// Equal to LAST_PASS, so Pass2 compiles its output bounds check in.
#define CURRENT_PASS 2
// Samples are used as-is (no luma reduction) in the RGB variant.
#define GET_SAMPLE(x) x
// GLSL-style imageStore shim: the image argument is dropped and the value is forwarded unchanged.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val)
// Writes the final RGBA value to OUTPUT at pixel `pos`.
void imageStoreOverride(uint2 pos, float4 value) {
	OUTPUT[pos] = value;
}
// Sampling helpers. texture(), vec3/vec4 and the Get*() functions are not defined
// in this file; presumably they come from prescalers.hlsli / the Magpie runtime.
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
#define ravu_lut3_tex(pos) (vec4(texture(ravu_lut3, pos)))
#define ravu_int11_tex(pos) (vec3(texture(ravu_int11, pos).xyz))
// ravu_int11 is declared with the input's width and height, so its size and
// texel pitch are derived from the input size.
static const float2 ravu_int11_size = float2(GetInputSize().x, GetInputSize().y);
static const float2 ravu_int11_pt = float2(1.0 / (ravu_int11_size.x), 1.0 / (ravu_int11_size.y));
// HOOKED_* are aliases of the INPUT_* helpers for this pass.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// Pass 2 entry point (RAVU step 2, rgb, r3).
// Each thread writes a 2x2 block of OUTPUT texels based at ivec2(gl_GlobalInvocationID) * 2:
//   (0,1) and (1,0) are filtered from the ravu_int11 and INPUT tiles with weights read
//   from ravu_lut3; (1,1) is copied from the ravu_int11 tile and (0,0) from the INPUT tile.
// blockStart: first output texel of this thread group's block (used in the bounds check).
// threadId:   thread index within the group (used in the bounds check).
// NOTE(review): the gl_* names and GLSL-style helpers (vec3, texture, mix, mod, atan,
// barrier, ...) are not defined in this block; presumably a shared include provides them
// with GLSL semantics - confirm.
void Pass2(uint2 blockStart, uint3 threadId) {
// Origin of this thread group, in texels of the input-sized textures.
ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
// Base index of this thread in the shared tiles, which are laid out as x * 13 + y.
int local_pos = int(gl_LocalInvocationID.x) * 13 + int(gl_LocalInvocationID.y);
{
// Cooperative load of the ravu_int11 tile: the group strides through all 481 entries.
// Entry id maps to tile coordinates (id / 13, id % 13) and is sampled at
// (group_base + (x, y) - 2.5) texels; its luma is cached alongside the colour.
for (int id = int(gl_LocalInvocationIndex); id < 481; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 13, y = (uint)id % 13;
inp0[id] =
ravu_int11_tex(ravu_int11_pt * vec2(float(group_base.x + x) + (-2.5), float(group_base.y + y) + (-2.5)))
.xyz;
inp_luma0[id] = dot(inp0[id], color_primary);
}
}
{
// Same cooperative load for the INPUT tile, sampled at (group_base + (x, y) - 1.5) texels.
for (int id = int(gl_LocalInvocationIndex); id < 481; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 13, y = (uint)id % 13;
inp1[id] =
HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-1.5), float(group_base.y + y) + (-1.5))).xyz;
inp_luma1[id] = dot(inp1[id], color_primary);
}
}
// Every thread reaches this point before any tile entry written by another thread is read.
barrier();
// Compiled only when this pass is the last one (LAST_PASS is defined elsewhere in the
// file): threads whose 2x2 destination starts outside the output return without writing.
// The return comes after barrier(), so all threads still take part in the tile load.
#if CURRENT_PASS == LAST_PASS
uint2 destPos = blockStart + threadId.xy * 2;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
{
// ---- Output texel at offset (0, 1) ----
// Gather the 32 luma neighbours (from both tiles) consumed by the gradient formulas.
float luma12 = inp_luma0[local_pos + 15];
float luma7 = inp_luma0[local_pos + 16];
float luma2 = inp_luma0[local_pos + 17];
float luma24 = inp_luma0[local_pos + 27];
float luma19 = inp_luma0[local_pos + 28];
float luma14 = inp_luma0[local_pos + 29];
float luma9 = inp_luma0[local_pos + 30];
float luma4 = inp_luma0[local_pos + 31];
float luma31 = inp_luma0[local_pos + 40];
float luma26 = inp_luma0[local_pos + 41];
float luma21 = inp_luma0[local_pos + 42];
float luma16 = inp_luma0[local_pos + 43];
float luma11 = inp_luma0[local_pos + 44];
float luma33 = inp_luma0[local_pos + 54];
float luma28 = inp_luma0[local_pos + 55];
float luma23 = inp_luma0[local_pos + 56];
float luma18 = inp_luma1[local_pos + 14];
float luma13 = inp_luma1[local_pos + 15];
float luma8 = inp_luma1[local_pos + 16];
float luma3 = inp_luma1[local_pos + 17];
float luma25 = inp_luma1[local_pos + 27];
float luma20 = inp_luma1[local_pos + 28];
float luma15 = inp_luma1[local_pos + 29];
float luma6 = inp_luma1[local_pos + 2];
float luma10 = inp_luma1[local_pos + 30];
float luma1 = inp_luma1[local_pos + 3];
float luma32 = inp_luma1[local_pos + 40];
float luma27 = inp_luma1[local_pos + 41];
float luma22 = inp_luma1[local_pos + 42];
float luma17 = inp_luma1[local_pos + 43];
float luma34 = inp_luma1[local_pos + 54];
float luma29 = inp_luma1[local_pos + 55];
// Accumulate the weighted structure-tensor terms (gx*gx, gx*gy, gy*gy) over 16 sample
// points. Each gradient is either a 2-point central difference (p[+1] - p[-1]) / 2 or a
// 4-point one (-p[+2] + 8*p[+1] - 8*p[-1] + p[-2]) / 12.
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (luma13 - luma1) / 2.0;
gy = (luma8 - luma6) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma14 - luma2) / 2.0;
gy = (-luma10 + 8.0 * luma9 - 8.0 * luma7 + luma6) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma15 - luma3) / 2.0;
gy = (-luma11 + 8.0 * luma10 - 8.0 * luma8 + luma7) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma16 - luma4) / 2.0;
gy = (luma11 - luma9) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (-luma25 + 8.0 * luma19 - 8.0 * luma7 + luma1) / 12.0;
gy = (luma14 - luma12) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-luma26 + 8.0 * luma20 - 8.0 * luma8 + luma2) / 12.0;
gy = (-luma16 + 8.0 * luma15 - 8.0 * luma13 + luma12) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma27 + 8.0 * luma21 - 8.0 * luma9 + luma3) / 12.0;
gy = (-luma17 + 8.0 * luma16 - 8.0 * luma14 + luma13) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma28 + 8.0 * luma22 - 8.0 * luma10 + luma4) / 12.0;
gy = (luma17 - luma15) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-luma31 + 8.0 * luma25 - 8.0 * luma13 + luma7) / 12.0;
gy = (luma20 - luma18) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-luma32 + 8.0 * luma26 - 8.0 * luma14 + luma8) / 12.0;
gy = (-luma22 + 8.0 * luma21 - 8.0 * luma19 + luma18) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma33 + 8.0 * luma27 - 8.0 * luma15 + luma9) / 12.0;
gy = (-luma23 + 8.0 * luma22 - 8.0 * luma20 + luma19) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma34 + 8.0 * luma28 - 8.0 * luma16 + luma10) / 12.0;
gy = (luma23 - luma21) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma31 - luma19) / 2.0;
gy = (luma26 - luma24) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma32 - luma20) / 2.0;
gy = (-luma28 + 8.0 * luma27 - 8.0 * luma25 + luma24) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma33 - luma21) / 2.0;
gy = (-luma29 + 8.0 * luma28 - 8.0 * luma26 + luma25) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma34 - luma22) / 2.0;
gy = (luma29 - luma27) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
// Eigen-analysis of the symmetric matrix [[a, b], [b, d]]: T is its trace, D its
// determinant, and L1 >= L2 its eigenvalues (the max() guards the square root against
// a slightly negative discriminant).
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
// theta is wrapped by mod(..., pi); the boolean third argument of mix() presumably
// selects 0.0 when |b| (resp. sqrtL1 + sqrtL2) is below 1.192092896e-7 (FLT_EPSILON).
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
// Quantise into LUT row coordinates: angle = floor(theta * 24 / pi), strength clamped to
// 0..8, coherence 0, 1 or 2 by the 0.25 / 0.5 thresholds on mu. coord_y addresses the
// centre of row (angle * 9 + strength) * 3 + coherence out of 648 (= 24 * 9 * 3).
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0;
// Apply the kernel: five LUT fetches along x provide the weights; 18 of them are used,
// each multiplying the sum of a pair of taps (36 taps in total across both tiles).
vec3 res = vec3(0.0, 0.0, 0.0);
vec4 w;
w = texture(ravu_lut3, vec2(0.1, coord_y));
res += (inp0[local_pos + 3] + inp0[local_pos + 68]) * w[0];
res += (inp1[local_pos + 3] + inp1[local_pos + 54]) * w[1];
res += (inp0[local_pos + 17] + inp0[local_pos + 54]) * w[2];
res += (inp1[local_pos + 17] + inp1[local_pos + 40]) * w[3];
w = texture(ravu_lut3, vec2(0.3, coord_y));
res += (inp0[local_pos + 31] + inp0[local_pos + 40]) * w[0];
res += (inp1[local_pos + 31] + inp1[local_pos + 26]) * w[1];
res += (inp1[local_pos + 2] + inp1[local_pos + 55]) * w[2];
res += (inp0[local_pos + 16] + inp0[local_pos + 55]) * w[3];
w = texture(ravu_lut3, vec2(0.5, coord_y));
res += (inp1[local_pos + 16] + inp1[local_pos + 41]) * w[0];
res += (inp0[local_pos + 30] + inp0[local_pos + 41]) * w[1];
res += (inp1[local_pos + 30] + inp1[local_pos + 27]) * w[2];
res += (inp0[local_pos + 44] + inp0[local_pos + 27]) * w[3];
w = texture(ravu_lut3, vec2(0.7, coord_y));
res += (inp0[local_pos + 15] + inp0[local_pos + 56]) * w[0];
res += (inp1[local_pos + 15] + inp1[local_pos + 42]) * w[1];
res += (inp0[local_pos + 29] + inp0[local_pos + 42]) * w[2];
res += (inp1[local_pos + 29] + inp1[local_pos + 28]) * w[3];
w = texture(ravu_lut3, vec2(0.9, coord_y));
res += (inp0[local_pos + 43] + inp0[local_pos + 28]) * w[0];
res += (inp1[local_pos + 43] + inp1[local_pos + 14]) * w[1];
// Clamp to [0, 1] and store with alpha 1.
res = clamp(res, 0.0, 1.0);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 1), vec4(res, 1.0));
}
{
// ---- Output texel at offset (1, 0) ----
// Same sequence of steps as the previous scope, with a different set of tile offsets.
float luma6 = inp_luma0[local_pos + 15];
float luma1 = inp_luma0[local_pos + 16];
float luma18 = inp_luma0[local_pos + 27];
float luma13 = inp_luma0[local_pos + 28];
float luma8 = inp_luma0[local_pos + 29];
float luma3 = inp_luma0[local_pos + 30];
float luma25 = inp_luma0[local_pos + 40];
float luma20 = inp_luma0[local_pos + 41];
float luma15 = inp_luma0[local_pos + 42];
float luma10 = inp_luma0[local_pos + 43];
float luma32 = inp_luma0[local_pos + 53];
float luma27 = inp_luma0[local_pos + 54];
float luma22 = inp_luma0[local_pos + 55];
float luma17 = inp_luma0[local_pos + 56];
float luma34 = inp_luma0[local_pos + 67];
float luma29 = inp_luma0[local_pos + 68];
float luma12 = inp_luma1[local_pos + 14];
float luma7 = inp_luma1[local_pos + 15];
float luma2 = inp_luma1[local_pos + 16];
float luma24 = inp_luma1[local_pos + 26];
float luma19 = inp_luma1[local_pos + 27];
float luma14 = inp_luma1[local_pos + 28];
float luma9 = inp_luma1[local_pos + 29];
float luma4 = inp_luma1[local_pos + 30];
float luma31 = inp_luma1[local_pos + 39];
float luma26 = inp_luma1[local_pos + 40];
float luma21 = inp_luma1[local_pos + 41];
float luma16 = inp_luma1[local_pos + 42];
float luma11 = inp_luma1[local_pos + 43];
float luma33 = inp_luma1[local_pos + 53];
float luma28 = inp_luma1[local_pos + 54];
float luma23 = inp_luma1[local_pos + 55];
// Weighted structure-tensor accumulation over 16 sample points (see the first scope).
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (luma13 - luma1) / 2.0;
gy = (luma8 - luma6) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma14 - luma2) / 2.0;
gy = (-luma10 + 8.0 * luma9 - 8.0 * luma7 + luma6) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma15 - luma3) / 2.0;
gy = (-luma11 + 8.0 * luma10 - 8.0 * luma8 + luma7) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma16 - luma4) / 2.0;
gy = (luma11 - luma9) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (-luma25 + 8.0 * luma19 - 8.0 * luma7 + luma1) / 12.0;
gy = (luma14 - luma12) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-luma26 + 8.0 * luma20 - 8.0 * luma8 + luma2) / 12.0;
gy = (-luma16 + 8.0 * luma15 - 8.0 * luma13 + luma12) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma27 + 8.0 * luma21 - 8.0 * luma9 + luma3) / 12.0;
gy = (-luma17 + 8.0 * luma16 - 8.0 * luma14 + luma13) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma28 + 8.0 * luma22 - 8.0 * luma10 + luma4) / 12.0;
gy = (luma17 - luma15) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-luma31 + 8.0 * luma25 - 8.0 * luma13 + luma7) / 12.0;
gy = (luma20 - luma18) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-luma32 + 8.0 * luma26 - 8.0 * luma14 + luma8) / 12.0;
gy = (-luma22 + 8.0 * luma21 - 8.0 * luma19 + luma18) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma33 + 8.0 * luma27 - 8.0 * luma15 + luma9) / 12.0;
gy = (-luma23 + 8.0 * luma22 - 8.0 * luma20 + luma19) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma34 + 8.0 * luma28 - 8.0 * luma16 + luma10) / 12.0;
gy = (luma23 - luma21) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma31 - luma19) / 2.0;
gy = (luma26 - luma24) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma32 - luma20) / 2.0;
gy = (-luma28 + 8.0 * luma27 - 8.0 * luma25 + luma24) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma33 - luma21) / 2.0;
gy = (-luma29 + 8.0 * luma28 - 8.0 * luma26 + luma25) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma34 - luma22) / 2.0;
gy = (luma29 - luma27) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
// Eigenvalues of [[a, b], [b, d]] and the derived angle / strength / coherence indices.
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0;
// 18 LUT weights, each applied to the sum of a pair of taps.
vec3 res = vec3(0.0, 0.0, 0.0);
vec4 w;
w = texture(ravu_lut3, vec2(0.1, coord_y));
res += (inp1[local_pos + 2] + inp1[local_pos + 67]) * w[0];
res += (inp0[local_pos + 16] + inp0[local_pos + 67]) * w[1];
res += (inp1[local_pos + 16] + inp1[local_pos + 53]) * w[2];
res += (inp0[local_pos + 30] + inp0[local_pos + 53]) * w[3];
w = texture(ravu_lut3, vec2(0.3, coord_y));
res += (inp1[local_pos + 30] + inp1[local_pos + 39]) * w[0];
res += (inp0[local_pos + 44] + inp0[local_pos + 39]) * w[1];
res += (inp0[local_pos + 15] + inp0[local_pos + 68]) * w[2];
res += (inp1[local_pos + 15] + inp1[local_pos + 54]) * w[3];
w = texture(ravu_lut3, vec2(0.5, coord_y));
res += (inp0[local_pos + 29] + inp0[local_pos + 54]) * w[0];
res += (inp1[local_pos + 29] + inp1[local_pos + 40]) * w[1];
res += (inp0[local_pos + 43] + inp0[local_pos + 40]) * w[2];
res += (inp1[local_pos + 43] + inp1[local_pos + 26]) * w[3];
w = texture(ravu_lut3, vec2(0.7, coord_y));
res += (inp1[local_pos + 14] + inp1[local_pos + 55]) * w[0];
res += (inp0[local_pos + 28] + inp0[local_pos + 55]) * w[1];
res += (inp1[local_pos + 28] + inp1[local_pos + 41]) * w[2];
res += (inp0[local_pos + 42] + inp0[local_pos + 41]) * w[3];
w = texture(ravu_lut3, vec2(0.9, coord_y));
res += (inp1[local_pos + 42] + inp1[local_pos + 27]) * w[0];
res += (inp0[local_pos + 56] + inp0[local_pos + 27]) * w[1];
res = clamp(res, 0.0, 1.0);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 0), vec4(res, 1.0));
}
// The remaining two texels are plain copies. Offset 42 = 3 * 13 + 3 in the ravu_int11
// tile (loaded with -2.5) and offset 28 = 2 * 13 + 2 in the INPUT tile (loaded with -1.5)
// both land on the texel centre at group_base + local id, i.e. this thread's own texel.
vec3 res;
res = inp0[local_pos + 42];
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 1), vec4(res, 1.0));
res = inp1[local_pos + 28];
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 0), vec4(res, 1.0));
}

View file

@ -0,0 +1,851 @@
// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers
// Please don't edit this file directly.
// Generated by: ravu.py --target luma --weights-file weights\ravu_weights-r4.py --float-format float16dx --use-compute-shader --use-magpie --overwrite
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// Effect header and resource declarations for the two-pass RAVU (luma, r4) effect.
//!MAGPIE EFFECT
//!VERSION 4
// Source image; read by both passes.
//!TEXTURE
Texture2D INPUT;
//!SAMPLER
//!FILTER POINT
SamplerState sam_INPUT;
// Final result, declared at twice the input width and height; written by pass 2.
//!TEXTURE
//!WIDTH INPUT_WIDTH * 2
//!HEIGHT INPUT_HEIGHT * 2
Texture2D OUTPUT;
// Linear sampler; pass 2's imageStoreOverride uses it to sample INPUT.
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_INPUT_LINEAR;
// Weight lookup table loaded from ravu_lut4_f16.dds (four 16-bit float channels).
//!TEXTURE
//!SOURCE ravu_lut4_f16.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_lut4;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_lut4;
// Single-channel intermediate at input resolution: written by pass 1, read by pass 2.
//!TEXTURE
//!FORMAT R16_FLOAT
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
Texture2D ravu_int11;
//!SAMPLER
//!FILTER POINT
SamplerState sam_ravu_int11;
// Code shared by the passes. LAST_PASS is compared against each pass's CURRENT_PASS to
// enable the output bounds check only in the final pass.
//!COMMON
#include "prescalers.hlsli"
#define LAST_PASS 2
//!PASS 1
//!DESC RAVU (step1, luma, r4, compute)
//!IN INPUT, ravu_lut4
//!OUT ravu_int11
//!BLOCK_SIZE 32, 8
//!NUM_THREADS 32, 8
// Support code for pass 1 (luma, r4): shared tile, GLSL-style store/sample shims and
// size constants used by Pass1 below.
// Luma tile filled cooperatively in Pass1 and indexed as x * 15 + y.
// 585 = 39 * 15, i.e. the 32x8 NUM_THREADS group plus 7 extra entries per dimension.
// NOTE(review): `shared` presumably maps to HLSL groupshared via prescalers.hlsli - confirm.
shared float inp0[585];
#define CURRENT_PASS 1
// Reduces a colour sample to luma; rgb2y is not defined in this block (presumably it
// comes from prescalers.hlsli).
#define GET_SAMPLE(x) dot(x.rgb, rgb2y)
// GLSL-style imageStore shim: the image argument is dropped and only the .x component of
// the value is forwarded to imageStoreOverride.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x)
// Writes the scalar `value` to the intermediate texture ravu_int11 at texel `pos`.
void imageStoreOverride(uint2 pos, float value) { ravu_int11[pos] = (value); }
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
#define ravu_lut4_tex(pos) (vec4(texture(ravu_lut4, pos)))
// HOOKED_* are aliases for the INPUT_* equivalents.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// Pass 1 entry point (RAVU step 1, luma, r4).
// Each thread computes one texel of ravu_int11 at ivec2(gl_GlobalInvocationID) from an
// 8x8 window of INPUT luma: it estimates local gradient statistics, turns them into a
// row of ravu_lut4, and applies the 32 weights found there to 32 pairs of taps.
// blockStart / threadId are referenced directly only inside the LAST_PASS guard below.
// NOTE(review): the gl_* names and GLSL-style helpers (vec3, texture, mix, mod, atan,
// barrier, ...) are not defined in this block; presumably prescalers.hlsli provides them
// with GLSL semantics - confirm.
void Pass1(uint2 blockStart, uint3 threadId) {
// Origin of this thread group, in INPUT texels.
ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
// Base index of this thread in the shared tile, which is laid out as x * 15 + y.
int local_pos = int(gl_LocalInvocationID.x) * 15 + int(gl_LocalInvocationID.y);
{
// Cooperative load: the group strides through all 585 tile entries. Entry id maps to
// tile coordinates (id / 15, id % 15) and is sampled at (group_base + (x, y) - 2.5)
// texels; HOOKED_tex already reduces the sample to luma.
for (int id = int(gl_LocalInvocationIndex); id < 585; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 15, y = (uint)id % 15;
inp0[id] =
HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-2.5), float(group_base.y + y) + (-2.5))).x;
}
}
// Every thread reaches this point before any tile entry written by another thread is read.
barrier();
// Output bounds check for the final pass only. CURRENT_PASS is 1 for this pass and the
// file's //!COMMON section defines LAST_PASS as 2, so this guard is presumably compiled
// out here.
#if CURRENT_PASS == LAST_PASS
uint2 destPos = blockStart + threadId.xy * 2;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
{
// Gather the luma neighbours used by the gradient formulas. lumaN is the tile entry at
// local_pos + (N / 8) * 15 + (N % 8); the four window corners (N = 0, 7, 56, 63) are
// not needed for the gradients.
float luma57 = inp0[local_pos + 106];
float luma58 = inp0[local_pos + 107];
float luma59 = inp0[local_pos + 108];
float luma60 = inp0[local_pos + 109];
float luma61 = inp0[local_pos + 110];
float luma62 = inp0[local_pos + 111];
float luma8 = inp0[local_pos + 15];
float luma9 = inp0[local_pos + 16];
float luma10 = inp0[local_pos + 17];
float luma11 = inp0[local_pos + 18];
float luma12 = inp0[local_pos + 19];
float luma1 = inp0[local_pos + 1];
float luma13 = inp0[local_pos + 20];
float luma14 = inp0[local_pos + 21];
float luma15 = inp0[local_pos + 22];
float luma2 = inp0[local_pos + 2];
float luma16 = inp0[local_pos + 30];
float luma17 = inp0[local_pos + 31];
float luma18 = inp0[local_pos + 32];
float luma19 = inp0[local_pos + 33];
float luma20 = inp0[local_pos + 34];
float luma21 = inp0[local_pos + 35];
float luma22 = inp0[local_pos + 36];
float luma23 = inp0[local_pos + 37];
float luma3 = inp0[local_pos + 3];
float luma24 = inp0[local_pos + 45];
float luma25 = inp0[local_pos + 46];
float luma26 = inp0[local_pos + 47];
float luma27 = inp0[local_pos + 48];
float luma28 = inp0[local_pos + 49];
float luma4 = inp0[local_pos + 4];
float luma29 = inp0[local_pos + 50];
float luma30 = inp0[local_pos + 51];
float luma31 = inp0[local_pos + 52];
float luma5 = inp0[local_pos + 5];
float luma32 = inp0[local_pos + 60];
float luma33 = inp0[local_pos + 61];
float luma34 = inp0[local_pos + 62];
float luma35 = inp0[local_pos + 63];
float luma36 = inp0[local_pos + 64];
float luma37 = inp0[local_pos + 65];
float luma38 = inp0[local_pos + 66];
float luma39 = inp0[local_pos + 67];
float luma6 = inp0[local_pos + 6];
float luma40 = inp0[local_pos + 75];
float luma41 = inp0[local_pos + 76];
float luma42 = inp0[local_pos + 77];
float luma43 = inp0[local_pos + 78];
float luma44 = inp0[local_pos + 79];
float luma45 = inp0[local_pos + 80];
float luma46 = inp0[local_pos + 81];
float luma47 = inp0[local_pos + 82];
float luma48 = inp0[local_pos + 90];
float luma49 = inp0[local_pos + 91];
float luma50 = inp0[local_pos + 92];
float luma51 = inp0[local_pos + 93];
float luma52 = inp0[local_pos + 94];
float luma53 = inp0[local_pos + 95];
float luma54 = inp0[local_pos + 96];
float luma55 = inp0[local_pos + 97];
// Accumulate the weighted structure-tensor terms (gx*gx, gx*gy, gy*gy) over 36 sample
// points. Each gradient is either a 2-point central difference (p[+1] - p[-1]) / 2 or a
// 4-point one (-p[+2] + 8*p[+1] - 8*p[-1] + p[-2]) / 12.
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (luma17 - luma1) / 2.0;
gy = (luma10 - luma8) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533;
gx = (luma18 - luma2) / 2.0;
gy = (-luma12 + 8.0 * luma11 - 8.0 * luma9 + luma8) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma19 - luma3) / 2.0;
gy = (-luma13 + 8.0 * luma12 - 8.0 * luma10 + luma9) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (luma20 - luma4) / 2.0;
gy = (-luma14 + 8.0 * luma13 - 8.0 * luma11 + luma10) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (luma21 - luma5) / 2.0;
gy = (-luma15 + 8.0 * luma14 - 8.0 * luma12 + luma11) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma22 - luma6) / 2.0;
gy = (luma15 - luma13) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533;
gx = (-luma33 + 8.0 * luma25 - 8.0 * luma9 + luma1) / 12.0;
gy = (luma18 - luma16) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (-luma34 + 8.0 * luma26 - 8.0 * luma10 + luma2) / 12.0;
gy = (-luma20 + 8.0 * luma19 - 8.0 * luma17 + luma16) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854;
gx = (-luma35 + 8.0 * luma27 - 8.0 * luma11 + luma3) / 12.0;
gy = (-luma21 + 8.0 * luma20 - 8.0 * luma18 + luma17) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma36 + 8.0 * luma28 - 8.0 * luma12 + luma4) / 12.0;
gy = (-luma22 + 8.0 * luma21 - 8.0 * luma19 + luma18) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma37 + 8.0 * luma29 - 8.0 * luma13 + luma5) / 12.0;
gy = (-luma23 + 8.0 * luma22 - 8.0 * luma20 + luma19) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854;
gx = (-luma38 + 8.0 * luma30 - 8.0 * luma14 + luma6) / 12.0;
gy = (luma23 - luma21) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (-luma41 + 8.0 * luma33 - 8.0 * luma17 + luma9) / 12.0;
gy = (luma26 - luma24) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (-luma42 + 8.0 * luma34 - 8.0 * luma18 + luma10) / 12.0;
gy = (-luma28 + 8.0 * luma27 - 8.0 * luma25 + luma24) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma43 + 8.0 * luma35 - 8.0 * luma19 + luma11) / 12.0;
gy = (-luma29 + 8.0 * luma28 - 8.0 * luma26 + luma25) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013;
gx = (-luma44 + 8.0 * luma36 - 8.0 * luma20 + luma12) / 12.0;
gy = (-luma30 + 8.0 * luma29 - 8.0 * luma27 + luma26) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013;
gx = (-luma45 + 8.0 * luma37 - 8.0 * luma21 + luma13) / 12.0;
gy = (-luma31 + 8.0 * luma30 - 8.0 * luma28 + luma27) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma46 + 8.0 * luma38 - 8.0 * luma22 + luma14) / 12.0;
gy = (luma31 - luma29) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (-luma49 + 8.0 * luma41 - 8.0 * luma25 + luma17) / 12.0;
gy = (luma34 - luma32) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (-luma50 + 8.0 * luma42 - 8.0 * luma26 + luma18) / 12.0;
gy = (-luma36 + 8.0 * luma35 - 8.0 * luma33 + luma32) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma51 + 8.0 * luma43 - 8.0 * luma27 + luma19) / 12.0;
gy = (-luma37 + 8.0 * luma36 - 8.0 * luma34 + luma33) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013;
gx = (-luma52 + 8.0 * luma44 - 8.0 * luma28 + luma20) / 12.0;
gy = (-luma38 + 8.0 * luma37 - 8.0 * luma35 + luma34) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013;
gx = (-luma53 + 8.0 * luma45 - 8.0 * luma29 + luma21) / 12.0;
gy = (-luma39 + 8.0 * luma38 - 8.0 * luma36 + luma35) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma54 + 8.0 * luma46 - 8.0 * luma30 + luma22) / 12.0;
gy = (luma39 - luma37) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (-luma57 + 8.0 * luma49 - 8.0 * luma33 + luma25) / 12.0;
gy = (luma42 - luma40) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (-luma58 + 8.0 * luma50 - 8.0 * luma34 + luma26) / 12.0;
gy = (-luma44 + 8.0 * luma43 - 8.0 * luma41 + luma40) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854;
gx = (-luma59 + 8.0 * luma51 - 8.0 * luma35 + luma27) / 12.0;
gy = (-luma45 + 8.0 * luma44 - 8.0 * luma42 + luma41) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma60 + 8.0 * luma52 - 8.0 * luma36 + luma28) / 12.0;
gy = (-luma46 + 8.0 * luma45 - 8.0 * luma43 + luma42) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma61 + 8.0 * luma53 - 8.0 * luma37 + luma29) / 12.0;
gy = (-luma47 + 8.0 * luma46 - 8.0 * luma44 + luma43) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854;
gx = (-luma62 + 8.0 * luma54 - 8.0 * luma38 + luma30) / 12.0;
gy = (luma47 - luma45) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma57 - luma41) / 2.0;
gy = (luma50 - luma48) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533;
gx = (luma58 - luma42) / 2.0;
gy = (-luma52 + 8.0 * luma51 - 8.0 * luma49 + luma48) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma59 - luma43) / 2.0;
gy = (-luma53 + 8.0 * luma52 - 8.0 * luma50 + luma49) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (luma60 - luma44) / 2.0;
gy = (-luma54 + 8.0 * luma53 - 8.0 * luma51 + luma50) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (luma61 - luma45) / 2.0;
gy = (-luma55 + 8.0 * luma54 - 8.0 * luma52 + luma51) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma62 - luma46) / 2.0;
gy = (luma55 - luma53) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533;
// Eigen-analysis of the symmetric matrix [[a, b], [b, d]]: T is its trace, D its
// determinant, and L1 >= L2 its eigenvalues (the max() guards the square root against
// a slightly negative discriminant).
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
// theta is wrapped by mod(..., pi); the boolean third argument of mix() presumably
// selects 0.0 when |b| (resp. sqrtL1 + sqrtL2) is below 1.192092896e-7 (FLT_EPSILON).
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
// Quantise into LUT row coordinates: angle = floor(theta * 24 / pi), strength clamped to
// 0..8, coherence 0, 1 or 2 by the 0.25 / 0.5 thresholds on mu. coord_y addresses the
// centre of row (angle * 9 + strength) * 3 + coherence out of 648 (= 24 * 9 * 3).
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0;
// Apply the kernel: eight LUT fetches at x = (k + 0.5) / 8 give 32 weights, each
// multiplying the sum of a pair of taps whose offsets add up to 112, covering all 64
// entries local_pos + 15 * i + j for i, j in 0..7.
float res = 0.0;
vec4 w;
w = texture(ravu_lut4, vec2(0.0625, coord_y));
res += (inp0[local_pos + 0] + inp0[local_pos + 112]) * w[0];
res += (inp0[local_pos + 1] + inp0[local_pos + 111]) * w[1];
res += (inp0[local_pos + 2] + inp0[local_pos + 110]) * w[2];
res += (inp0[local_pos + 3] + inp0[local_pos + 109]) * w[3];
w = texture(ravu_lut4, vec2(0.1875, coord_y));
res += (inp0[local_pos + 4] + inp0[local_pos + 108]) * w[0];
res += (inp0[local_pos + 5] + inp0[local_pos + 107]) * w[1];
res += (inp0[local_pos + 6] + inp0[local_pos + 106]) * w[2];
res += (inp0[local_pos + 7] + inp0[local_pos + 105]) * w[3];
w = texture(ravu_lut4, vec2(0.3125, coord_y));
res += (inp0[local_pos + 15] + inp0[local_pos + 97]) * w[0];
res += (inp0[local_pos + 16] + inp0[local_pos + 96]) * w[1];
res += (inp0[local_pos + 17] + inp0[local_pos + 95]) * w[2];
res += (inp0[local_pos + 18] + inp0[local_pos + 94]) * w[3];
w = texture(ravu_lut4, vec2(0.4375, coord_y));
res += (inp0[local_pos + 19] + inp0[local_pos + 93]) * w[0];
res += (inp0[local_pos + 20] + inp0[local_pos + 92]) * w[1];
res += (inp0[local_pos + 21] + inp0[local_pos + 91]) * w[2];
res += (inp0[local_pos + 22] + inp0[local_pos + 90]) * w[3];
w = texture(ravu_lut4, vec2(0.5625, coord_y));
res += (inp0[local_pos + 30] + inp0[local_pos + 82]) * w[0];
res += (inp0[local_pos + 31] + inp0[local_pos + 81]) * w[1];
res += (inp0[local_pos + 32] + inp0[local_pos + 80]) * w[2];
res += (inp0[local_pos + 33] + inp0[local_pos + 79]) * w[3];
w = texture(ravu_lut4, vec2(0.6875, coord_y));
res += (inp0[local_pos + 34] + inp0[local_pos + 78]) * w[0];
res += (inp0[local_pos + 35] + inp0[local_pos + 77]) * w[1];
res += (inp0[local_pos + 36] + inp0[local_pos + 76]) * w[2];
res += (inp0[local_pos + 37] + inp0[local_pos + 75]) * w[3];
w = texture(ravu_lut4, vec2(0.8125, coord_y));
res += (inp0[local_pos + 45] + inp0[local_pos + 67]) * w[0];
res += (inp0[local_pos + 46] + inp0[local_pos + 66]) * w[1];
res += (inp0[local_pos + 47] + inp0[local_pos + 65]) * w[2];
res += (inp0[local_pos + 48] + inp0[local_pos + 64]) * w[3];
w = texture(ravu_lut4, vec2(0.9375, coord_y));
res += (inp0[local_pos + 49] + inp0[local_pos + 63]) * w[0];
res += (inp0[local_pos + 50] + inp0[local_pos + 62]) * w[1];
res += (inp0[local_pos + 51] + inp0[local_pos + 61]) * w[2];
res += (inp0[local_pos + 52] + inp0[local_pos + 60]) * w[3];
// Clamp to [0, 1] and write the result to ravu_int11 (via the imageStore shim).
res = clamp(res, 0.0, 1.0);
imageStore(out_image, ivec2(gl_GlobalInvocationID), res);
}
}
//!PASS 2
//!DESC RAVU (step2, luma, r4, compute)
//!IN INPUT, ravu_lut4, ravu_int11
//!OUT OUTPUT
//!BLOCK_SIZE 64, 16
//!NUM_THREADS 32, 8
// Support code for pass 2 (luma, r4): shared tiles and store shims used by Pass2 below.
// Luma tiles filled cooperatively in Pass2 and indexed as x * 15 + y (585 = 39 * 15):
// inp0 is loaded from ravu_int11, inp1 from INPUT.
// NOTE(review): `shared` presumably maps to HLSL groupshared via prescalers.hlsli - confirm.
shared float inp0[585];
shared float inp1[585];
#define CURRENT_PASS 2
// Reduces a colour sample to luma; rgb2y is not defined in this block (presumably it
// comes from prescalers.hlsli).
#define GET_SAMPLE(x) dot(x.rgb, rgb2y)
// GLSL-style imageStore shim: the image argument is dropped and only the .x component of
// the value is forwarded to imageStoreOverride.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x)
// Writes one OUTPUT texel from a computed luma value.
// The two chroma components are obtained by sampling INPUT (linear filter, mip 0) at
// HOOKED_map(pos) and applying rgb2uv; they are combined with `value` as the first
// component, converted back with yuv2rgb and stored with alpha 1.
// pos:   destination texel in OUTPUT.
// value: luma for that texel.
void imageStoreOverride(uint2 pos, float value) {
const float3 sourceRgb = INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb;
const float2 chroma = mul(rgb2uv, sourceRgb);
const float3 mergedYuv = float3(value.x, chroma);
OUTPUT[pos] = float4(mul(yuv2rgb, mergedYuv), 1.0);
}
// Sampling shims and size constants for pass 2 (luma, r4).
// INPUT samples are reduced through GET_SAMPLE; ravu_lut4 samples keep all four channels;
// ravu_int11 samples keep only the .x channel.
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
#define ravu_lut4_tex(pos) (vec4(texture(ravu_lut4, pos)))
#define ravu_int11_tex(pos) (float(texture(ravu_int11, pos).x))
// ravu_int11 is declared with INPUT_WIDTH x INPUT_HEIGHT, so its size and texel step are
// derived from the input size.
static const float2 ravu_int11_size = float2(GetInputSize().x, GetInputSize().y);
static const float2 ravu_int11_pt = float2(1.0 / (ravu_int11_size.x), 1.0 / (ravu_int11_size.y));
// HOOKED_* are aliases for the INPUT_* equivalents.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
void Pass2(uint2 blockStart, uint3 threadId) {
ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
int local_pos = int(gl_LocalInvocationID.x) * 15 + int(gl_LocalInvocationID.y);
{
for (int id = int(gl_LocalInvocationIndex); id < 585; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 15, y = (uint)id % 15;
inp0[id] =
ravu_int11_tex(ravu_int11_pt * vec2(float(group_base.x + x) + (-3.5), float(group_base.y + y) + (-3.5)))
.x;
}
}
{
for (int id = int(gl_LocalInvocationIndex); id < 585; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 15, y = (uint)id % 15;
inp1[id] =
HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-2.5), float(group_base.y + y) + (-2.5))).x;
}
}
barrier();
#if CURRENT_PASS == LAST_PASS
uint2 destPos = blockStart + threadId.xy * 2;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
{
float luma16 = inp0[local_pos + 18];
float luma9 = inp0[local_pos + 19];
float luma2 = inp0[local_pos + 20];
float luma32 = inp0[local_pos + 32];
float luma25 = inp0[local_pos + 33];
float luma18 = inp0[local_pos + 34];
float luma11 = inp0[local_pos + 35];
float luma4 = inp0[local_pos + 36];
float luma48 = inp0[local_pos + 46];
float luma41 = inp0[local_pos + 47];
float luma34 = inp0[local_pos + 48];
float luma27 = inp0[local_pos + 49];
float luma20 = inp0[local_pos + 50];
float luma13 = inp0[local_pos + 51];
float luma6 = inp0[local_pos + 52];
float luma57 = inp0[local_pos + 61];
float luma50 = inp0[local_pos + 62];
float luma43 = inp0[local_pos + 63];
float luma36 = inp0[local_pos + 64];
float luma29 = inp0[local_pos + 65];
float luma22 = inp0[local_pos + 66];
float luma15 = inp0[local_pos + 67];
float luma59 = inp0[local_pos + 77];
float luma52 = inp0[local_pos + 78];
float luma45 = inp0[local_pos + 79];
float luma38 = inp0[local_pos + 80];
float luma31 = inp0[local_pos + 81];
float luma61 = inp0[local_pos + 93];
float luma54 = inp0[local_pos + 94];
float luma47 = inp0[local_pos + 95];
float luma24 = inp1[local_pos + 17];
float luma17 = inp1[local_pos + 18];
float luma10 = inp1[local_pos + 19];
float luma3 = inp1[local_pos + 20];
float luma40 = inp1[local_pos + 31];
float luma33 = inp1[local_pos + 32];
float luma26 = inp1[local_pos + 33];
float luma19 = inp1[local_pos + 34];
float luma12 = inp1[local_pos + 35];
float luma5 = inp1[local_pos + 36];
float luma8 = inp1[local_pos + 3];
float luma49 = inp1[local_pos + 46];
float luma42 = inp1[local_pos + 47];
float luma35 = inp1[local_pos + 48];
float luma28 = inp1[local_pos + 49];
float luma1 = inp1[local_pos + 4];
float luma21 = inp1[local_pos + 50];
float luma14 = inp1[local_pos + 51];
float luma58 = inp1[local_pos + 61];
float luma51 = inp1[local_pos + 62];
float luma44 = inp1[local_pos + 63];
float luma37 = inp1[local_pos + 64];
float luma30 = inp1[local_pos + 65];
float luma23 = inp1[local_pos + 66];
float luma60 = inp1[local_pos + 77];
float luma53 = inp1[local_pos + 78];
float luma46 = inp1[local_pos + 79];
float luma39 = inp1[local_pos + 80];
float luma62 = inp1[local_pos + 93];
float luma55 = inp1[local_pos + 94];
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (luma17 - luma1) / 2.0;
gy = (luma10 - luma8) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533;
gx = (luma18 - luma2) / 2.0;
gy = (-luma12 + 8.0 * luma11 - 8.0 * luma9 + luma8) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma19 - luma3) / 2.0;
gy = (-luma13 + 8.0 * luma12 - 8.0 * luma10 + luma9) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (luma20 - luma4) / 2.0;
gy = (-luma14 + 8.0 * luma13 - 8.0 * luma11 + luma10) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (luma21 - luma5) / 2.0;
gy = (-luma15 + 8.0 * luma14 - 8.0 * luma12 + luma11) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma22 - luma6) / 2.0;
gy = (luma15 - luma13) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533;
gx = (-luma33 + 8.0 * luma25 - 8.0 * luma9 + luma1) / 12.0;
gy = (luma18 - luma16) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (-luma34 + 8.0 * luma26 - 8.0 * luma10 + luma2) / 12.0;
gy = (-luma20 + 8.0 * luma19 - 8.0 * luma17 + luma16) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854;
gx = (-luma35 + 8.0 * luma27 - 8.0 * luma11 + luma3) / 12.0;
gy = (-luma21 + 8.0 * luma20 - 8.0 * luma18 + luma17) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma36 + 8.0 * luma28 - 8.0 * luma12 + luma4) / 12.0;
gy = (-luma22 + 8.0 * luma21 - 8.0 * luma19 + luma18) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma37 + 8.0 * luma29 - 8.0 * luma13 + luma5) / 12.0;
gy = (-luma23 + 8.0 * luma22 - 8.0 * luma20 + luma19) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854;
gx = (-luma38 + 8.0 * luma30 - 8.0 * luma14 + luma6) / 12.0;
gy = (luma23 - luma21) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (-luma41 + 8.0 * luma33 - 8.0 * luma17 + luma9) / 12.0;
gy = (luma26 - luma24) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (-luma42 + 8.0 * luma34 - 8.0 * luma18 + luma10) / 12.0;
gy = (-luma28 + 8.0 * luma27 - 8.0 * luma25 + luma24) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma43 + 8.0 * luma35 - 8.0 * luma19 + luma11) / 12.0;
gy = (-luma29 + 8.0 * luma28 - 8.0 * luma26 + luma25) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013;
gx = (-luma44 + 8.0 * luma36 - 8.0 * luma20 + luma12) / 12.0;
gy = (-luma30 + 8.0 * luma29 - 8.0 * luma27 + luma26) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013;
gx = (-luma45 + 8.0 * luma37 - 8.0 * luma21 + luma13) / 12.0;
gy = (-luma31 + 8.0 * luma30 - 8.0 * luma28 + luma27) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma46 + 8.0 * luma38 - 8.0 * luma22 + luma14) / 12.0;
gy = (luma31 - luma29) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (-luma49 + 8.0 * luma41 - 8.0 * luma25 + luma17) / 12.0;
gy = (luma34 - luma32) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (-luma50 + 8.0 * luma42 - 8.0 * luma26 + luma18) / 12.0;
gy = (-luma36 + 8.0 * luma35 - 8.0 * luma33 + luma32) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma51 + 8.0 * luma43 - 8.0 * luma27 + luma19) / 12.0;
gy = (-luma37 + 8.0 * luma36 - 8.0 * luma34 + luma33) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013;
gx = (-luma52 + 8.0 * luma44 - 8.0 * luma28 + luma20) / 12.0;
gy = (-luma38 + 8.0 * luma37 - 8.0 * luma35 + luma34) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013;
gx = (-luma53 + 8.0 * luma45 - 8.0 * luma29 + luma21) / 12.0;
gy = (-luma39 + 8.0 * luma38 - 8.0 * luma36 + luma35) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma54 + 8.0 * luma46 - 8.0 * luma30 + luma22) / 12.0;
gy = (luma39 - luma37) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (-luma57 + 8.0 * luma49 - 8.0 * luma33 + luma25) / 12.0;
gy = (luma42 - luma40) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (-luma58 + 8.0 * luma50 - 8.0 * luma34 + luma26) / 12.0;
gy = (-luma44 + 8.0 * luma43 - 8.0 * luma41 + luma40) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854;
gx = (-luma59 + 8.0 * luma51 - 8.0 * luma35 + luma27) / 12.0;
gy = (-luma45 + 8.0 * luma44 - 8.0 * luma42 + luma41) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma60 + 8.0 * luma52 - 8.0 * luma36 + luma28) / 12.0;
gy = (-luma46 + 8.0 * luma45 - 8.0 * luma43 + luma42) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma61 + 8.0 * luma53 - 8.0 * luma37 + luma29) / 12.0;
gy = (-luma47 + 8.0 * luma46 - 8.0 * luma44 + luma43) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854;
gx = (-luma62 + 8.0 * luma54 - 8.0 * luma38 + luma30) / 12.0;
gy = (luma47 - luma45) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma57 - luma41) / 2.0;
gy = (luma50 - luma48) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533;
gx = (luma58 - luma42) / 2.0;
gy = (-luma52 + 8.0 * luma51 - 8.0 * luma49 + luma48) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma59 - luma43) / 2.0;
gy = (-luma53 + 8.0 * luma52 - 8.0 * luma50 + luma49) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (luma60 - luma44) / 2.0;
gy = (-luma54 + 8.0 * luma53 - 8.0 * luma51 + luma50) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (luma61 - luma45) / 2.0;
gy = (-luma55 + 8.0 * luma54 - 8.0 * luma52 + luma51) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma62 - luma46) / 2.0;
gy = (luma55 - luma53) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533;
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0;
float res = 0.0;
vec4 w;
w = texture(ravu_lut4, vec2(0.0625, coord_y));
res += (inp0[local_pos + 4] + inp0[local_pos + 109]) * w[0];
res += (inp1[local_pos + 4] + inp1[local_pos + 93]) * w[1];
res += (inp0[local_pos + 20] + inp0[local_pos + 93]) * w[2];
res += (inp1[local_pos + 20] + inp1[local_pos + 77]) * w[3];
w = texture(ravu_lut4, vec2(0.1875, coord_y));
res += (inp0[local_pos + 36] + inp0[local_pos + 77]) * w[0];
res += (inp1[local_pos + 36] + inp1[local_pos + 61]) * w[1];
res += (inp0[local_pos + 52] + inp0[local_pos + 61]) * w[2];
res += (inp1[local_pos + 52] + inp1[local_pos + 45]) * w[3];
w = texture(ravu_lut4, vec2(0.3125, coord_y));
res += (inp1[local_pos + 3] + inp1[local_pos + 94]) * w[0];
res += (inp0[local_pos + 19] + inp0[local_pos + 94]) * w[1];
res += (inp1[local_pos + 19] + inp1[local_pos + 78]) * w[2];
res += (inp0[local_pos + 35] + inp0[local_pos + 78]) * w[3];
w = texture(ravu_lut4, vec2(0.4375, coord_y));
res += (inp1[local_pos + 35] + inp1[local_pos + 62]) * w[0];
res += (inp0[local_pos + 51] + inp0[local_pos + 62]) * w[1];
res += (inp1[local_pos + 51] + inp1[local_pos + 46]) * w[2];
res += (inp0[local_pos + 67] + inp0[local_pos + 46]) * w[3];
w = texture(ravu_lut4, vec2(0.5625, coord_y));
res += (inp0[local_pos + 18] + inp0[local_pos + 95]) * w[0];
res += (inp1[local_pos + 18] + inp1[local_pos + 79]) * w[1];
res += (inp0[local_pos + 34] + inp0[local_pos + 79]) * w[2];
res += (inp1[local_pos + 34] + inp1[local_pos + 63]) * w[3];
w = texture(ravu_lut4, vec2(0.6875, coord_y));
res += (inp0[local_pos + 50] + inp0[local_pos + 63]) * w[0];
res += (inp1[local_pos + 50] + inp1[local_pos + 47]) * w[1];
res += (inp0[local_pos + 66] + inp0[local_pos + 47]) * w[2];
res += (inp1[local_pos + 66] + inp1[local_pos + 31]) * w[3];
w = texture(ravu_lut4, vec2(0.8125, coord_y));
res += (inp1[local_pos + 17] + inp1[local_pos + 80]) * w[0];
res += (inp0[local_pos + 33] + inp0[local_pos + 80]) * w[1];
res += (inp1[local_pos + 33] + inp1[local_pos + 64]) * w[2];
res += (inp0[local_pos + 49] + inp0[local_pos + 64]) * w[3];
w = texture(ravu_lut4, vec2(0.9375, coord_y));
res += (inp1[local_pos + 49] + inp1[local_pos + 48]) * w[0];
res += (inp0[local_pos + 65] + inp0[local_pos + 48]) * w[1];
res += (inp1[local_pos + 65] + inp1[local_pos + 32]) * w[2];
res += (inp0[local_pos + 81] + inp0[local_pos + 32]) * w[3];
res = clamp(res, 0.0, 1.0);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 1), res);
}
{
float luma62 = inp0[local_pos + 108];
float luma55 = inp0[local_pos + 109];
float luma8 = inp0[local_pos + 18];
float luma1 = inp0[local_pos + 19];
float luma24 = inp0[local_pos + 32];
float luma17 = inp0[local_pos + 33];
float luma10 = inp0[local_pos + 34];
float luma3 = inp0[local_pos + 35];
float luma40 = inp0[local_pos + 46];
float luma33 = inp0[local_pos + 47];
float luma26 = inp0[local_pos + 48];
float luma19 = inp0[local_pos + 49];
float luma12 = inp0[local_pos + 50];
float luma5 = inp0[local_pos + 51];
float luma49 = inp0[local_pos + 61];
float luma42 = inp0[local_pos + 62];
float luma35 = inp0[local_pos + 63];
float luma28 = inp0[local_pos + 64];
float luma21 = inp0[local_pos + 65];
float luma14 = inp0[local_pos + 66];
float luma58 = inp0[local_pos + 76];
float luma51 = inp0[local_pos + 77];
float luma44 = inp0[local_pos + 78];
float luma37 = inp0[local_pos + 79];
float luma30 = inp0[local_pos + 80];
float luma23 = inp0[local_pos + 81];
float luma60 = inp0[local_pos + 92];
float luma53 = inp0[local_pos + 93];
float luma46 = inp0[local_pos + 94];
float luma39 = inp0[local_pos + 95];
float luma16 = inp1[local_pos + 17];
float luma9 = inp1[local_pos + 18];
float luma2 = inp1[local_pos + 19];
float luma32 = inp1[local_pos + 31];
float luma25 = inp1[local_pos + 32];
float luma18 = inp1[local_pos + 33];
float luma11 = inp1[local_pos + 34];
float luma4 = inp1[local_pos + 35];
float luma48 = inp1[local_pos + 45];
float luma41 = inp1[local_pos + 46];
float luma34 = inp1[local_pos + 47];
float luma27 = inp1[local_pos + 48];
float luma20 = inp1[local_pos + 49];
float luma13 = inp1[local_pos + 50];
float luma6 = inp1[local_pos + 51];
float luma57 = inp1[local_pos + 60];
float luma50 = inp1[local_pos + 61];
float luma43 = inp1[local_pos + 62];
float luma36 = inp1[local_pos + 63];
float luma29 = inp1[local_pos + 64];
float luma22 = inp1[local_pos + 65];
float luma15 = inp1[local_pos + 66];
float luma59 = inp1[local_pos + 76];
float luma52 = inp1[local_pos + 77];
float luma45 = inp1[local_pos + 78];
float luma38 = inp1[local_pos + 79];
float luma31 = inp1[local_pos + 80];
float luma61 = inp1[local_pos + 92];
float luma54 = inp1[local_pos + 93];
float luma47 = inp1[local_pos + 94];
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (luma17 - luma1) / 2.0;
gy = (luma10 - luma8) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533;
gx = (luma18 - luma2) / 2.0;
gy = (-luma12 + 8.0 * luma11 - 8.0 * luma9 + luma8) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma19 - luma3) / 2.0;
gy = (-luma13 + 8.0 * luma12 - 8.0 * luma10 + luma9) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (luma20 - luma4) / 2.0;
gy = (-luma14 + 8.0 * luma13 - 8.0 * luma11 + luma10) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (luma21 - luma5) / 2.0;
gy = (-luma15 + 8.0 * luma14 - 8.0 * luma12 + luma11) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma22 - luma6) / 2.0;
gy = (luma15 - luma13) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533;
gx = (-luma33 + 8.0 * luma25 - 8.0 * luma9 + luma1) / 12.0;
gy = (luma18 - luma16) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (-luma34 + 8.0 * luma26 - 8.0 * luma10 + luma2) / 12.0;
gy = (-luma20 + 8.0 * luma19 - 8.0 * luma17 + luma16) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854;
gx = (-luma35 + 8.0 * luma27 - 8.0 * luma11 + luma3) / 12.0;
gy = (-luma21 + 8.0 * luma20 - 8.0 * luma18 + luma17) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma36 + 8.0 * luma28 - 8.0 * luma12 + luma4) / 12.0;
gy = (-luma22 + 8.0 * luma21 - 8.0 * luma19 + luma18) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma37 + 8.0 * luma29 - 8.0 * luma13 + luma5) / 12.0;
gy = (-luma23 + 8.0 * luma22 - 8.0 * luma20 + luma19) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854;
gx = (-luma38 + 8.0 * luma30 - 8.0 * luma14 + luma6) / 12.0;
gy = (luma23 - luma21) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (-luma41 + 8.0 * luma33 - 8.0 * luma17 + luma9) / 12.0;
gy = (luma26 - luma24) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (-luma42 + 8.0 * luma34 - 8.0 * luma18 + luma10) / 12.0;
gy = (-luma28 + 8.0 * luma27 - 8.0 * luma25 + luma24) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma43 + 8.0 * luma35 - 8.0 * luma19 + luma11) / 12.0;
gy = (-luma29 + 8.0 * luma28 - 8.0 * luma26 + luma25) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013;
gx = (-luma44 + 8.0 * luma36 - 8.0 * luma20 + luma12) / 12.0;
gy = (-luma30 + 8.0 * luma29 - 8.0 * luma27 + luma26) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013;
gx = (-luma45 + 8.0 * luma37 - 8.0 * luma21 + luma13) / 12.0;
gy = (-luma31 + 8.0 * luma30 - 8.0 * luma28 + luma27) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma46 + 8.0 * luma38 - 8.0 * luma22 + luma14) / 12.0;
gy = (luma31 - luma29) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (-luma49 + 8.0 * luma41 - 8.0 * luma25 + luma17) / 12.0;
gy = (luma34 - luma32) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (-luma50 + 8.0 * luma42 - 8.0 * luma26 + luma18) / 12.0;
gy = (-luma36 + 8.0 * luma35 - 8.0 * luma33 + luma32) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma51 + 8.0 * luma43 - 8.0 * luma27 + luma19) / 12.0;
gy = (-luma37 + 8.0 * luma36 - 8.0 * luma34 + luma33) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013;
gx = (-luma52 + 8.0 * luma44 - 8.0 * luma28 + luma20) / 12.0;
gy = (-luma38 + 8.0 * luma37 - 8.0 * luma35 + luma34) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013;
gx = (-luma53 + 8.0 * luma45 - 8.0 * luma29 + luma21) / 12.0;
gy = (-luma39 + 8.0 * luma38 - 8.0 * luma36 + luma35) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma54 + 8.0 * luma46 - 8.0 * luma30 + luma22) / 12.0;
gy = (luma39 - luma37) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (-luma57 + 8.0 * luma49 - 8.0 * luma33 + luma25) / 12.0;
gy = (luma42 - luma40) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (-luma58 + 8.0 * luma50 - 8.0 * luma34 + luma26) / 12.0;
gy = (-luma44 + 8.0 * luma43 - 8.0 * luma41 + luma40) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854;
gx = (-luma59 + 8.0 * luma51 - 8.0 * luma35 + luma27) / 12.0;
gy = (-luma45 + 8.0 * luma44 - 8.0 * luma42 + luma41) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma60 + 8.0 * luma52 - 8.0 * luma36 + luma28) / 12.0;
gy = (-luma46 + 8.0 * luma45 - 8.0 * luma43 + luma42) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma61 + 8.0 * luma53 - 8.0 * luma37 + luma29) / 12.0;
gy = (-luma47 + 8.0 * luma46 - 8.0 * luma44 + luma43) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854;
gx = (-luma62 + 8.0 * luma54 - 8.0 * luma38 + luma30) / 12.0;
gy = (luma47 - luma45) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma57 - luma41) / 2.0;
gy = (luma50 - luma48) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533;
gx = (luma58 - luma42) / 2.0;
gy = (-luma52 + 8.0 * luma51 - 8.0 * luma49 + luma48) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma59 - luma43) / 2.0;
gy = (-luma53 + 8.0 * luma52 - 8.0 * luma50 + luma49) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (luma60 - luma44) / 2.0;
gy = (-luma54 + 8.0 * luma53 - 8.0 * luma51 + luma50) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (luma61 - luma45) / 2.0;
gy = (-luma55 + 8.0 * luma54 - 8.0 * luma52 + luma51) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma62 - luma46) / 2.0;
gy = (luma55 - luma53) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533;
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0;
float res = 0.0;
vec4 w;
w = texture(ravu_lut4, vec2(0.0625, coord_y));
res += (inp1[local_pos + 3] + inp1[local_pos + 108]) * w[0];
res += (inp0[local_pos + 19] + inp0[local_pos + 108]) * w[1];
res += (inp1[local_pos + 19] + inp1[local_pos + 92]) * w[2];
res += (inp0[local_pos + 35] + inp0[local_pos + 92]) * w[3];
w = texture(ravu_lut4, vec2(0.1875, coord_y));
res += (inp1[local_pos + 35] + inp1[local_pos + 76]) * w[0];
res += (inp0[local_pos + 51] + inp0[local_pos + 76]) * w[1];
res += (inp1[local_pos + 51] + inp1[local_pos + 60]) * w[2];
res += (inp0[local_pos + 67] + inp0[local_pos + 60]) * w[3];
w = texture(ravu_lut4, vec2(0.3125, coord_y));
res += (inp0[local_pos + 18] + inp0[local_pos + 109]) * w[0];
res += (inp1[local_pos + 18] + inp1[local_pos + 93]) * w[1];
res += (inp0[local_pos + 34] + inp0[local_pos + 93]) * w[2];
res += (inp1[local_pos + 34] + inp1[local_pos + 77]) * w[3];
w = texture(ravu_lut4, vec2(0.4375, coord_y));
res += (inp0[local_pos + 50] + inp0[local_pos + 77]) * w[0];
res += (inp1[local_pos + 50] + inp1[local_pos + 61]) * w[1];
res += (inp0[local_pos + 66] + inp0[local_pos + 61]) * w[2];
res += (inp1[local_pos + 66] + inp1[local_pos + 45]) * w[3];
w = texture(ravu_lut4, vec2(0.5625, coord_y));
res += (inp1[local_pos + 17] + inp1[local_pos + 94]) * w[0];
res += (inp0[local_pos + 33] + inp0[local_pos + 94]) * w[1];
res += (inp1[local_pos + 33] + inp1[local_pos + 78]) * w[2];
res += (inp0[local_pos + 49] + inp0[local_pos + 78]) * w[3];
w = texture(ravu_lut4, vec2(0.6875, coord_y));
res += (inp1[local_pos + 49] + inp1[local_pos + 62]) * w[0];
res += (inp0[local_pos + 65] + inp0[local_pos + 62]) * w[1];
res += (inp1[local_pos + 65] + inp1[local_pos + 46]) * w[2];
res += (inp0[local_pos + 81] + inp0[local_pos + 46]) * w[3];
w = texture(ravu_lut4, vec2(0.8125, coord_y));
res += (inp0[local_pos + 32] + inp0[local_pos + 95]) * w[0];
res += (inp1[local_pos + 32] + inp1[local_pos + 79]) * w[1];
res += (inp0[local_pos + 48] + inp0[local_pos + 79]) * w[2];
res += (inp1[local_pos + 48] + inp1[local_pos + 63]) * w[3];
w = texture(ravu_lut4, vec2(0.9375, coord_y));
res += (inp0[local_pos + 64] + inp0[local_pos + 63]) * w[0];
res += (inp1[local_pos + 64] + inp1[local_pos + 47]) * w[1];
res += (inp0[local_pos + 80] + inp0[local_pos + 47]) * w[2];
res += (inp1[local_pos + 80] + inp1[local_pos + 31]) * w[3];
res = clamp(res, 0.0, 1.0);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 0), res);
}
float res;
res = inp0[local_pos + 64];
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 1), res);
res = inp1[local_pos + 48];
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 0), res);
}

View file

@ -0,0 +1,852 @@
// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers
// Please don't edit this file directly.
// Generated by: ravu.py --target rgb --weights-file weights\ravu_weights-r4.py --float-format float16dx --use-compute-shader --use-magpie --overwrite
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//!MAGPIE EFFECT
//!VERSION 4
// Source image. Pass 1 lists it under //!IN and reads it through INPUT_tex.
//!TEXTURE
Texture2D INPUT;
// Point-filtered sampler declared for INPUT.
// NOTE(review): presumably picked up by the texture() helper through the
// sam_<name> naming convention - confirm in prescalers.hlsli.
//!SAMPLER
//!FILTER POINT
SamplerState sam_INPUT;
// Final result of the effect: twice the input size along each axis.
//!TEXTURE
//!WIDTH INPUT_WIDTH * 2
//!HEIGHT INPUT_HEIGHT * 2
Texture2D OUTPUT;
// Weight table loaded from ravu_lut4_f16.dds, four half-float channels per
// texel. Pass1 fetches one vec4 of weights per lookup at
// x = 0.0625, 0.1875, ... and y = coord_y.
//!TEXTURE
//!SOURCE ravu_lut4_f16.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_lut4;
// Linear-filtered sampler declared for the weight table.
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_lut4;
// Input-sized half-float intermediate. Pass 1 declares it as //!OUT and
// writes it in imageStoreOverride.
//!TEXTURE
//!FORMAT R16G16B16A16_FLOAT
//!WIDTH INPUT_WIDTH
//!HEIGHT INPUT_HEIGHT
Texture2D ravu_int11;
// Point-filtered sampler declared for the intermediate. Its reader is not in
// this part of the file - presumably the following pass.
//!SAMPLER
//!FILTER POINT
SamplerState sam_ravu_int11;
//!COMMON
// Code in this section is shared by the passes of this effect.
// NOTE(review): prescalers.hlsli presumably supplies the GLSL-style names the
// pass bodies rely on (vec3/vec4, texture, barrier, gl_* ids) - confirm there.
#include "prescalers.hlsli"
// Index of the final pass. Pass bodies test CURRENT_PASS == LAST_PASS to
// decide whether the output bounds check is compiled in.
#define LAST_PASS 2
//!PASS 1
//!DESC RAVU (step1, rgb, r4, compute)
//!IN INPUT, ravu_lut4
//!OUT ravu_int11
//!BLOCK_SIZE 32, 8
//!NUM_THREADS 32, 8
// Pass 1 prelude: constants, per-group caches and the macros that let the
// GLSL-flavoured body of Pass1 run against Magpie resources.
//
// Weights applied to RGB (via dot) to obtain the luma stored in inp_luma0.
// The values match the Rec. 709 luma coefficients.
static const vec3 color_primary = vec3(0.2126, 0.7152, 0.0722);
// Per-group cache of input colours. 585 = 39 * 15 (the 32 x 8 thread group
// plus 7 extra texels per axis): Pass1 fills entry `id` from column id / 15
// and row id % 15, and threads address it as x * 15 + y.
// NOTE(review): `shared` is not an HLSL keyword; presumably prescalers.hlsli
// maps it to `groupshared` - confirm.
shared vec3 inp0[585];
// Luma of the matching inp0 entry, i.e. dot(inp0[id], color_primary).
shared float inp_luma0[585];
// Index of this pass; compared with LAST_PASS inside the pass body.
#define CURRENT_PASS 1
// Identity hook wrapped around a fetched sample (used by INPUT_tex).
#define GET_SAMPLE(x) x
// Redirects imageStore calls: the image argument is dropped and the value's
// .xyz is forwarded to imageStoreOverride together with the position.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.xyz)
// Stores one RGB result into the pass-1 target ravu_int11 at texel `pos`.
// The fourth channel carries no data and is written as 0.
void imageStoreOverride(uint2 pos, vec3 value) {
    const vec4 texel = vec4(value, 0.0);
    ravu_int11[pos] = texel;
}
// Accessors that map the names used by the generated pass body onto the
// Magpie INPUT and ravu_lut4 resources.
//
// Samples INPUT at normalized position `pos`, widened to vec4 and passed
// through GET_SAMPLE.
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
// Input dimensions as floats, taken from GetInputSize().
static const float2 INPUT_size = float2(GetInputSize());
// Value of GetInputPt() as floats. Pass1 multiplies texel coordinates by it
// to form sampling positions.
// NOTE(review): presumably 1 / INPUT_size, i.e. the size of one texel - confirm.
static const float2 INPUT_pt = float2(GetInputPt());
// Samples the weight table at `pos` as a vec4.
#define ravu_lut4_tex(pos) (vec4(texture(ravu_lut4, pos)))
// HOOKED_* are aliases of the INPUT_* accessors; the pass body is written
// against the HOOKED_ names (HOOKED_tex, HOOKED_pt).
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
void Pass1(uint2 blockStart, uint3 threadId) {
ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
int local_pos = int(gl_LocalInvocationID.x) * 15 + int(gl_LocalInvocationID.y);
{
for (int id = int(gl_LocalInvocationIndex); id < 585; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 15, y = (uint)id % 15;
inp0[id] =
HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-2.5), float(group_base.y + y) + (-2.5))).xyz;
inp_luma0[id] = dot(inp0[id], color_primary);
}
}
barrier();
#if CURRENT_PASS == LAST_PASS
uint2 destPos = blockStart + threadId.xy * 2;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
{
float luma57 = inp_luma0[local_pos + 106];
float luma58 = inp_luma0[local_pos + 107];
float luma59 = inp_luma0[local_pos + 108];
float luma60 = inp_luma0[local_pos + 109];
float luma61 = inp_luma0[local_pos + 110];
float luma62 = inp_luma0[local_pos + 111];
float luma8 = inp_luma0[local_pos + 15];
float luma9 = inp_luma0[local_pos + 16];
float luma10 = inp_luma0[local_pos + 17];
float luma11 = inp_luma0[local_pos + 18];
float luma12 = inp_luma0[local_pos + 19];
float luma1 = inp_luma0[local_pos + 1];
float luma13 = inp_luma0[local_pos + 20];
float luma14 = inp_luma0[local_pos + 21];
float luma15 = inp_luma0[local_pos + 22];
float luma2 = inp_luma0[local_pos + 2];
float luma16 = inp_luma0[local_pos + 30];
float luma17 = inp_luma0[local_pos + 31];
float luma18 = inp_luma0[local_pos + 32];
float luma19 = inp_luma0[local_pos + 33];
float luma20 = inp_luma0[local_pos + 34];
float luma21 = inp_luma0[local_pos + 35];
float luma22 = inp_luma0[local_pos + 36];
float luma23 = inp_luma0[local_pos + 37];
float luma3 = inp_luma0[local_pos + 3];
float luma24 = inp_luma0[local_pos + 45];
float luma25 = inp_luma0[local_pos + 46];
float luma26 = inp_luma0[local_pos + 47];
float luma27 = inp_luma0[local_pos + 48];
float luma28 = inp_luma0[local_pos + 49];
float luma4 = inp_luma0[local_pos + 4];
float luma29 = inp_luma0[local_pos + 50];
float luma30 = inp_luma0[local_pos + 51];
float luma31 = inp_luma0[local_pos + 52];
float luma5 = inp_luma0[local_pos + 5];
float luma32 = inp_luma0[local_pos + 60];
float luma33 = inp_luma0[local_pos + 61];
float luma34 = inp_luma0[local_pos + 62];
float luma35 = inp_luma0[local_pos + 63];
float luma36 = inp_luma0[local_pos + 64];
float luma37 = inp_luma0[local_pos + 65];
float luma38 = inp_luma0[local_pos + 66];
float luma39 = inp_luma0[local_pos + 67];
float luma6 = inp_luma0[local_pos + 6];
float luma40 = inp_luma0[local_pos + 75];
float luma41 = inp_luma0[local_pos + 76];
float luma42 = inp_luma0[local_pos + 77];
float luma43 = inp_luma0[local_pos + 78];
float luma44 = inp_luma0[local_pos + 79];
float luma45 = inp_luma0[local_pos + 80];
float luma46 = inp_luma0[local_pos + 81];
float luma47 = inp_luma0[local_pos + 82];
float luma48 = inp_luma0[local_pos + 90];
float luma49 = inp_luma0[local_pos + 91];
float luma50 = inp_luma0[local_pos + 92];
float luma51 = inp_luma0[local_pos + 93];
float luma52 = inp_luma0[local_pos + 94];
float luma53 = inp_luma0[local_pos + 95];
float luma54 = inp_luma0[local_pos + 96];
float luma55 = inp_luma0[local_pos + 97];
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (luma17 - luma1) / 2.0;
gy = (luma10 - luma8) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533;
gx = (luma18 - luma2) / 2.0;
gy = (-luma12 + 8.0 * luma11 - 8.0 * luma9 + luma8) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma19 - luma3) / 2.0;
gy = (-luma13 + 8.0 * luma12 - 8.0 * luma10 + luma9) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (luma20 - luma4) / 2.0;
gy = (-luma14 + 8.0 * luma13 - 8.0 * luma11 + luma10) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (luma21 - luma5) / 2.0;
gy = (-luma15 + 8.0 * luma14 - 8.0 * luma12 + luma11) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma22 - luma6) / 2.0;
gy = (luma15 - luma13) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533;
gx = (-luma33 + 8.0 * luma25 - 8.0 * luma9 + luma1) / 12.0;
gy = (luma18 - luma16) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (-luma34 + 8.0 * luma26 - 8.0 * luma10 + luma2) / 12.0;
gy = (-luma20 + 8.0 * luma19 - 8.0 * luma17 + luma16) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854;
gx = (-luma35 + 8.0 * luma27 - 8.0 * luma11 + luma3) / 12.0;
gy = (-luma21 + 8.0 * luma20 - 8.0 * luma18 + luma17) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma36 + 8.0 * luma28 - 8.0 * luma12 + luma4) / 12.0;
gy = (-luma22 + 8.0 * luma21 - 8.0 * luma19 + luma18) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma37 + 8.0 * luma29 - 8.0 * luma13 + luma5) / 12.0;
gy = (-luma23 + 8.0 * luma22 - 8.0 * luma20 + luma19) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854;
gx = (-luma38 + 8.0 * luma30 - 8.0 * luma14 + luma6) / 12.0;
gy = (luma23 - luma21) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (-luma41 + 8.0 * luma33 - 8.0 * luma17 + luma9) / 12.0;
gy = (luma26 - luma24) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (-luma42 + 8.0 * luma34 - 8.0 * luma18 + luma10) / 12.0;
gy = (-luma28 + 8.0 * luma27 - 8.0 * luma25 + luma24) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma43 + 8.0 * luma35 - 8.0 * luma19 + luma11) / 12.0;
gy = (-luma29 + 8.0 * luma28 - 8.0 * luma26 + luma25) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013;
gx = (-luma44 + 8.0 * luma36 - 8.0 * luma20 + luma12) / 12.0;
gy = (-luma30 + 8.0 * luma29 - 8.0 * luma27 + luma26) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013;
gx = (-luma45 + 8.0 * luma37 - 8.0 * luma21 + luma13) / 12.0;
gy = (-luma31 + 8.0 * luma30 - 8.0 * luma28 + luma27) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma46 + 8.0 * luma38 - 8.0 * luma22 + luma14) / 12.0;
gy = (luma31 - luma29) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (-luma49 + 8.0 * luma41 - 8.0 * luma25 + luma17) / 12.0;
gy = (luma34 - luma32) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (-luma50 + 8.0 * luma42 - 8.0 * luma26 + luma18) / 12.0;
gy = (-luma36 + 8.0 * luma35 - 8.0 * luma33 + luma32) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma51 + 8.0 * luma43 - 8.0 * luma27 + luma19) / 12.0;
gy = (-luma37 + 8.0 * luma36 - 8.0 * luma34 + luma33) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013;
gx = (-luma52 + 8.0 * luma44 - 8.0 * luma28 + luma20) / 12.0;
gy = (-luma38 + 8.0 * luma37 - 8.0 * luma35 + luma34) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013;
gx = (-luma53 + 8.0 * luma45 - 8.0 * luma29 + luma21) / 12.0;
gy = (-luma39 + 8.0 * luma38 - 8.0 * luma36 + luma35) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma54 + 8.0 * luma46 - 8.0 * luma30 + luma22) / 12.0;
gy = (luma39 - luma37) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (-luma57 + 8.0 * luma49 - 8.0 * luma33 + luma25) / 12.0;
gy = (luma42 - luma40) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (-luma58 + 8.0 * luma50 - 8.0 * luma34 + luma26) / 12.0;
gy = (-luma44 + 8.0 * luma43 - 8.0 * luma41 + luma40) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854;
gx = (-luma59 + 8.0 * luma51 - 8.0 * luma35 + luma27) / 12.0;
gy = (-luma45 + 8.0 * luma44 - 8.0 * luma42 + luma41) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma60 + 8.0 * luma52 - 8.0 * luma36 + luma28) / 12.0;
gy = (-luma46 + 8.0 * luma45 - 8.0 * luma43 + luma42) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma61 + 8.0 * luma53 - 8.0 * luma37 + luma29) / 12.0;
gy = (-luma47 + 8.0 * luma46 - 8.0 * luma44 + luma43) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854;
gx = (-luma62 + 8.0 * luma54 - 8.0 * luma38 + luma30) / 12.0;
gy = (luma47 - luma45) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma57 - luma41) / 2.0;
gy = (luma50 - luma48) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533;
gx = (luma58 - luma42) / 2.0;
gy = (-luma52 + 8.0 * luma51 - 8.0 * luma49 + luma48) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma59 - luma43) / 2.0;
gy = (-luma53 + 8.0 * luma52 - 8.0 * luma50 + luma49) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (luma60 - luma44) / 2.0;
gy = (-luma54 + 8.0 * luma53 - 8.0 * luma51 + luma50) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (luma61 - luma45) / 2.0;
gy = (-luma55 + 8.0 * luma54 - 8.0 * luma52 + luma51) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma62 - luma46) / 2.0;
gy = (luma55 - luma53) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533;
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0;
vec3 res = vec3(0.0, 0.0, 0.0);
vec4 w;
w = texture(ravu_lut4, vec2(0.0625, coord_y));
res += (inp0[local_pos + 0] + inp0[local_pos + 112]) * w[0];
res += (inp0[local_pos + 1] + inp0[local_pos + 111]) * w[1];
res += (inp0[local_pos + 2] + inp0[local_pos + 110]) * w[2];
res += (inp0[local_pos + 3] + inp0[local_pos + 109]) * w[3];
w = texture(ravu_lut4, vec2(0.1875, coord_y));
res += (inp0[local_pos + 4] + inp0[local_pos + 108]) * w[0];
res += (inp0[local_pos + 5] + inp0[local_pos + 107]) * w[1];
res += (inp0[local_pos + 6] + inp0[local_pos + 106]) * w[2];
res += (inp0[local_pos + 7] + inp0[local_pos + 105]) * w[3];
w = texture(ravu_lut4, vec2(0.3125, coord_y));
res += (inp0[local_pos + 15] + inp0[local_pos + 97]) * w[0];
res += (inp0[local_pos + 16] + inp0[local_pos + 96]) * w[1];
res += (inp0[local_pos + 17] + inp0[local_pos + 95]) * w[2];
res += (inp0[local_pos + 18] + inp0[local_pos + 94]) * w[3];
w = texture(ravu_lut4, vec2(0.4375, coord_y));
res += (inp0[local_pos + 19] + inp0[local_pos + 93]) * w[0];
res += (inp0[local_pos + 20] + inp0[local_pos + 92]) * w[1];
res += (inp0[local_pos + 21] + inp0[local_pos + 91]) * w[2];
res += (inp0[local_pos + 22] + inp0[local_pos + 90]) * w[3];
w = texture(ravu_lut4, vec2(0.5625, coord_y));
res += (inp0[local_pos + 30] + inp0[local_pos + 82]) * w[0];
res += (inp0[local_pos + 31] + inp0[local_pos + 81]) * w[1];
res += (inp0[local_pos + 32] + inp0[local_pos + 80]) * w[2];
res += (inp0[local_pos + 33] + inp0[local_pos + 79]) * w[3];
w = texture(ravu_lut4, vec2(0.6875, coord_y));
res += (inp0[local_pos + 34] + inp0[local_pos + 78]) * w[0];
res += (inp0[local_pos + 35] + inp0[local_pos + 77]) * w[1];
res += (inp0[local_pos + 36] + inp0[local_pos + 76]) * w[2];
res += (inp0[local_pos + 37] + inp0[local_pos + 75]) * w[3];
w = texture(ravu_lut4, vec2(0.8125, coord_y));
res += (inp0[local_pos + 45] + inp0[local_pos + 67]) * w[0];
res += (inp0[local_pos + 46] + inp0[local_pos + 66]) * w[1];
res += (inp0[local_pos + 47] + inp0[local_pos + 65]) * w[2];
res += (inp0[local_pos + 48] + inp0[local_pos + 64]) * w[3];
w = texture(ravu_lut4, vec2(0.9375, coord_y));
res += (inp0[local_pos + 49] + inp0[local_pos + 63]) * w[0];
res += (inp0[local_pos + 50] + inp0[local_pos + 62]) * w[1];
res += (inp0[local_pos + 51] + inp0[local_pos + 61]) * w[2];
res += (inp0[local_pos + 52] + inp0[local_pos + 60]) * w[3];
res = clamp(res, 0.0, 1.0);
imageStore(out_image, ivec2(gl_GlobalInvocationID), vec4(res, 1.0));
}
}
//!PASS 2
//!DESC RAVU (step2, rgb, r4, compute)
//!IN INPUT, ravu_lut4, ravu_int11
//!OUT OUTPUT
//!BLOCK_SIZE 64, 16
//!NUM_THREADS 32, 8
// Declarations used by Pass2 below. A 64x16 block with 32x8 threads is
// consistent with Pass2 writing a 2x2 quad of output pixels per thread.
//
// Weights dotted with an RGB sample in Pass2 to obtain its luma (the values
// match the Rec. 709 luma coefficients).
static const vec3 color_primary = vec3(0.2126, 0.7152, 0.0722);
// Tiles filled cooperatively by Pass2 and read after its barrier():
//   inp0 / inp_luma0 cache ravu_int11 samples and their luma,
//   inp1 / inp_luma1 cache INPUT samples and their luma.
// 585 = 39 * 15 entries; Pass2 addresses them as x * 15 + y.
// NOTE(review): `shared` is not defined in this pass; presumably it maps to
// HLSL `groupshared` in the common include - confirm.
shared vec3 inp0[585];
shared float inp_luma0[585];
shared vec3 inp1[585];
shared float inp_luma1[585];
// Compared against LAST_PASS inside Pass2 to enable the output bounds check.
#define CURRENT_PASS 2
// Identity here: this pass works on full RGB samples.
#define GET_SAMPLE(x) x
// GLSL-style imageStore calls drop the image argument and write straight to OUTPUT.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val)
void imageStoreOverride(uint2 pos, float4 value) { OUTPUT[pos] = value; }
// Sampling helpers and size / texel-size constants for each bound texture.
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
#define ravu_lut4_tex(pos) (vec4(texture(ravu_lut4, pos)))
#define ravu_int11_tex(pos) (vec3(texture(ravu_int11, pos).xyz))
// ravu_int11 is given the same dimensions as the input (both come from
// GetInputSize()); its texel size is the reciprocal of that.
static const float2 ravu_int11_size = float2(GetInputSize().x, GetInputSize().y);
static const float2 ravu_int11_pt = float2(1.0 / (ravu_int11_size.x), 1.0 / (ravu_int11_size.y));
// The HOOKED_* names used by the generated code are plain aliases for INPUT.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// Pass 2 entry point. Each thread writes a 2x2 quad of OUTPUT pixels:
//   (0,1) and (1,0) are weighted sums of cached samples, with the weights
//         fetched from a ravu_lut4 row chosen from local luma gradients;
//   (1,1) is copied from the ravu_int11 tile and (0,0) from the INPUT tile.
// blockStart / threadId are referenced by name only in the LAST_PASS bounds
// check; the rest of the body uses gl_* names that are defined outside this
// pass (presumably compatibility macros in the common include - confirm).
void Pass2(uint2 blockStart, uint3 threadId) {
// Origin of this work group, and this thread's base index into the shared
// tiles (entries are stored as x * 15 + y, matching the loaders below).
ivec2 group_base = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
int local_pos = int(gl_LocalInvocationID.x) * 15 + int(gl_LocalInvocationID.y);
// Cooperative load #1: the group's threads step through all 585 tile entries,
// fetching ravu_int11 at (group origin + (x, y) - 3.5) texels and caching the
// colour together with its luma.
{
for (int id = int(gl_LocalInvocationIndex); id < 585; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 15, y = (uint)id % 15;
inp0[id] =
ravu_int11_tex(ravu_int11_pt * vec2(float(group_base.x + x) + (-3.5), float(group_base.y + y) + (-3.5)))
.xyz;
inp_luma0[id] = dot(inp0[id], color_primary);
}
}
// Cooperative load #2: same layout for INPUT (via HOOKED_tex), but with a
// -2.5 texel offset instead of -3.5.
{
for (int id = int(gl_LocalInvocationIndex); id < 585; id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint x = (uint)id / 15, y = (uint)id % 15;
inp1[id] =
HOOKED_tex(HOOKED_pt * vec2(float(group_base.x + x) + (-2.5), float(group_base.y + y) + (-2.5))).xyz;
inp_luma1[id] = dot(inp1[id], color_primary);
}
}
// Both tiles must be completely written before any thread reads them.
// NOTE(review): barrier() is defined elsewhere; presumably a group-wide sync.
barrier();
// Only compiled for the final pass: threads whose quad starts outside the
// output rectangle exit here (after the barrier, so the loads above still ran).
#if CURRENT_PASS == LAST_PASS
uint2 destPos = blockStart + threadId.xy * 2;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
// ---- Quad pixel (0, 1) ----
{
// Gather the luma taps for this pixel from both tiles. The lumaN numbering
// and the tile offsets are emitted by the generator and differ between the
// two predicted pixels.
float luma16 = inp_luma0[local_pos + 18];
float luma9 = inp_luma0[local_pos + 19];
float luma2 = inp_luma0[local_pos + 20];
float luma32 = inp_luma0[local_pos + 32];
float luma25 = inp_luma0[local_pos + 33];
float luma18 = inp_luma0[local_pos + 34];
float luma11 = inp_luma0[local_pos + 35];
float luma4 = inp_luma0[local_pos + 36];
float luma48 = inp_luma0[local_pos + 46];
float luma41 = inp_luma0[local_pos + 47];
float luma34 = inp_luma0[local_pos + 48];
float luma27 = inp_luma0[local_pos + 49];
float luma20 = inp_luma0[local_pos + 50];
float luma13 = inp_luma0[local_pos + 51];
float luma6 = inp_luma0[local_pos + 52];
float luma57 = inp_luma0[local_pos + 61];
float luma50 = inp_luma0[local_pos + 62];
float luma43 = inp_luma0[local_pos + 63];
float luma36 = inp_luma0[local_pos + 64];
float luma29 = inp_luma0[local_pos + 65];
float luma22 = inp_luma0[local_pos + 66];
float luma15 = inp_luma0[local_pos + 67];
float luma59 = inp_luma0[local_pos + 77];
float luma52 = inp_luma0[local_pos + 78];
float luma45 = inp_luma0[local_pos + 79];
float luma38 = inp_luma0[local_pos + 80];
float luma31 = inp_luma0[local_pos + 81];
float luma61 = inp_luma0[local_pos + 93];
float luma54 = inp_luma0[local_pos + 94];
float luma47 = inp_luma0[local_pos + 95];
float luma24 = inp_luma1[local_pos + 17];
float luma17 = inp_luma1[local_pos + 18];
float luma10 = inp_luma1[local_pos + 19];
float luma3 = inp_luma1[local_pos + 20];
float luma40 = inp_luma1[local_pos + 31];
float luma33 = inp_luma1[local_pos + 32];
float luma26 = inp_luma1[local_pos + 33];
float luma19 = inp_luma1[local_pos + 34];
float luma12 = inp_luma1[local_pos + 35];
float luma5 = inp_luma1[local_pos + 36];
float luma8 = inp_luma1[local_pos + 3];
float luma49 = inp_luma1[local_pos + 46];
float luma42 = inp_luma1[local_pos + 47];
float luma35 = inp_luma1[local_pos + 48];
float luma28 = inp_luma1[local_pos + 49];
float luma1 = inp_luma1[local_pos + 4];
float luma21 = inp_luma1[local_pos + 50];
float luma14 = inp_luma1[local_pos + 51];
float luma58 = inp_luma1[local_pos + 61];
float luma51 = inp_luma1[local_pos + 62];
float luma44 = inp_luma1[local_pos + 63];
float luma37 = inp_luma1[local_pos + 64];
float luma30 = inp_luma1[local_pos + 65];
float luma23 = inp_luma1[local_pos + 66];
float luma60 = inp_luma1[local_pos + 77];
float luma53 = inp_luma1[local_pos + 78];
float luma46 = inp_luma1[local_pos + 79];
float luma39 = inp_luma1[local_pos + 80];
float luma62 = inp_luma1[local_pos + 93];
float luma55 = inp_luma1[local_pos + 94];
// Accumulate weighted gradient products: abd += w * (gx*gx, gx*gy, gy*gy).
// Each gradient is a luma difference in either the two-tap (/ 2.0) or the
// four-tap (/ 12.0) form; the per-tap weights w are generator constants
// (presumably a Gaussian window - not verifiable from this file).
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (luma17 - luma1) / 2.0;
gy = (luma10 - luma8) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533;
gx = (luma18 - luma2) / 2.0;
gy = (-luma12 + 8.0 * luma11 - 8.0 * luma9 + luma8) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma19 - luma3) / 2.0;
gy = (-luma13 + 8.0 * luma12 - 8.0 * luma10 + luma9) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (luma20 - luma4) / 2.0;
gy = (-luma14 + 8.0 * luma13 - 8.0 * luma11 + luma10) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (luma21 - luma5) / 2.0;
gy = (-luma15 + 8.0 * luma14 - 8.0 * luma12 + luma11) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma22 - luma6) / 2.0;
gy = (luma15 - luma13) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533;
gx = (-luma33 + 8.0 * luma25 - 8.0 * luma9 + luma1) / 12.0;
gy = (luma18 - luma16) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (-luma34 + 8.0 * luma26 - 8.0 * luma10 + luma2) / 12.0;
gy = (-luma20 + 8.0 * luma19 - 8.0 * luma17 + luma16) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854;
gx = (-luma35 + 8.0 * luma27 - 8.0 * luma11 + luma3) / 12.0;
gy = (-luma21 + 8.0 * luma20 - 8.0 * luma18 + luma17) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma36 + 8.0 * luma28 - 8.0 * luma12 + luma4) / 12.0;
gy = (-luma22 + 8.0 * luma21 - 8.0 * luma19 + luma18) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma37 + 8.0 * luma29 - 8.0 * luma13 + luma5) / 12.0;
gy = (-luma23 + 8.0 * luma22 - 8.0 * luma20 + luma19) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854;
gx = (-luma38 + 8.0 * luma30 - 8.0 * luma14 + luma6) / 12.0;
gy = (luma23 - luma21) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (-luma41 + 8.0 * luma33 - 8.0 * luma17 + luma9) / 12.0;
gy = (luma26 - luma24) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (-luma42 + 8.0 * luma34 - 8.0 * luma18 + luma10) / 12.0;
gy = (-luma28 + 8.0 * luma27 - 8.0 * luma25 + luma24) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma43 + 8.0 * luma35 - 8.0 * luma19 + luma11) / 12.0;
gy = (-luma29 + 8.0 * luma28 - 8.0 * luma26 + luma25) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013;
gx = (-luma44 + 8.0 * luma36 - 8.0 * luma20 + luma12) / 12.0;
gy = (-luma30 + 8.0 * luma29 - 8.0 * luma27 + luma26) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013;
gx = (-luma45 + 8.0 * luma37 - 8.0 * luma21 + luma13) / 12.0;
gy = (-luma31 + 8.0 * luma30 - 8.0 * luma28 + luma27) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma46 + 8.0 * luma38 - 8.0 * luma22 + luma14) / 12.0;
gy = (luma31 - luma29) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (-luma49 + 8.0 * luma41 - 8.0 * luma25 + luma17) / 12.0;
gy = (luma34 - luma32) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (-luma50 + 8.0 * luma42 - 8.0 * luma26 + luma18) / 12.0;
gy = (-luma36 + 8.0 * luma35 - 8.0 * luma33 + luma32) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma51 + 8.0 * luma43 - 8.0 * luma27 + luma19) / 12.0;
gy = (-luma37 + 8.0 * luma36 - 8.0 * luma34 + luma33) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013;
gx = (-luma52 + 8.0 * luma44 - 8.0 * luma28 + luma20) / 12.0;
gy = (-luma38 + 8.0 * luma37 - 8.0 * luma35 + luma34) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013;
gx = (-luma53 + 8.0 * luma45 - 8.0 * luma29 + luma21) / 12.0;
gy = (-luma39 + 8.0 * luma38 - 8.0 * luma36 + luma35) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma54 + 8.0 * luma46 - 8.0 * luma30 + luma22) / 12.0;
gy = (luma39 - luma37) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (-luma57 + 8.0 * luma49 - 8.0 * luma33 + luma25) / 12.0;
gy = (luma42 - luma40) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (-luma58 + 8.0 * luma50 - 8.0 * luma34 + luma26) / 12.0;
gy = (-luma44 + 8.0 * luma43 - 8.0 * luma41 + luma40) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854;
gx = (-luma59 + 8.0 * luma51 - 8.0 * luma35 + luma27) / 12.0;
gy = (-luma45 + 8.0 * luma44 - 8.0 * luma42 + luma41) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma60 + 8.0 * luma52 - 8.0 * luma36 + luma28) / 12.0;
gy = (-luma46 + 8.0 * luma45 - 8.0 * luma43 + luma42) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma61 + 8.0 * luma53 - 8.0 * luma37 + luma29) / 12.0;
gy = (-luma47 + 8.0 * luma46 - 8.0 * luma44 + luma43) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854;
gx = (-luma62 + 8.0 * luma54 - 8.0 * luma38 + luma30) / 12.0;
gy = (luma47 - luma45) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma57 - luma41) / 2.0;
gy = (luma50 - luma48) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533;
gx = (luma58 - luma42) / 2.0;
gy = (-luma52 + 8.0 * luma51 - 8.0 * luma49 + luma48) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma59 - luma43) / 2.0;
gy = (-luma53 + 8.0 * luma52 - 8.0 * luma50 + luma49) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (luma60 - luma44) / 2.0;
gy = (-luma54 + 8.0 * luma53 - 8.0 * luma51 + luma50) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (luma61 - luma45) / 2.0;
gy = (-luma55 + 8.0 * luma54 - 8.0 * luma52 + luma51) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma62 - luma46) / 2.0;
gy = (luma55 - luma53) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533;
// Eigenvalues L1 >= L2 of the symmetric matrix [[a, b], [b, d]], computed
// from its trace T and determinant D (max() keeps the sqrt argument >= 0).
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
// Derive the three LUT keys. The 1.192092896e-7 comparisons guard the
// near-zero cases (b ~ 0 for theta, sqrtL1 + sqrtL2 ~ 0 for the mu division).
// NOTE(review): mix(x, y, cond) is assumed to pick y when cond is true, as in
// GLSL; its definition is not in this pass - confirm.
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
// Quantise: angle = floor(theta * 24 / pi), strength clamped to 0..8, and
// coherence in {0, 1, 2} (thresholds 0.25 and 0.5). The row index is
// (angle * 9 + strength) * 3 + coherence; + 0.5 and / 648 (= 24 * 9 * 3)
// turn it into a normalised coordinate at the centre of that row.
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0;
// Read 8 texels from that LUT row (x = 0.0625 + k * 0.125), i.e. 32 weights;
// each weight scales the sum of a pair of cached colour samples.
vec3 res = vec3(0.0, 0.0, 0.0);
vec4 w;
w = texture(ravu_lut4, vec2(0.0625, coord_y));
res += (inp0[local_pos + 4] + inp0[local_pos + 109]) * w[0];
res += (inp1[local_pos + 4] + inp1[local_pos + 93]) * w[1];
res += (inp0[local_pos + 20] + inp0[local_pos + 93]) * w[2];
res += (inp1[local_pos + 20] + inp1[local_pos + 77]) * w[3];
w = texture(ravu_lut4, vec2(0.1875, coord_y));
res += (inp0[local_pos + 36] + inp0[local_pos + 77]) * w[0];
res += (inp1[local_pos + 36] + inp1[local_pos + 61]) * w[1];
res += (inp0[local_pos + 52] + inp0[local_pos + 61]) * w[2];
res += (inp1[local_pos + 52] + inp1[local_pos + 45]) * w[3];
w = texture(ravu_lut4, vec2(0.3125, coord_y));
res += (inp1[local_pos + 3] + inp1[local_pos + 94]) * w[0];
res += (inp0[local_pos + 19] + inp0[local_pos + 94]) * w[1];
res += (inp1[local_pos + 19] + inp1[local_pos + 78]) * w[2];
res += (inp0[local_pos + 35] + inp0[local_pos + 78]) * w[3];
w = texture(ravu_lut4, vec2(0.4375, coord_y));
res += (inp1[local_pos + 35] + inp1[local_pos + 62]) * w[0];
res += (inp0[local_pos + 51] + inp0[local_pos + 62]) * w[1];
res += (inp1[local_pos + 51] + inp1[local_pos + 46]) * w[2];
res += (inp0[local_pos + 67] + inp0[local_pos + 46]) * w[3];
w = texture(ravu_lut4, vec2(0.5625, coord_y));
res += (inp0[local_pos + 18] + inp0[local_pos + 95]) * w[0];
res += (inp1[local_pos + 18] + inp1[local_pos + 79]) * w[1];
res += (inp0[local_pos + 34] + inp0[local_pos + 79]) * w[2];
res += (inp1[local_pos + 34] + inp1[local_pos + 63]) * w[3];
w = texture(ravu_lut4, vec2(0.6875, coord_y));
res += (inp0[local_pos + 50] + inp0[local_pos + 63]) * w[0];
res += (inp1[local_pos + 50] + inp1[local_pos + 47]) * w[1];
res += (inp0[local_pos + 66] + inp0[local_pos + 47]) * w[2];
res += (inp1[local_pos + 66] + inp1[local_pos + 31]) * w[3];
w = texture(ravu_lut4, vec2(0.8125, coord_y));
res += (inp1[local_pos + 17] + inp1[local_pos + 80]) * w[0];
res += (inp0[local_pos + 33] + inp0[local_pos + 80]) * w[1];
res += (inp1[local_pos + 33] + inp1[local_pos + 64]) * w[2];
res += (inp0[local_pos + 49] + inp0[local_pos + 64]) * w[3];
w = texture(ravu_lut4, vec2(0.9375, coord_y));
res += (inp1[local_pos + 49] + inp1[local_pos + 48]) * w[0];
res += (inp0[local_pos + 65] + inp0[local_pos + 48]) * w[1];
res += (inp1[local_pos + 65] + inp1[local_pos + 32]) * w[2];
res += (inp0[local_pos + 81] + inp0[local_pos + 32]) * w[3];
// Clamp to [0, 1] and write quad pixel (0, 1).
res = clamp(res, 0.0, 1.0);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 1), vec4(res, 1.0));
}
// ---- Quad pixel (1, 0) ----
// Same procedure as the block above; only the tile offsets (and which tile
// each lumaN / colour sample is read from) differ.
{
float luma62 = inp_luma0[local_pos + 108];
float luma55 = inp_luma0[local_pos + 109];
float luma8 = inp_luma0[local_pos + 18];
float luma1 = inp_luma0[local_pos + 19];
float luma24 = inp_luma0[local_pos + 32];
float luma17 = inp_luma0[local_pos + 33];
float luma10 = inp_luma0[local_pos + 34];
float luma3 = inp_luma0[local_pos + 35];
float luma40 = inp_luma0[local_pos + 46];
float luma33 = inp_luma0[local_pos + 47];
float luma26 = inp_luma0[local_pos + 48];
float luma19 = inp_luma0[local_pos + 49];
float luma12 = inp_luma0[local_pos + 50];
float luma5 = inp_luma0[local_pos + 51];
float luma49 = inp_luma0[local_pos + 61];
float luma42 = inp_luma0[local_pos + 62];
float luma35 = inp_luma0[local_pos + 63];
float luma28 = inp_luma0[local_pos + 64];
float luma21 = inp_luma0[local_pos + 65];
float luma14 = inp_luma0[local_pos + 66];
float luma58 = inp_luma0[local_pos + 76];
float luma51 = inp_luma0[local_pos + 77];
float luma44 = inp_luma0[local_pos + 78];
float luma37 = inp_luma0[local_pos + 79];
float luma30 = inp_luma0[local_pos + 80];
float luma23 = inp_luma0[local_pos + 81];
float luma60 = inp_luma0[local_pos + 92];
float luma53 = inp_luma0[local_pos + 93];
float luma46 = inp_luma0[local_pos + 94];
float luma39 = inp_luma0[local_pos + 95];
float luma16 = inp_luma1[local_pos + 17];
float luma9 = inp_luma1[local_pos + 18];
float luma2 = inp_luma1[local_pos + 19];
float luma32 = inp_luma1[local_pos + 31];
float luma25 = inp_luma1[local_pos + 32];
float luma18 = inp_luma1[local_pos + 33];
float luma11 = inp_luma1[local_pos + 34];
float luma4 = inp_luma1[local_pos + 35];
float luma48 = inp_luma1[local_pos + 45];
float luma41 = inp_luma1[local_pos + 46];
float luma34 = inp_luma1[local_pos + 47];
float luma27 = inp_luma1[local_pos + 48];
float luma20 = inp_luma1[local_pos + 49];
float luma13 = inp_luma1[local_pos + 50];
float luma6 = inp_luma1[local_pos + 51];
float luma57 = inp_luma1[local_pos + 60];
float luma50 = inp_luma1[local_pos + 61];
float luma43 = inp_luma1[local_pos + 62];
float luma36 = inp_luma1[local_pos + 63];
float luma29 = inp_luma1[local_pos + 64];
float luma22 = inp_luma1[local_pos + 65];
float luma15 = inp_luma1[local_pos + 66];
float luma59 = inp_luma1[local_pos + 76];
float luma52 = inp_luma1[local_pos + 77];
float luma45 = inp_luma1[local_pos + 78];
float luma38 = inp_luma1[local_pos + 79];
float luma31 = inp_luma1[local_pos + 80];
float luma61 = inp_luma1[local_pos + 92];
float luma54 = inp_luma1[local_pos + 93];
float luma47 = inp_luma1[local_pos + 94];
// Weighted gradient-product accumulation (same expressions and weights as
// for pixel (0, 1)).
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (luma17 - luma1) / 2.0;
gy = (luma10 - luma8) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533;
gx = (luma18 - luma2) / 2.0;
gy = (-luma12 + 8.0 * luma11 - 8.0 * luma9 + luma8) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma19 - luma3) / 2.0;
gy = (-luma13 + 8.0 * luma12 - 8.0 * luma10 + luma9) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (luma20 - luma4) / 2.0;
gy = (-luma14 + 8.0 * luma13 - 8.0 * luma11 + luma10) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (luma21 - luma5) / 2.0;
gy = (-luma15 + 8.0 * luma14 - 8.0 * luma12 + luma11) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma22 - luma6) / 2.0;
gy = (luma15 - luma13) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533;
gx = (-luma33 + 8.0 * luma25 - 8.0 * luma9 + luma1) / 12.0;
gy = (luma18 - luma16) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (-luma34 + 8.0 * luma26 - 8.0 * luma10 + luma2) / 12.0;
gy = (-luma20 + 8.0 * luma19 - 8.0 * luma17 + luma16) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854;
gx = (-luma35 + 8.0 * luma27 - 8.0 * luma11 + luma3) / 12.0;
gy = (-luma21 + 8.0 * luma20 - 8.0 * luma18 + luma17) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma36 + 8.0 * luma28 - 8.0 * luma12 + luma4) / 12.0;
gy = (-luma22 + 8.0 * luma21 - 8.0 * luma19 + luma18) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma37 + 8.0 * luma29 - 8.0 * luma13 + luma5) / 12.0;
gy = (-luma23 + 8.0 * luma22 - 8.0 * luma20 + luma19) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854;
gx = (-luma38 + 8.0 * luma30 - 8.0 * luma14 + luma6) / 12.0;
gy = (luma23 - luma21) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (-luma41 + 8.0 * luma33 - 8.0 * luma17 + luma9) / 12.0;
gy = (luma26 - luma24) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (-luma42 + 8.0 * luma34 - 8.0 * luma18 + luma10) / 12.0;
gy = (-luma28 + 8.0 * luma27 - 8.0 * luma25 + luma24) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma43 + 8.0 * luma35 - 8.0 * luma19 + luma11) / 12.0;
gy = (-luma29 + 8.0 * luma28 - 8.0 * luma26 + luma25) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013;
gx = (-luma44 + 8.0 * luma36 - 8.0 * luma20 + luma12) / 12.0;
gy = (-luma30 + 8.0 * luma29 - 8.0 * luma27 + luma26) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013;
gx = (-luma45 + 8.0 * luma37 - 8.0 * luma21 + luma13) / 12.0;
gy = (-luma31 + 8.0 * luma30 - 8.0 * luma28 + luma27) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma46 + 8.0 * luma38 - 8.0 * luma22 + luma14) / 12.0;
gy = (luma31 - luma29) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (-luma49 + 8.0 * luma41 - 8.0 * luma25 + luma17) / 12.0;
gy = (luma34 - luma32) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (-luma50 + 8.0 * luma42 - 8.0 * luma26 + luma18) / 12.0;
gy = (-luma36 + 8.0 * luma35 - 8.0 * luma33 + luma32) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma51 + 8.0 * luma43 - 8.0 * luma27 + luma19) / 12.0;
gy = (-luma37 + 8.0 * luma36 - 8.0 * luma34 + luma33) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013;
gx = (-luma52 + 8.0 * luma44 - 8.0 * luma28 + luma20) / 12.0;
gy = (-luma38 + 8.0 * luma37 - 8.0 * luma35 + luma34) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04933151482066013;
gx = (-luma53 + 8.0 * luma45 - 8.0 * luma29 + luma21) / 12.0;
gy = (-luma39 + 8.0 * luma38 - 8.0 * luma36 + luma35) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma54 + 8.0 * luma46 - 8.0 * luma30 + luma22) / 12.0;
gy = (luma39 - luma37) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (-luma57 + 8.0 * luma49 - 8.0 * luma33 + luma25) / 12.0;
gy = (luma42 - luma40) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (-luma58 + 8.0 * luma50 - 8.0 * luma34 + luma26) / 12.0;
gy = (-luma44 + 8.0 * luma43 - 8.0 * luma41 + luma40) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854;
gx = (-luma59 + 8.0 * luma51 - 8.0 * luma35 + luma27) / 12.0;
gy = (-luma45 + 8.0 * luma44 - 8.0 * luma42 + luma41) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma60 + 8.0 * luma52 - 8.0 * luma36 + luma28) / 12.0;
gy = (-luma46 + 8.0 * luma45 - 8.0 * luma43 + luma42) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.03841942237242872;
gx = (-luma61 + 8.0 * luma53 - 8.0 * luma37 + luma29) / 12.0;
gy = (-luma47 + 8.0 * luma46 - 8.0 * luma44 + luma43) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.02992107622879854;
gx = (-luma62 + 8.0 * luma54 - 8.0 * luma38 + luma30) / 12.0;
gy = (luma47 - luma45) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma57 - luma41) / 2.0;
gy = (luma50 - luma48) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533;
gx = (luma58 - luma42) / 2.0;
gy = (-luma52 + 8.0 * luma51 - 8.0 * luma49 + luma48) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma59 - luma43) / 2.0;
gy = (-luma53 + 8.0 * luma52 - 8.0 * luma50 + luma49) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (luma60 - luma44) / 2.0;
gy = (-luma54 + 8.0 * luma53 - 8.0 * luma51 + luma50) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.0233025575973275;
gx = (luma61 - luma45) / 2.0;
gy = (-luma55 + 8.0 * luma54 - 8.0 * luma52 + luma51) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.018148050104365175;
gx = (luma62 - luma46) / 2.0;
gy = (luma55 - luma53) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.011007348802298533;
// Eigenvalues of [[a, b], [b, d]] from trace and determinant, then the
// quantised angle / strength / coherence keys and the LUT row coordinate.
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = clamp(floor(log2(lambda * 2000.0 + 1.192092896e-7)), 0.0, 8.0);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
float coord_y = ((angle * 9.0 + strength) * 3.0 + coherence + 0.5) / 648.0;
// 8 LUT texels = 32 weights, each applied to the sum of a sample pair.
vec3 res = vec3(0.0, 0.0, 0.0);
vec4 w;
w = texture(ravu_lut4, vec2(0.0625, coord_y));
res += (inp1[local_pos + 3] + inp1[local_pos + 108]) * w[0];
res += (inp0[local_pos + 19] + inp0[local_pos + 108]) * w[1];
res += (inp1[local_pos + 19] + inp1[local_pos + 92]) * w[2];
res += (inp0[local_pos + 35] + inp0[local_pos + 92]) * w[3];
w = texture(ravu_lut4, vec2(0.1875, coord_y));
res += (inp1[local_pos + 35] + inp1[local_pos + 76]) * w[0];
res += (inp0[local_pos + 51] + inp0[local_pos + 76]) * w[1];
res += (inp1[local_pos + 51] + inp1[local_pos + 60]) * w[2];
res += (inp0[local_pos + 67] + inp0[local_pos + 60]) * w[3];
w = texture(ravu_lut4, vec2(0.3125, coord_y));
res += (inp0[local_pos + 18] + inp0[local_pos + 109]) * w[0];
res += (inp1[local_pos + 18] + inp1[local_pos + 93]) * w[1];
res += (inp0[local_pos + 34] + inp0[local_pos + 93]) * w[2];
res += (inp1[local_pos + 34] + inp1[local_pos + 77]) * w[3];
w = texture(ravu_lut4, vec2(0.4375, coord_y));
res += (inp0[local_pos + 50] + inp0[local_pos + 77]) * w[0];
res += (inp1[local_pos + 50] + inp1[local_pos + 61]) * w[1];
res += (inp0[local_pos + 66] + inp0[local_pos + 61]) * w[2];
res += (inp1[local_pos + 66] + inp1[local_pos + 45]) * w[3];
w = texture(ravu_lut4, vec2(0.5625, coord_y));
res += (inp1[local_pos + 17] + inp1[local_pos + 94]) * w[0];
res += (inp0[local_pos + 33] + inp0[local_pos + 94]) * w[1];
res += (inp1[local_pos + 33] + inp1[local_pos + 78]) * w[2];
res += (inp0[local_pos + 49] + inp0[local_pos + 78]) * w[3];
w = texture(ravu_lut4, vec2(0.6875, coord_y));
res += (inp1[local_pos + 49] + inp1[local_pos + 62]) * w[0];
res += (inp0[local_pos + 65] + inp0[local_pos + 62]) * w[1];
res += (inp1[local_pos + 65] + inp1[local_pos + 46]) * w[2];
res += (inp0[local_pos + 81] + inp0[local_pos + 46]) * w[3];
w = texture(ravu_lut4, vec2(0.8125, coord_y));
res += (inp0[local_pos + 32] + inp0[local_pos + 95]) * w[0];
res += (inp1[local_pos + 32] + inp1[local_pos + 79]) * w[1];
res += (inp0[local_pos + 48] + inp0[local_pos + 79]) * w[2];
res += (inp1[local_pos + 48] + inp1[local_pos + 63]) * w[3];
w = texture(ravu_lut4, vec2(0.9375, coord_y));
res += (inp0[local_pos + 64] + inp0[local_pos + 63]) * w[0];
res += (inp1[local_pos + 64] + inp1[local_pos + 47]) * w[1];
res += (inp0[local_pos + 80] + inp0[local_pos + 47]) * w[2];
res += (inp1[local_pos + 80] + inp1[local_pos + 31]) * w[3];
// Clamp to [0, 1] and write quad pixel (1, 0).
res = clamp(res, 0.0, 1.0);
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 0), vec4(res, 1.0));
}
// The remaining two quad pixels are unfiltered copies of cached samples:
// (1, 1) from the ravu_int11 tile and (0, 0) from the INPUT tile.
vec3 res;
res = inp0[local_pos + 64];
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(1, 1), vec4(res, 1.0));
res = inp1[local_pos + 48];
imageStore(out_image, ivec2(gl_GlobalInvocationID) * 2 + ivec2(0, 0), vec4(res, 1.0));
}

View file

@ -0,0 +1,325 @@
// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers
// Please don't edit this file directly.
// Generated by: ravu-zoom.py --target luma --weights-file weights\ravu-zoom_weights-r2.py --float-format float16dx --use-compute-shader --anti-ringing 0.8 --use-magpie --overwrite
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//!MAGPIE EFFECT
//!VERSION 4
//!TEXTURE
// Source image for the effect.
Texture2D INPUT;
//!SAMPLER
//!FILTER POINT
// Point-filtered sampler.
SamplerState sam_INPUT;
//!TEXTURE
//
//
// Destination image; written by imageStoreOverride.
Texture2D OUTPUT;
//!SAMPLER
//!FILTER LINEAR
// Linear-filtered sampler; imageStoreOverride uses it to read INPUT for chroma.
SamplerState sam_INPUT_LINEAR;
//!TEXTURE
//!SOURCE ravu_zoom_lut2_f16.dds
//!FORMAT R16G16B16A16_FLOAT
// Lookup table loaded from the .dds file named in the SOURCE directive above.
Texture2D ravu_zoom_lut2;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_zoom_lut2;
//!TEXTURE
//!SOURCE ravu_zoom_lut2_ar_f16.dds
//!FORMAT R16G16B16A16_FLOAT
// Second lookup table (the "_ar" file; presumably the anti-ringing weights
// mentioned in the generator command line - confirm).
Texture2D ravu_zoom_lut2_ar;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_zoom_lut2_ar;
//!COMMON
#include "prescalers.hlsli"
// This effect has a single pass, so pass 1 also performs the output bounds check.
#define LAST_PASS 1
//!PASS 1
//!DESC RAVU-Zoom-AR (luma, r2, compute)
//!IN INPUT, ravu_zoom_lut2, ravu_zoom_lut2_ar
//!OUT OUTPUT
//!BLOCK_SIZE 32, 8
//!NUM_THREADS 32, 8
// Passes x through mix() between 0.5 / lut_size and 1 - 0.5 / lut_size.
// NOTE(review): assuming mix() is GLSL-style linear interpolation, this maps
// [0, 1] onto the texel centres of a lut_size-wide table - confirm mix() in
// the include.
#define LUTPOS(x, lut_size) mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x))
// Luma tile filled by Pass1. Pass1 stores rows with a stride of 36
// (samples[x + y * 36]), so the 432 entries hold 12 such rows.
shared float samples[432];
#define CURRENT_PASS 1
// Reduces a fetched colour to one scalar via a dot product with rgb2y
// (rgb2y is defined outside this section, presumably in prescalers.hlsli).
#define GET_SAMPLE(x) dot(x.rgb, rgb2y)
// GLSL-style imageStore calls drop the image argument and forward only the
// first component of the value.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x)
// Writes one output pixel from a scalar `value` (used as the first YUV
// component). The two chroma components are taken from INPUT, sampled with
// the linear sampler at the position HOOKED_map(pos) returns, and the
// combined triple is converted back to RGB with yuv2rgb. Alpha is set to 1.
void imageStoreOverride(uint2 pos, float value) {
    // Source colour at the location corresponding to this output pixel.
    float3 sourceRgb = INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb;
    // Keep only the chroma of the source; the first component is replaced by `value`.
    float2 chroma = mul(rgb2uv, sourceRgb);
    float3 yuv = float3(value.x, chroma);
    OUTPUT[pos] = float4(mul(yuv2rgb, yuv), 1.0);
}
// Sampling helpers for pass 1. INPUT_tex goes through GET_SAMPLE, so it yields
// the scalar GET_SAMPLE produces rather than a colour; the LUT helpers return
// the full vec4 texel.
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
// Input dimensions and texel size, as reported by GetInputSize() / GetInputPt()
// (both defined outside this section).
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
#define ravu_zoom_lut2_tex(pos) (vec4(texture(ravu_zoom_lut2, pos)))
#define ravu_zoom_lut2_ar_tex(pos) (vec4(texture(ravu_zoom_lut2_ar, pos)))
// The HOOKED_* names used by the generated code are plain aliases for INPUT.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// Pass 1 entry point: each invocation produces one output value and stores it once.
//
// Steps:
//   1. The work group cooperatively copies the input samples it needs into `samples`.
//   2. Each invocation reads a 4x4 window (x and y offsets -1..2 around its base texel).
//   3. Weighted gradient products over the window give a 2x2 symmetric matrix; its
//      eigenvalues are turned into angle / strength / coherence buckets that select a
//      row (coord_y) of the weight LUT.
//   4. The 16 samples are blended with LUT weights, then the result is pulled towards a
//      soft [lo, hi] range computed from the same window (anti-ringing) and stored.
//
// blockStart, threadId: only used here for the output bounds check (destPos). The gl_*
// identifiers come from outside this file and are presumably derived from these
// parameters - TODO confirm.
void Pass1(uint2 blockStart, uint3 threadId) {
// First and last output pixel handled by this work group.
ivec2 group_begin = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
ivec2 group_end = group_begin + ivec2(gl_WorkGroupSize) - ivec2(1, 1);
// Input texel rectangle needed by the whole group: the base texels of its first and last
// output pixels, extended by 1 texel before and 2 after for the 4x4 window.
ivec2 rectl = ivec2(floor(HOOKED_size * HOOKED_map(group_begin) - 0.5001)) - 1;
ivec2 rectr = ivec2(floor(HOOKED_size * HOOKED_map(group_end) - 0.4999)) + 2;
ivec2 rect = rectr - rectl + 1;
// Cooperative load: invocations stride over the rectangle by the work-group size and
// store each sample at x + y * 36.
// NOTE(review): `samples` holds 36 * 12 entries, which matches a 32x8 group plus the
// 4-texel pad only while the output is not smaller than the input; no guard for
// downscaling is visible here - confirm the caller restricts the scale.
for (int id = int(gl_LocalInvocationIndex); id < rect.x * rect.y;
id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint y = (uint)id / rect.x, x = (uint)id % rect.x;
samples[x + y * 36] = HOOKED_tex(HOOKED_pt * (vec2(rectl + ivec2(x, y)) + vec2(0.5, 0.5))).x;
}
// Every load must be finished before any invocation reads `samples`.
barrier();
// The bounds check sits after the barrier so that invocations outside the output still
// take part in the cooperative load above.
#if CURRENT_PASS == LAST_PASS
uint2 destPos = blockStart + threadId.xy;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
// Position of this output pixel in input texel units, split into the base texel centre
// (pos) and the fractional offset from it (subpix).
vec2 pos = HOOKED_size * HOOKED_map(ivec2(gl_GlobalInvocationID));
vec2 subpix = fract(pos - 0.5);
pos -= subpix;
// Convert the fraction into LUT coordinates: a 9-texel sub-pixel axis in each direction,
// scaled so that one such cell is 1/2 of the LUT width and 1/288 of its height.
subpix = LUTPOS(subpix, vec2(9.0, 9.0));
vec2 subpix_inv = 1.0 - subpix;
subpix /= vec2(2.0, 288.0);
subpix_inv /= vec2(2.0, 288.0);
// Index of the base texel inside `samples` (row stride 36).
ivec2 ipos = ivec2(floor(pos)) - rectl;
int lpos = ipos.x + ipos.y * 36;
// 4x4 window, column-major: sampleN has x offset N / 4 - 1 and y offset N % 4 - 1.
float sample0 = samples[-37 + lpos];
float sample1 = samples[-1 + lpos];
float sample2 = samples[35 + lpos];
float sample3 = samples[71 + lpos];
float sample4 = samples[-36 + lpos];
float sample5 = samples[0 + lpos];
float sample6 = samples[36 + lpos];
float sample7 = samples[72 + lpos];
float sample8 = samples[-35 + lpos];
float sample9 = samples[1 + lpos];
float sample10 = samples[37 + lpos];
float sample11 = samples[73 + lpos];
float sample12 = samples[-34 + lpos];
float sample13 = samples[2 + lpos];
float sample14 = samples[38 + lpos];
float sample15 = samples[74 + lpos];
// Accumulate weighted gradient products (gx*gx, gx*gy, gy*gy) for all 16 positions.
// gx differences run along x, gy along y; central differences (/ 2.0) are used where both
// neighbours exist inside the window, one-sided differences on the window border.
// Three fixed weights are used: corners, border positions and the inner 2x2.
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (sample4 - sample0);
gy = (sample1 - sample0);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (sample5 - sample1);
gy = (sample2 - sample0) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (sample6 - sample2);
gy = (sample3 - sample1) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (sample7 - sample3);
gy = (sample3 - sample2);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (sample8 - sample0) / 2.0;
gy = (sample5 - sample4);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (sample9 - sample1) / 2.0;
gy = (sample6 - sample4) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (sample10 - sample2) / 2.0;
gy = (sample7 - sample5) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (sample11 - sample3) / 2.0;
gy = (sample7 - sample6);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (sample12 - sample4) / 2.0;
gy = (sample9 - sample8);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (sample13 - sample5) / 2.0;
gy = (sample10 - sample8) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (sample14 - sample6) / 2.0;
gy = (sample11 - sample9) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (sample15 - sample7) / 2.0;
gy = (sample11 - sample10);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (sample12 - sample8);
gy = (sample13 - sample12);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (sample13 - sample9);
gy = (sample14 - sample12) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (sample14 - sample10);
gy = (sample15 - sample13) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (sample15 - sample11);
gy = (sample15 - sample14);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
// Eigen-analysis of the symmetric matrix [[a, b], [b, d]]: T is its trace, D its
// determinant, L1 >= L2 its eigenvalues.
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
// mix(x, y, cond) with a boolean is assumed to select y when cond is true (GLSL
// semantics); the helper is defined outside this file - TODO confirm.
// theta: direction angle wrapped into [0, pi), forced to 0 when b is negligible.
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
// mu: relative difference of the eigenvalue roots, 0 when both are negligible.
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
// Quantize: 24 angle buckets, 4 strength levels (thresholds 0.004 / 0.016 / 0.05 on
// lambda) and 3 coherence levels (thresholds 0.25 / 0.5 on mu).
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = mix(mix(0.0, 1.0, lambda >= 0.004), mix(2.0, 3.0, lambda >= 0.05), lambda >= 0.016);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
// Row selector into the LUTs: 24 * 4 * 3 = 288 combinations.
float coord_y = ((angle * 4.0 + strength) * 3.0 + coherence) / 288.0;
float res = 0.0;
vec4 w;
vec4 cg, cg1;
float lo = 0.0, hi = 0.0;
float lo2 = 0.0, hi2 = 0.0;
// Weighted sum of the window. Each fetch yields four weights. Samples 0..7 use the LUT
// at subpix; samples 15..8 reuse the same LUT columns at the mirrored offset subpix_inv,
// in reverse order.
w = texture(ravu_zoom_lut2, vec2(0.0, coord_y) + subpix);
res += sample0 * w[0];
res += sample1 * w[1];
res += sample2 * w[2];
res += sample3 * w[3];
w = texture(ravu_zoom_lut2, vec2(0.5, coord_y) + subpix);
res += sample4 * w[0];
res += sample5 * w[1];
res += sample6 * w[2];
res += sample7 * w[3];
w = texture(ravu_zoom_lut2, vec2(0.0, coord_y) + subpix_inv);
res += sample15 * w[0];
res += sample14 * w[1];
res += sample13 * w[2];
res += sample12 * w[3];
w = texture(ravu_zoom_lut2, vec2(0.5, coord_y) + subpix_inv);
res += sample11 * w[0];
res += sample10 * w[1];
res += sample9 * w[2];
res += sample8 * w[3];
// Anti-ringing bounds. For each pair of samples, cg holds (0.1 + s, 1.1 - s) per sample.
// Five squarings raise it to the 32nd power; hi / lo accumulate the weighted 32nd powers
// and, after one more multiply by cg1, hi2 / lo2 accumulate the weighted 33rd powers.
// Weights come from the separate *_ar LUT.
w = texture(ravu_zoom_lut2_ar, vec2(0.0, coord_y) + subpix);
cg = vec4(0.1 + sample0, 1.1 - sample0, 0.1 + sample1, 1.1 - sample1);
cg1 = cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
hi += cg[0] * w[0] + cg[2] * w[1];
lo += cg[1] * w[0] + cg[3] * w[1];
cg *= cg1;
hi2 += cg[0] * w[0] + cg[2] * w[1];
lo2 += cg[1] * w[0] + cg[3] * w[1];
cg = vec4(0.1 + sample2, 1.1 - sample2, 0.1 + sample3, 1.1 - sample3);
cg1 = cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
hi += cg[0] * w[2] + cg[2] * w[3];
lo += cg[1] * w[2] + cg[3] * w[3];
cg *= cg1;
hi2 += cg[0] * w[2] + cg[2] * w[3];
lo2 += cg[1] * w[2] + cg[3] * w[3];
w = texture(ravu_zoom_lut2_ar, vec2(0.5, coord_y) + subpix);
cg = vec4(0.1 + sample4, 1.1 - sample4, 0.1 + sample5, 1.1 - sample5);
cg1 = cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
hi += cg[0] * w[0] + cg[2] * w[1];
lo += cg[1] * w[0] + cg[3] * w[1];
cg *= cg1;
hi2 += cg[0] * w[0] + cg[2] * w[1];
lo2 += cg[1] * w[0] + cg[3] * w[1];
cg = vec4(0.1 + sample6, 1.1 - sample6, 0.1 + sample7, 1.1 - sample7);
cg1 = cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
hi += cg[0] * w[2] + cg[2] * w[3];
lo += cg[1] * w[2] + cg[3] * w[3];
cg *= cg1;
hi2 += cg[0] * w[2] + cg[2] * w[3];
lo2 += cg[1] * w[2] + cg[3] * w[3];
// Mirrored half of the window: samples 15..8 with the LUT read at subpix_inv.
w = texture(ravu_zoom_lut2_ar, vec2(0.0, coord_y) + subpix_inv);
cg = vec4(0.1 + sample15, 1.1 - sample15, 0.1 + sample14, 1.1 - sample14);
cg1 = cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
hi += cg[0] * w[0] + cg[2] * w[1];
lo += cg[1] * w[0] + cg[3] * w[1];
cg *= cg1;
hi2 += cg[0] * w[0] + cg[2] * w[1];
lo2 += cg[1] * w[0] + cg[3] * w[1];
cg = vec4(0.1 + sample13, 1.1 - sample13, 0.1 + sample12, 1.1 - sample12);
cg1 = cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
hi += cg[0] * w[2] + cg[2] * w[3];
lo += cg[1] * w[2] + cg[3] * w[3];
cg *= cg1;
hi2 += cg[0] * w[2] + cg[2] * w[3];
lo2 += cg[1] * w[2] + cg[3] * w[3];
w = texture(ravu_zoom_lut2_ar, vec2(0.5, coord_y) + subpix_inv);
cg = vec4(0.1 + sample11, 1.1 - sample11, 0.1 + sample10, 1.1 - sample10);
cg1 = cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
hi += cg[0] * w[0] + cg[2] * w[1];
lo += cg[1] * w[0] + cg[3] * w[1];
cg *= cg1;
hi2 += cg[0] * w[0] + cg[2] * w[1];
lo2 += cg[1] * w[0] + cg[3] * w[1];
cg = vec4(0.1 + sample9, 1.1 - sample9, 0.1 + sample8, 1.1 - sample8);
cg1 = cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
hi += cg[0] * w[2] + cg[2] * w[3];
lo += cg[1] * w[2] + cg[3] * w[3];
cg *= cg1;
hi2 += cg[0] * w[2] + cg[2] * w[3];
lo2 += cg[1] * w[2] + cg[3] * w[3];
// Ratio of the weighted 33rd to 32nd power sums, with the 0.1 / 1.1 offsets undone:
// a smooth upper (hi) and lower (lo) bound dominated by the largest / smallest samples.
hi = hi2 / hi - 0.1;
lo = 1.1 - lo2 / lo;
// Blend 80% of the way from the raw result to the result clamped into [lo, hi].
res = mix(res, clamp(res, lo, hi), 0.800000);
imageStore(out_image, ivec2(gl_GlobalInvocationID), res);
}

View file

@ -0,0 +1,335 @@
// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers
// Please don't edit this file directly.
// Generated by: ravu-zoom.py --target rgb --weights-file weights\ravu-zoom_weights-r2.py --float-format float16dx --use-compute-shader --anti-ringing 0.8 --use-magpie --overwrite
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// Effect resources and pass-1 configuration.
// Declared below: the INPUT texture with a point-filtered sampler, the OUTPUT texture, and
// two R16G16B16A16_FLOAT lookup textures loaded from .dds files, each paired with a
// linear-filtered sampler. Pass 1 reads INPUT and both LUTs and writes OUTPUT, using a
// 32x8 block size and 32x8 threads.
//!MAGPIE EFFECT
//!VERSION 4
//!TEXTURE
Texture2D INPUT;
//!SAMPLER
//!FILTER POINT
SamplerState sam_INPUT;
//!TEXTURE
//
//
Texture2D OUTPUT;
//!TEXTURE
//!SOURCE ravu_zoom_lut2_f16.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_zoom_lut2;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_zoom_lut2;
//!TEXTURE
//!SOURCE ravu_zoom_lut2_ar_f16.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_zoom_lut2_ar;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_zoom_lut2_ar;
//!COMMON
#include "prescalers.hlsli"
#define LAST_PASS 1
//!PASS 1
//!DESC RAVU-Zoom-AR (rgb, r2, compute)
//!IN INPUT, ravu_zoom_lut2, ravu_zoom_lut2_ar
//!OUT OUTPUT
//!BLOCK_SIZE 32, 8
//!NUM_THREADS 32, 8
// RGB-to-luma weights (the Rec. 709 coefficients); Pass1 dots each sample with them to
// get the luma used for gradient analysis.
static const vec3 color_primary = vec3(0.2126, 0.7152, 0.0722);
// Interpolates between 0.5 / lut_size and 1 - 0.5 / lut_size by x, i.e. between the first
// and last texel centres of an axis that is lut_size texels long (assuming mix() is a
// GLSL-style linear interpolation).
#define LUTPOS(x, lut_size) mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x))
// Per-work-group scratch buffer of RGB samples. Pass1 fills it before barrier() and
// addresses it with a row stride of 36 (432 = 36 * 12).
shared vec3 samples[432];
// Compared against LAST_PASS with #if inside Pass1.
#define CURRENT_PASS 1
// Identity: fetched colours are kept as-is in this rgb variant.
#define GET_SAMPLE(x) x
// GLSL-style imageStore redirected to imageStoreOverride; the out_image argument is ignored.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val)
// Stores an already assembled RGBA value at pixel `pos` of OUTPUT.
// This is the function the imageStore macro of this pass expands to.
void imageStoreOverride(uint2 pos, float4 value) {
    OUTPUT[pos] = value;
}
// mpv-hook-style accessors for the INPUT texture.
// INPUT_tex fetches INPUT at pos and passes the result through GET_SAMPLE.
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
// Values returned by GetInputSize() / GetInputPt(), converted to float2. Pass1 multiplies
// texel indices by INPUT_pt to build sampling coordinates, so it is presumably the size
// of one input texel in normalized units - TODO confirm.
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
// Fetch helpers for the two LUT textures. Pass1 in this file calls texture() on the LUTs
// directly and does not reference these macros.
#define ravu_zoom_lut2_tex(pos) (vec4(texture(ravu_zoom_lut2, pos)))
#define ravu_zoom_lut2_ar_tex(pos) (vec4(texture(ravu_zoom_lut2_ar, pos)))
// HOOKED_* names are aliases of the INPUT_* accessors above.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// Pass 1 entry point: each invocation produces one RGB output pixel and stores it once.
//
// Steps:
//   1. The work group cooperatively copies the RGB input samples it needs into `samples`.
//   2. Each invocation reads a 4x4 window (x and y offsets -1..2 around its base texel)
//      and derives a luma value per sample with color_primary.
//   3. Weighted luma-gradient products over the window give a 2x2 symmetric matrix; its
//      eigenvalues are turned into angle / strength / coherence buckets that select a
//      row (coord_y) of the weight LUT.
//   4. The 16 RGB samples are blended with LUT weights, then the result is pulled towards
//      a per-channel soft [lo, hi] range computed from the same window (anti-ringing).
//
// blockStart, threadId: only used here for the output bounds check (destPos). The gl_*
// identifiers come from outside this file and are presumably derived from these
// parameters - TODO confirm.
void Pass1(uint2 blockStart, uint3 threadId) {
// First and last output pixel handled by this work group.
ivec2 group_begin = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
ivec2 group_end = group_begin + ivec2(gl_WorkGroupSize) - ivec2(1, 1);
// Input texel rectangle needed by the whole group: the base texels of its first and last
// output pixels, extended by 1 texel before and 2 after for the 4x4 window.
ivec2 rectl = ivec2(floor(HOOKED_size * HOOKED_map(group_begin) - 0.5001)) - 1;
ivec2 rectr = ivec2(floor(HOOKED_size * HOOKED_map(group_end) - 0.4999)) + 2;
ivec2 rect = rectr - rectl + 1;
// Cooperative load: invocations stride over the rectangle by the work-group size and
// store each sample at x + y * 36.
// NOTE(review): `samples` holds 36 * 12 entries, which matches a 32x8 group plus the
// 4-texel pad only while the output is not smaller than the input; no guard for
// downscaling is visible here - confirm the caller restricts the scale.
for (int id = int(gl_LocalInvocationIndex); id < rect.x * rect.y;
id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint y = (uint)id / rect.x, x = (uint)id % rect.x;
samples[x + y * 36] = HOOKED_tex(HOOKED_pt * (vec2(rectl + ivec2(x, y)) + vec2(0.5, 0.5))).xyz;
}
// Every load must be finished before any invocation reads `samples`.
barrier();
// The bounds check sits after the barrier so that invocations outside the output still
// take part in the cooperative load above.
#if CURRENT_PASS == LAST_PASS
uint2 destPos = blockStart + threadId.xy;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
// Position of this output pixel in input texel units, split into the base texel centre
// (pos) and the fractional offset from it (subpix).
vec2 pos = HOOKED_size * HOOKED_map(ivec2(gl_GlobalInvocationID));
vec2 subpix = fract(pos - 0.5);
pos -= subpix;
// Convert the fraction into LUT coordinates: a 9-texel sub-pixel axis in each direction,
// scaled so that one such cell is 1/2 of the LUT width and 1/288 of its height.
subpix = LUTPOS(subpix, vec2(9.0, 9.0));
vec2 subpix_inv = 1.0 - subpix;
subpix /= vec2(2.0, 288.0);
subpix_inv /= vec2(2.0, 288.0);
// Index of the base texel inside `samples` (row stride 36).
ivec2 ipos = ivec2(floor(pos)) - rectl;
int lpos = ipos.x + ipos.y * 36;
// 4x4 window, column-major: sampleN has x offset N / 4 - 1 and y offset N % 4 - 1.
vec3 sample0 = samples[-37 + lpos];
vec3 sample1 = samples[-1 + lpos];
vec3 sample2 = samples[35 + lpos];
vec3 sample3 = samples[71 + lpos];
vec3 sample4 = samples[-36 + lpos];
vec3 sample5 = samples[0 + lpos];
vec3 sample6 = samples[36 + lpos];
vec3 sample7 = samples[72 + lpos];
vec3 sample8 = samples[-35 + lpos];
vec3 sample9 = samples[1 + lpos];
vec3 sample10 = samples[37 + lpos];
vec3 sample11 = samples[73 + lpos];
vec3 sample12 = samples[-34 + lpos];
vec3 sample13 = samples[2 + lpos];
vec3 sample14 = samples[38 + lpos];
vec3 sample15 = samples[74 + lpos];
// Luma of each sample; only these scalars feed the gradient analysis below.
float luma0 = dot(sample0, color_primary);
float luma1 = dot(sample1, color_primary);
float luma2 = dot(sample2, color_primary);
float luma3 = dot(sample3, color_primary);
float luma4 = dot(sample4, color_primary);
float luma5 = dot(sample5, color_primary);
float luma6 = dot(sample6, color_primary);
float luma7 = dot(sample7, color_primary);
float luma8 = dot(sample8, color_primary);
float luma9 = dot(sample9, color_primary);
float luma10 = dot(sample10, color_primary);
float luma11 = dot(sample11, color_primary);
float luma12 = dot(sample12, color_primary);
float luma13 = dot(sample13, color_primary);
float luma14 = dot(sample14, color_primary);
float luma15 = dot(sample15, color_primary);
// Accumulate weighted gradient products (gx*gx, gx*gy, gy*gy) for all 16 positions.
// gx differences run along x, gy along y; central differences (/ 2.0) are used where both
// neighbours exist inside the window, one-sided differences on the window border.
// Three fixed weights are used: corners, border positions and the inner 2x2.
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (luma4 - luma0);
gy = (luma1 - luma0);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma5 - luma1);
gy = (luma2 - luma0) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma6 - luma2);
gy = (luma3 - luma1) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma7 - luma3);
gy = (luma3 - luma2);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma8 - luma0) / 2.0;
gy = (luma5 - luma4);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma9 - luma1) / 2.0;
gy = (luma6 - luma4) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma10 - luma2) / 2.0;
gy = (luma7 - luma5) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma11 - luma3) / 2.0;
gy = (luma7 - luma6);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma12 - luma4) / 2.0;
gy = (luma9 - luma8);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma13 - luma5) / 2.0;
gy = (luma10 - luma8) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma14 - luma6) / 2.0;
gy = (luma11 - luma9) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma15 - luma7) / 2.0;
gy = (luma11 - luma10);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma12 - luma8);
gy = (luma13 - luma12);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma13 - luma9);
gy = (luma14 - luma12) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma14 - luma10);
gy = (luma15 - luma13) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma15 - luma11);
gy = (luma15 - luma14);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
// Eigen-analysis of the symmetric matrix [[a, b], [b, d]]: T is its trace, D its
// determinant, L1 >= L2 its eigenvalues.
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
// mix(x, y, cond) with a boolean is assumed to select y when cond is true (GLSL
// semantics); the helper is defined outside this file - TODO confirm.
// theta: direction angle wrapped into [0, pi), forced to 0 when b is negligible.
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
// mu: relative difference of the eigenvalue roots, 0 when both are negligible.
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
// Quantize: 24 angle buckets, 4 strength levels (thresholds 0.004 / 0.016 / 0.05 on
// lambda) and 3 coherence levels (thresholds 0.25 / 0.5 on mu).
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = mix(mix(0.0, 1.0, lambda >= 0.004), mix(2.0, 3.0, lambda >= 0.05), lambda >= 0.016);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
// Row selector into the LUTs: 24 * 4 * 3 = 288 combinations.
float coord_y = ((angle * 4.0 + strength) * 3.0 + coherence) / 288.0;
vec3 res = vec3(0.0, 0.0, 0.0);
vec4 w;
mat4x3 cg, cg1;
vec3 lo = vec3(0.0, 0.0, 0.0), hi = vec3(0.0, 0.0, 0.0);
vec3 lo2 = vec3(0.0, 0.0, 0.0), hi2 = vec3(0.0, 0.0, 0.0);
// Weighted sum of the RGB window. Each fetch yields four weights. Samples 0..7 use the
// LUT at subpix; samples 15..8 reuse the same LUT columns at the mirrored offset
// subpix_inv, in reverse order.
w = texture(ravu_zoom_lut2, vec2(0.0, coord_y) + subpix);
res += sample0 * w[0];
res += sample1 * w[1];
res += sample2 * w[2];
res += sample3 * w[3];
w = texture(ravu_zoom_lut2, vec2(0.5, coord_y) + subpix);
res += sample4 * w[0];
res += sample5 * w[1];
res += sample6 * w[2];
res += sample7 * w[3];
w = texture(ravu_zoom_lut2, vec2(0.0, coord_y) + subpix_inv);
res += sample15 * w[0];
res += sample14 * w[1];
res += sample13 * w[2];
res += sample12 * w[3];
w = texture(ravu_zoom_lut2, vec2(0.5, coord_y) + subpix_inv);
res += sample11 * w[0];
res += sample10 * w[1];
res += sample9 * w[2];
res += sample8 * w[3];
// Anti-ringing bounds, per colour channel. For each pair of samples, cg holds
// (0.1 + s, 1.1 - s) per sample. Five component-wise squarings (matrixCompMult) raise it
// to the 32nd power; hi / lo accumulate the weighted 32nd powers and, after one more
// multiply by cg1, hi2 / lo2 accumulate the weighted 33rd powers. Weights come from the
// separate *_ar LUT.
w = texture(ravu_zoom_lut2_ar, vec2(0.0, coord_y) + subpix);
cg = mat4x3(0.1 + sample0, 1.1 - sample0, 0.1 + sample1, 1.1 - sample1);
cg1 = cg;
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
hi += cg[0] * w[0] + cg[2] * w[1];
lo += cg[1] * w[0] + cg[3] * w[1];
cg = matrixCompMult(cg, cg1);
hi2 += cg[0] * w[0] + cg[2] * w[1];
lo2 += cg[1] * w[0] + cg[3] * w[1];
cg = mat4x3(0.1 + sample2, 1.1 - sample2, 0.1 + sample3, 1.1 - sample3);
cg1 = cg;
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
hi += cg[0] * w[2] + cg[2] * w[3];
lo += cg[1] * w[2] + cg[3] * w[3];
cg = matrixCompMult(cg, cg1);
hi2 += cg[0] * w[2] + cg[2] * w[3];
lo2 += cg[1] * w[2] + cg[3] * w[3];
w = texture(ravu_zoom_lut2_ar, vec2(0.5, coord_y) + subpix);
cg = mat4x3(0.1 + sample4, 1.1 - sample4, 0.1 + sample5, 1.1 - sample5);
cg1 = cg;
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
hi += cg[0] * w[0] + cg[2] * w[1];
lo += cg[1] * w[0] + cg[3] * w[1];
cg = matrixCompMult(cg, cg1);
hi2 += cg[0] * w[0] + cg[2] * w[1];
lo2 += cg[1] * w[0] + cg[3] * w[1];
cg = mat4x3(0.1 + sample6, 1.1 - sample6, 0.1 + sample7, 1.1 - sample7);
cg1 = cg;
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
hi += cg[0] * w[2] + cg[2] * w[3];
lo += cg[1] * w[2] + cg[3] * w[3];
cg = matrixCompMult(cg, cg1);
hi2 += cg[0] * w[2] + cg[2] * w[3];
lo2 += cg[1] * w[2] + cg[3] * w[3];
// Mirrored half of the window: samples 15..8 with the LUT read at subpix_inv.
w = texture(ravu_zoom_lut2_ar, vec2(0.0, coord_y) + subpix_inv);
cg = mat4x3(0.1 + sample15, 1.1 - sample15, 0.1 + sample14, 1.1 - sample14);
cg1 = cg;
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
hi += cg[0] * w[0] + cg[2] * w[1];
lo += cg[1] * w[0] + cg[3] * w[1];
cg = matrixCompMult(cg, cg1);
hi2 += cg[0] * w[0] + cg[2] * w[1];
lo2 += cg[1] * w[0] + cg[3] * w[1];
cg = mat4x3(0.1 + sample13, 1.1 - sample13, 0.1 + sample12, 1.1 - sample12);
cg1 = cg;
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
hi += cg[0] * w[2] + cg[2] * w[3];
lo += cg[1] * w[2] + cg[3] * w[3];
cg = matrixCompMult(cg, cg1);
hi2 += cg[0] * w[2] + cg[2] * w[3];
lo2 += cg[1] * w[2] + cg[3] * w[3];
w = texture(ravu_zoom_lut2_ar, vec2(0.5, coord_y) + subpix_inv);
cg = mat4x3(0.1 + sample11, 1.1 - sample11, 0.1 + sample10, 1.1 - sample10);
cg1 = cg;
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
hi += cg[0] * w[0] + cg[2] * w[1];
lo += cg[1] * w[0] + cg[3] * w[1];
cg = matrixCompMult(cg, cg1);
hi2 += cg[0] * w[0] + cg[2] * w[1];
lo2 += cg[1] * w[0] + cg[3] * w[1];
cg = mat4x3(0.1 + sample9, 1.1 - sample9, 0.1 + sample8, 1.1 - sample8);
cg1 = cg;
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
hi += cg[0] * w[2] + cg[2] * w[3];
lo += cg[1] * w[2] + cg[3] * w[3];
cg = matrixCompMult(cg, cg1);
hi2 += cg[0] * w[2] + cg[2] * w[3];
lo2 += cg[1] * w[2] + cg[3] * w[3];
// Ratio of the weighted 33rd to 32nd power sums, with the 0.1 / 1.1 offsets undone:
// a smooth upper (hi) and lower (lo) bound dominated by the largest / smallest samples.
hi = hi2 / hi - 0.1;
lo = 1.1 - lo2 / lo;
// Blend 80% of the way from the raw result to the result clamped into [lo, hi];
// alpha is written as 1.0.
res = mix(res, clamp(res, lo, hi), 0.800000);
imageStore(out_image, ivec2(gl_GlobalInvocationID), vec4(res, 1.0));
}

View file

@ -0,0 +1,373 @@
// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers
// Please don't edit this file directly.
// Generated by: ravu-zoom.py --target luma --weights-file weights\ravu-zoom_weights-r3.py --float-format float16dx --use-compute-shader --anti-ringing 0.8 --use-magpie --overwrite
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// Effect resources and pass-1 configuration.
// Declared below: the INPUT texture with a point-filtered sampler, the OUTPUT texture, an
// extra linear-filtered sampler (sam_INPUT_LINEAR, used by imageStoreOverride to read
// INPUT), and two R16G16B16A16_FLOAT lookup textures loaded from .dds files, each paired
// with a linear-filtered sampler. Pass 1 reads INPUT and both LUTs and writes OUTPUT,
// using a 32x8 block size and 32x8 threads.
//!MAGPIE EFFECT
//!VERSION 4
//!TEXTURE
Texture2D INPUT;
//!SAMPLER
//!FILTER POINT
SamplerState sam_INPUT;
//!TEXTURE
//
//
Texture2D OUTPUT;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_INPUT_LINEAR;
//!TEXTURE
//!SOURCE ravu_zoom_lut3_f16.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_zoom_lut3;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_zoom_lut3;
//!TEXTURE
//!SOURCE ravu_zoom_lut3_ar_f16.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_zoom_lut3_ar;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_zoom_lut3_ar;
//!COMMON
#include "prescalers.hlsli"
#define LAST_PASS 1
//!PASS 1
//!DESC RAVU-Zoom-AR (luma, r3, compute)
//!IN INPUT, ravu_zoom_lut3, ravu_zoom_lut3_ar
//!OUT OUTPUT
//!BLOCK_SIZE 32, 8
//!NUM_THREADS 32, 8
// Interpolates between 0.5 / lut_size and 1 - 0.5 / lut_size by x, i.e. between the first
// and last texel centres of an axis that is lut_size texels long (assuming mix() is a
// GLSL-style linear interpolation).
#define LUTPOS(x, lut_size) mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x))
// Per-work-group scratch buffer of scalar samples. Pass1 fills it before barrier() and
// addresses it with a row stride of 38 (532 = 38 * 14).
shared float samples[532];
// Compared against LAST_PASS with #if inside Pass1.
#define CURRENT_PASS 1
// Reduces a fetched colour to a scalar: dot product of its rgb channels with rgb2y.
#define GET_SAMPLE(x) dot(x.rgb, rgb2y)
// GLSL-style imageStore redirected to imageStoreOverride; the out_image argument is
// ignored and only the x component of val is forwarded.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x)
// Writes one scalar result to OUTPUT[pos] as an RGB colour with alpha 1.0.
// The two remaining channels are derived from INPUT: it is sampled with
// sam_INPUT_LINEAR at HOOKED_map(pos) (mip level 0) and multiplied by rgb2uv.
// The combined triple is then converted with the yuv2rgb matrix.
// pos:   destination pixel in OUTPUT.
// value: scalar produced by the pass (presumably luma, given the rgb2y / yuv2rgb names).
void imageStoreOverride(uint2 pos, float value) {
    float3 sourceRgb = INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb;
    float2 chroma = mul(rgb2uv, sourceRgb);
    float3 yuv = float3(value, chroma);
    OUTPUT[pos] = float4(mul(yuv2rgb, yuv), 1.0);
}
// mpv-hook-style accessors for the INPUT texture.
// INPUT_tex fetches INPUT at pos and passes the result through GET_SAMPLE.
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
// Values returned by GetInputSize() / GetInputPt(), converted to float2. Pass1 multiplies
// texel indices by INPUT_pt to build sampling coordinates, so it is presumably the size
// of one input texel in normalized units - TODO confirm.
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
// Fetch helpers for the two LUT textures. Pass1 in this file calls texture() on the LUTs
// directly and does not reference these macros.
#define ravu_zoom_lut3_tex(pos) (vec4(texture(ravu_zoom_lut3, pos)))
#define ravu_zoom_lut3_ar_tex(pos) (vec4(texture(ravu_zoom_lut3_ar, pos)))
// HOOKED_* names are aliases of the INPUT_* accessors above.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// Pass 1 entry point: each invocation produces one output value and stores it once.
//
// Steps:
//   1. The work group cooperatively copies the input samples it needs into `samples`.
//   2. Each invocation reads a 6x6 window (x and y offsets -2..3 around its base texel).
//   3. Weighted gradient products over the inner 4x4 of the window give a 2x2 symmetric
//      matrix; its eigenvalues are turned into angle / strength / coherence buckets that
//      select a row (coord_y) of the weight LUT.
//   4. All 36 samples are blended with LUT weights, then the result is pulled towards a
//      soft [lo, hi] range computed from the inner 4x4 (anti-ringing) and stored.
//
// blockStart, threadId: only used here for the output bounds check (destPos). The gl_*
// identifiers come from outside this file and are presumably derived from these
// parameters - TODO confirm.
void Pass1(uint2 blockStart, uint3 threadId) {
// First and last output pixel handled by this work group.
ivec2 group_begin = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
ivec2 group_end = group_begin + ivec2(gl_WorkGroupSize) - ivec2(1, 1);
// Input texel rectangle needed by the whole group: the base texels of its first and last
// output pixels, extended by 2 texels before and 3 after for the 6x6 window.
ivec2 rectl = ivec2(floor(HOOKED_size * HOOKED_map(group_begin) - 0.5001)) - 2;
ivec2 rectr = ivec2(floor(HOOKED_size * HOOKED_map(group_end) - 0.4999)) + 3;
ivec2 rect = rectr - rectl + 1;
// Cooperative load: invocations stride over the rectangle by the work-group size and
// store each sample at x + y * 38.
// NOTE(review): `samples` holds 38 * 14 entries, which matches a 32x8 group plus the
// 6-texel pad only while the output is not smaller than the input; no guard for
// downscaling is visible here - confirm the caller restricts the scale.
for (int id = int(gl_LocalInvocationIndex); id < rect.x * rect.y;
id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint y = (uint)id / rect.x, x = (uint)id % rect.x;
samples[x + y * 38] = HOOKED_tex(HOOKED_pt * (vec2(rectl + ivec2(x, y)) + vec2(0.5, 0.5))).x;
}
// Every load must be finished before any invocation reads `samples`.
barrier();
// The bounds check sits after the barrier so that invocations outside the output still
// take part in the cooperative load above.
#if CURRENT_PASS == LAST_PASS
uint2 destPos = blockStart + threadId.xy;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
// Position of this output pixel in input texel units, split into the base texel centre
// (pos) and the fractional offset from it (subpix).
vec2 pos = HOOKED_size * HOOKED_map(ivec2(gl_GlobalInvocationID));
vec2 subpix = fract(pos - 0.5);
pos -= subpix;
// Convert the fraction into LUT coordinates on a 9-texel sub-pixel axis. The two LUTs
// have different widths: one cell is 1/5 of the weight LUT's width but 1/2 of the
// anti-ringing LUT's width (both are 288 cells tall), hence the separate *_ar copies.
subpix = LUTPOS(subpix, vec2(9.0, 9.0));
vec2 subpix_inv = 1.0 - subpix;
vec2 subpix_ar = subpix / vec2(2.0, 288.0);
vec2 subpix_inv_ar = subpix_inv / vec2(2.0, 288.0);
subpix /= vec2(5.0, 288.0);
subpix_inv /= vec2(5.0, 288.0);
// Index of the base texel inside `samples` (row stride 38).
ivec2 ipos = ivec2(floor(pos)) - rectl;
int lpos = ipos.x + ipos.y * 38;
// 6x6 window, column-major: sampleN has x offset N / 6 - 2 and y offset N % 6 - 2.
float sample0 = samples[-78 + lpos];
float sample1 = samples[-40 + lpos];
float sample2 = samples[-2 + lpos];
float sample3 = samples[36 + lpos];
float sample4 = samples[74 + lpos];
float sample5 = samples[112 + lpos];
float sample6 = samples[-77 + lpos];
float sample7 = samples[-39 + lpos];
float sample8 = samples[-1 + lpos];
float sample9 = samples[37 + lpos];
float sample10 = samples[75 + lpos];
float sample11 = samples[113 + lpos];
float sample12 = samples[-76 + lpos];
float sample13 = samples[-38 + lpos];
float sample14 = samples[0 + lpos];
float sample15 = samples[38 + lpos];
float sample16 = samples[76 + lpos];
float sample17 = samples[114 + lpos];
float sample18 = samples[-75 + lpos];
float sample19 = samples[-37 + lpos];
float sample20 = samples[1 + lpos];
float sample21 = samples[39 + lpos];
float sample22 = samples[77 + lpos];
float sample23 = samples[115 + lpos];
float sample24 = samples[-74 + lpos];
float sample25 = samples[-36 + lpos];
float sample26 = samples[2 + lpos];
float sample27 = samples[40 + lpos];
float sample28 = samples[78 + lpos];
float sample29 = samples[116 + lpos];
float sample30 = samples[-73 + lpos];
float sample31 = samples[-35 + lpos];
float sample32 = samples[3 + lpos];
float sample33 = samples[41 + lpos];
float sample34 = samples[79 + lpos];
float sample35 = samples[117 + lpos];
// Accumulate weighted gradient products (gx*gx, gx*gy, gy*gy) for the 16 positions of
// the inner 4x4 (samples 7-10, 13-16, 19-22, 25-28). gx differences run along x, gy
// along y. Where two neighbours exist on both sides inside the window the 4-tap form
// (-f(+2) + 8 f(+1) - 8 f(-1) + f(-2)) / 12 is used, otherwise the 2-tap central
// difference (f(+1) - f(-1)) / 2. Three fixed weights are used: corners, border
// positions and the inner 2x2 of that 4x4.
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (sample13 - sample1) / 2.0;
gy = (sample8 - sample6) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (sample14 - sample2) / 2.0;
gy = (-sample10 + 8.0 * sample9 - 8.0 * sample7 + sample6) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (sample15 - sample3) / 2.0;
gy = (-sample11 + 8.0 * sample10 - 8.0 * sample8 + sample7) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (sample16 - sample4) / 2.0;
gy = (sample11 - sample9) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (-sample25 + 8.0 * sample19 - 8.0 * sample7 + sample1) / 12.0;
gy = (sample14 - sample12) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-sample26 + 8.0 * sample20 - 8.0 * sample8 + sample2) / 12.0;
gy = (-sample16 + 8.0 * sample15 - 8.0 * sample13 + sample12) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-sample27 + 8.0 * sample21 - 8.0 * sample9 + sample3) / 12.0;
gy = (-sample17 + 8.0 * sample16 - 8.0 * sample14 + sample13) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-sample28 + 8.0 * sample22 - 8.0 * sample10 + sample4) / 12.0;
gy = (sample17 - sample15) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-sample31 + 8.0 * sample25 - 8.0 * sample13 + sample7) / 12.0;
gy = (sample20 - sample18) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-sample32 + 8.0 * sample26 - 8.0 * sample14 + sample8) / 12.0;
gy = (-sample22 + 8.0 * sample21 - 8.0 * sample19 + sample18) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-sample33 + 8.0 * sample27 - 8.0 * sample15 + sample9) / 12.0;
gy = (-sample23 + 8.0 * sample22 - 8.0 * sample20 + sample19) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-sample34 + 8.0 * sample28 - 8.0 * sample16 + sample10) / 12.0;
gy = (sample23 - sample21) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (sample31 - sample19) / 2.0;
gy = (sample26 - sample24) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (sample32 - sample20) / 2.0;
gy = (-sample28 + 8.0 * sample27 - 8.0 * sample25 + sample24) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (sample33 - sample21) / 2.0;
gy = (-sample29 + 8.0 * sample28 - 8.0 * sample26 + sample25) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (sample34 - sample22) / 2.0;
gy = (sample29 - sample27) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
// Eigen-analysis of the symmetric matrix [[a, b], [b, d]]: T is its trace, D its
// determinant, L1 >= L2 its eigenvalues.
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
// mix(x, y, cond) with a boolean is assumed to select y when cond is true (GLSL
// semantics); the helper is defined outside this file - TODO confirm.
// theta: direction angle wrapped into [0, pi), forced to 0 when b is negligible.
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
// mu: relative difference of the eigenvalue roots, 0 when both are negligible.
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
// Quantize: 24 angle buckets, 4 strength levels (thresholds 0.004 / 0.016 / 0.05 on
// lambda) and 3 coherence levels (thresholds 0.25 / 0.5 on mu).
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = mix(mix(0.0, 1.0, lambda >= 0.004), mix(2.0, 3.0, lambda >= 0.05), lambda >= 0.016);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
// Row selector into the LUTs: 24 * 4 * 3 = 288 combinations.
float coord_y = ((angle * 4.0 + strength) * 3.0 + coherence) / 288.0;
float res = 0.0;
vec4 w;
vec4 cg, cg1;
float lo = 0.0, hi = 0.0;
float lo2 = 0.0, hi2 = 0.0;
// Weighted sum of the window. Five fetches at x offsets 0.0 .. 0.8 supply the 18 weights
// for samples 0..17 (the last fetch uses only two components); samples 35..18 reuse the
// same LUT columns at the mirrored offset subpix_inv, in reverse order.
w = texture(ravu_zoom_lut3, vec2(0.0, coord_y) + subpix);
res += sample0 * w[0];
res += sample1 * w[1];
res += sample2 * w[2];
res += sample3 * w[3];
w = texture(ravu_zoom_lut3, vec2(0.2, coord_y) + subpix);
res += sample4 * w[0];
res += sample5 * w[1];
res += sample6 * w[2];
res += sample7 * w[3];
w = texture(ravu_zoom_lut3, vec2(0.4, coord_y) + subpix);
res += sample8 * w[0];
res += sample9 * w[1];
res += sample10 * w[2];
res += sample11 * w[3];
w = texture(ravu_zoom_lut3, vec2(0.6, coord_y) + subpix);
res += sample12 * w[0];
res += sample13 * w[1];
res += sample14 * w[2];
res += sample15 * w[3];
w = texture(ravu_zoom_lut3, vec2(0.8, coord_y) + subpix);
res += sample16 * w[0];
res += sample17 * w[1];
w = texture(ravu_zoom_lut3, vec2(0.0, coord_y) + subpix_inv);
res += sample35 * w[0];
res += sample34 * w[1];
res += sample33 * w[2];
res += sample32 * w[3];
w = texture(ravu_zoom_lut3, vec2(0.2, coord_y) + subpix_inv);
res += sample31 * w[0];
res += sample30 * w[1];
res += sample29 * w[2];
res += sample28 * w[3];
w = texture(ravu_zoom_lut3, vec2(0.4, coord_y) + subpix_inv);
res += sample27 * w[0];
res += sample26 * w[1];
res += sample25 * w[2];
res += sample24 * w[3];
w = texture(ravu_zoom_lut3, vec2(0.6, coord_y) + subpix_inv);
res += sample23 * w[0];
res += sample22 * w[1];
res += sample21 * w[2];
res += sample20 * w[3];
w = texture(ravu_zoom_lut3, vec2(0.8, coord_y) + subpix_inv);
res += sample19 * w[0];
res += sample18 * w[1];
// Anti-ringing bounds over the inner 4x4 only. For each pair of samples, cg holds
// (0.1 + s, 1.1 - s) per sample. Five squarings raise it to the 32nd power; hi / lo
// accumulate the weighted 32nd powers and, after one more multiply by cg1, hi2 / lo2
// accumulate the weighted 33rd powers. Weights come from the separate *_ar LUT, read
// with the *_ar sub-pixel coordinates.
w = texture(ravu_zoom_lut3_ar, vec2(0.0, coord_y) + subpix_ar);
cg = vec4(0.1 + sample7, 1.1 - sample7, 0.1 + sample8, 1.1 - sample8);
cg1 = cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
hi += cg[0] * w[0] + cg[2] * w[1];
lo += cg[1] * w[0] + cg[3] * w[1];
cg *= cg1;
hi2 += cg[0] * w[0] + cg[2] * w[1];
lo2 += cg[1] * w[0] + cg[3] * w[1];
cg = vec4(0.1 + sample9, 1.1 - sample9, 0.1 + sample10, 1.1 - sample10);
cg1 = cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
hi += cg[0] * w[2] + cg[2] * w[3];
lo += cg[1] * w[2] + cg[3] * w[3];
cg *= cg1;
hi2 += cg[0] * w[2] + cg[2] * w[3];
lo2 += cg[1] * w[2] + cg[3] * w[3];
w = texture(ravu_zoom_lut3_ar, vec2(0.5, coord_y) + subpix_ar);
cg = vec4(0.1 + sample13, 1.1 - sample13, 0.1 + sample14, 1.1 - sample14);
cg1 = cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
hi += cg[0] * w[0] + cg[2] * w[1];
lo += cg[1] * w[0] + cg[3] * w[1];
cg *= cg1;
hi2 += cg[0] * w[0] + cg[2] * w[1];
lo2 += cg[1] * w[0] + cg[3] * w[1];
cg = vec4(0.1 + sample15, 1.1 - sample15, 0.1 + sample16, 1.1 - sample16);
cg1 = cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
hi += cg[0] * w[2] + cg[2] * w[3];
lo += cg[1] * w[2] + cg[3] * w[3];
cg *= cg1;
hi2 += cg[0] * w[2] + cg[2] * w[3];
lo2 += cg[1] * w[2] + cg[3] * w[3];
// Mirrored half of the inner 4x4: samples 28..19 with the LUT read at subpix_inv_ar.
w = texture(ravu_zoom_lut3_ar, vec2(0.0, coord_y) + subpix_inv_ar);
cg = vec4(0.1 + sample28, 1.1 - sample28, 0.1 + sample27, 1.1 - sample27);
cg1 = cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
hi += cg[0] * w[0] + cg[2] * w[1];
lo += cg[1] * w[0] + cg[3] * w[1];
cg *= cg1;
hi2 += cg[0] * w[0] + cg[2] * w[1];
lo2 += cg[1] * w[0] + cg[3] * w[1];
cg = vec4(0.1 + sample26, 1.1 - sample26, 0.1 + sample25, 1.1 - sample25);
cg1 = cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
hi += cg[0] * w[2] + cg[2] * w[3];
lo += cg[1] * w[2] + cg[3] * w[3];
cg *= cg1;
hi2 += cg[0] * w[2] + cg[2] * w[3];
lo2 += cg[1] * w[2] + cg[3] * w[3];
w = texture(ravu_zoom_lut3_ar, vec2(0.5, coord_y) + subpix_inv_ar);
cg = vec4(0.1 + sample22, 1.1 - sample22, 0.1 + sample21, 1.1 - sample21);
cg1 = cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
hi += cg[0] * w[0] + cg[2] * w[1];
lo += cg[1] * w[0] + cg[3] * w[1];
cg *= cg1;
hi2 += cg[0] * w[0] + cg[2] * w[1];
lo2 += cg[1] * w[0] + cg[3] * w[1];
cg = vec4(0.1 + sample20, 1.1 - sample20, 0.1 + sample19, 1.1 - sample19);
cg1 = cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
cg *= cg;
hi += cg[0] * w[2] + cg[2] * w[3];
lo += cg[1] * w[2] + cg[3] * w[3];
cg *= cg1;
hi2 += cg[0] * w[2] + cg[2] * w[3];
lo2 += cg[1] * w[2] + cg[3] * w[3];
// Ratio of the weighted 33rd to 32nd power sums, with the 0.1 / 1.1 offsets undone:
// a smooth upper (hi) and lower (lo) bound dominated by the largest / smallest samples.
hi = hi2 / hi - 0.1;
lo = 1.1 - lo2 / lo;
// Blend 80% of the way from the raw result to the result clamped into [lo, hi].
res = mix(res, clamp(res, lo, hi), 0.800000);
imageStore(out_image, ivec2(gl_GlobalInvocationID), res);
}

View file

@ -0,0 +1,399 @@
// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers
// Please don't edit this file directly.
// Generated by: ravu-zoom.py --target rgb --weights-file weights\ravu-zoom_weights-r3.py --float-format float16dx --use-compute-shader --anti-ringing 0.8 --use-magpie --overwrite
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//!MAGPIE EFFECT
//!VERSION 4
//!TEXTURE
Texture2D INPUT;
//!SAMPLER
//!FILTER POINT
SamplerState sam_INPUT;
//!TEXTURE
//
//
Texture2D OUTPUT;
//!TEXTURE
//!SOURCE ravu_zoom_lut3_f16.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_zoom_lut3;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_zoom_lut3;
//!TEXTURE
//!SOURCE ravu_zoom_lut3_ar_f16.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_zoom_lut3_ar;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_zoom_lut3_ar;
//!COMMON
#include "prescalers.hlsli"
#define LAST_PASS 1
//!PASS 1
//!DESC RAVU-Zoom-AR (rgb, r3, compute)
//!IN INPUT, ravu_zoom_lut3, ravu_zoom_lut3_ar
//!OUT OUTPUT
//!BLOCK_SIZE 32, 8
//!NUM_THREADS 32, 8
// RGB weights that Pass1 dots with each sample to obtain the luma used for gradient
// estimation (the values match the Rec. 709 luma coefficients).
static const vec3 color_primary = vec3(0.2126, 0.7152, 0.0722);
// Interpolates between 0.5 / lut_size and 1.0 - 0.5 / lut_size as x goes from 0 to 1.
// NOTE(review): assumes mix() is a linear interpolation supplied by prescalers.hlsli - confirm.
#define LUTPOS(x, lut_size) mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x))
// Tile of input texels staged by Pass1. It is indexed there as x + y * 38, and 532 = 38 * 14.
shared vec3 samples[532];
// Compared against LAST_PASS inside Pass1 to enable the output bounds check.
#define CURRENT_PASS 1
// RGB target: fetched texels are used unchanged.
#define GET_SAMPLE(x) x
// Routes GLSL-style imageStore calls to imageStoreOverride; the out_image argument is dropped.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val)
// Target of the imageStore macro: stores the RGBA value at the given output pixel.
void imageStoreOverride(uint2 pos, float4 value) {
	OUTPUT[pos] = value;
}
// Fetches INPUT at a normalized position and passes the texel through GET_SAMPLE.
// (texture() and vec4 are not defined in this file; presumably they come from prescalers.hlsli.)
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
// Input dimensions and the per-texel step, as reported by GetInputSize() / GetInputPt().
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
// Accessors for the two lookup textures. Pass1 below calls texture() on them directly.
#define ravu_zoom_lut3_tex(pos) (vec4(texture(ravu_zoom_lut3, pos)))
#define ravu_zoom_lut3_ar_tex(pos) (vec4(texture(ravu_zoom_lut3_ar, pos)))
// The HOOKED_* names used by the pass body are plain aliases of the INPUT_* ones.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// Pass 1 (RGB, radius 3, with anti-ringing).
// Every invocation issues one imageStore for the pixel at gl_GlobalInvocationID. The value is a
// weighted sum of a 6x6 neighbourhood of input texels staged in `samples`, with weights fetched
// from ravu_zoom_lut3, then limited by bounds derived from the inner 4x4 taps and
// ravu_zoom_lut3_ar.
// blockStart and threadId are read only by the output bounds check; all other addressing uses
// the gl_* identifiers, which are not defined in this file (presumably prescalers.hlsli).
void Pass1(uint2 blockStart, uint3 threadId) {
// Input-texel rectangle needed by this work group: the group's first and last output pixel are
// mapped into input space (HOOKED_map is defined outside this file) and the result is widened
// by 2 texels before and 3 texels after. The 0.5001 / 0.4999 offsets presumably guard the
// floor() calls against rounding error.
ivec2 group_begin = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
ivec2 group_end = group_begin + ivec2(gl_WorkGroupSize) - ivec2(1, 1);
ivec2 rectl = ivec2(floor(HOOKED_size * HOOKED_map(group_begin) - 0.5001)) - 2;
ivec2 rectr = ivec2(floor(HOOKED_size * HOOKED_map(group_end) - 0.4999)) + 3;
ivec2 rect = rectr - rectl + 1;
// Copy the rectangle into `samples`, one texel per iteration, starting at this invocation's
// local index and stepping by the work-group size. Rows are stored with a fixed stride of 38.
// NOTE(review): nothing here checks that rect fits the 38 x 14 tile (532 entries). Presumably
// this is guaranteed by the scale factors the effect is run at - confirm.
for (int id = int(gl_LocalInvocationIndex); id < rect.x * rect.y;
id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint y = (uint)id / rect.x, x = (uint)id % rect.x;
samples[x + y * 38] = HOOKED_tex(HOOKED_pt * (vec2(rectl + ivec2(x, y)) + vec2(0.5, 0.5))).xyz;
}
// barrier() is defined outside this file; presumably it synchronizes the work group so the
// staged tile is complete before it is read.
barrier();
// Invocations whose pixel lies outside the output image return without writing. The return is
// placed after barrier().
#if CURRENT_PASS == LAST_PASS
uint2 destPos = blockStart + threadId.xy;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
// Position of this output pixel in input texel units. subpix is the fractional part of
// (pos - 0.5) and is subtracted from pos.
vec2 pos = HOOKED_size * HOOKED_map(ivec2(gl_GlobalInvocationID));
vec2 subpix = fract(pos - 0.5);
pos -= subpix;
// LUT coordinates: subpix is remapped with LUTPOS for a size of 9. subpix_inv is the mirrored
// offset used for the second half of the taps. The offsets are then scaled by 1/5 in x for the
// kernel LUT (1/2 for the anti-ringing LUT) and by 1/288 in y.
subpix = LUTPOS(subpix, vec2(9.0, 9.0));
vec2 subpix_inv = 1.0 - subpix;
vec2 subpix_ar = subpix / vec2(2.0, 288.0);
vec2 subpix_inv_ar = subpix_inv / vec2(2.0, 288.0);
subpix /= vec2(5.0, 288.0);
subpix_inv /= vec2(5.0, 288.0);
// Base texel inside the staged tile.
ivec2 ipos = ivec2(floor(pos)) - rectl;
int lpos = ipos.x + ipos.y * 38;
// 6x6 neighbourhood around the base texel. sampleN sits at offset (dx, dy) with
// N = 6 * (dx + 2) + (dy + 2) for dx, dy in [-2, 3]; the tile offset is dx + 38 * dy.
vec3 sample0 = samples[-78 + lpos];
vec3 sample1 = samples[-40 + lpos];
vec3 sample2 = samples[-2 + lpos];
vec3 sample3 = samples[36 + lpos];
vec3 sample4 = samples[74 + lpos];
vec3 sample5 = samples[112 + lpos];
vec3 sample6 = samples[-77 + lpos];
vec3 sample7 = samples[-39 + lpos];
vec3 sample8 = samples[-1 + lpos];
vec3 sample9 = samples[37 + lpos];
vec3 sample10 = samples[75 + lpos];
vec3 sample11 = samples[113 + lpos];
vec3 sample12 = samples[-76 + lpos];
vec3 sample13 = samples[-38 + lpos];
vec3 sample14 = samples[0 + lpos];
vec3 sample15 = samples[38 + lpos];
vec3 sample16 = samples[76 + lpos];
vec3 sample17 = samples[114 + lpos];
vec3 sample18 = samples[-75 + lpos];
vec3 sample19 = samples[-37 + lpos];
vec3 sample20 = samples[1 + lpos];
vec3 sample21 = samples[39 + lpos];
vec3 sample22 = samples[77 + lpos];
vec3 sample23 = samples[115 + lpos];
vec3 sample24 = samples[-74 + lpos];
vec3 sample25 = samples[-36 + lpos];
vec3 sample26 = samples[2 + lpos];
vec3 sample27 = samples[40 + lpos];
vec3 sample28 = samples[78 + lpos];
vec3 sample29 = samples[116 + lpos];
vec3 sample30 = samples[-73 + lpos];
vec3 sample31 = samples[-35 + lpos];
vec3 sample32 = samples[3 + lpos];
vec3 sample33 = samples[41 + lpos];
vec3 sample34 = samples[79 + lpos];
vec3 sample35 = samples[117 + lpos];
// Luma of every tap except the four corners (0, 5, 30, 35), which the gradient stencils below
// never read.
float luma1 = dot(sample1, color_primary);
float luma2 = dot(sample2, color_primary);
float luma3 = dot(sample3, color_primary);
float luma4 = dot(sample4, color_primary);
float luma6 = dot(sample6, color_primary);
float luma7 = dot(sample7, color_primary);
float luma8 = dot(sample8, color_primary);
float luma9 = dot(sample9, color_primary);
float luma10 = dot(sample10, color_primary);
float luma11 = dot(sample11, color_primary);
float luma12 = dot(sample12, color_primary);
float luma13 = dot(sample13, color_primary);
float luma14 = dot(sample14, color_primary);
float luma15 = dot(sample15, color_primary);
float luma16 = dot(sample16, color_primary);
float luma17 = dot(sample17, color_primary);
float luma18 = dot(sample18, color_primary);
float luma19 = dot(sample19, color_primary);
float luma20 = dot(sample20, color_primary);
float luma21 = dot(sample21, color_primary);
float luma22 = dot(sample22, color_primary);
float luma23 = dot(sample23, color_primary);
float luma24 = dot(sample24, color_primary);
float luma25 = dot(sample25, color_primary);
float luma26 = dot(sample26, color_primary);
float luma27 = dot(sample27, color_primary);
float luma28 = dot(sample28, color_primary);
float luma29 = dot(sample29, color_primary);
float luma31 = dot(sample31, color_primary);
float luma32 = dot(sample32, color_primary);
float luma33 = dot(sample33, color_primary);
float luma34 = dot(sample34, color_primary);
// Accumulate the weighted terms (gx*gx, gx*gy, gy*gy) over the inner 4x4 taps. gx and gy are
// finite differences of luma along x and y: either a two-point central difference (/ 2.0) or
// a four-point one (/ 12.0), depending on the tap position.
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (luma13 - luma1) / 2.0;
gy = (luma8 - luma6) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma14 - luma2) / 2.0;
gy = (-luma10 + 8.0 * luma9 - 8.0 * luma7 + luma6) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma15 - luma3) / 2.0;
gy = (-luma11 + 8.0 * luma10 - 8.0 * luma8 + luma7) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma16 - luma4) / 2.0;
gy = (luma11 - luma9) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (-luma25 + 8.0 * luma19 - 8.0 * luma7 + luma1) / 12.0;
gy = (luma14 - luma12) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-luma26 + 8.0 * luma20 - 8.0 * luma8 + luma2) / 12.0;
gy = (-luma16 + 8.0 * luma15 - 8.0 * luma13 + luma12) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma27 + 8.0 * luma21 - 8.0 * luma9 + luma3) / 12.0;
gy = (-luma17 + 8.0 * luma16 - 8.0 * luma14 + luma13) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma28 + 8.0 * luma22 - 8.0 * luma10 + luma4) / 12.0;
gy = (luma17 - luma15) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-luma31 + 8.0 * luma25 - 8.0 * luma13 + luma7) / 12.0;
gy = (luma20 - luma18) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-luma32 + 8.0 * luma26 - 8.0 * luma14 + luma8) / 12.0;
gy = (-luma22 + 8.0 * luma21 - 8.0 * luma19 + luma18) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma33 + 8.0 * luma27 - 8.0 * luma15 + luma9) / 12.0;
gy = (-luma23 + 8.0 * luma22 - 8.0 * luma20 + luma19) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma34 + 8.0 * luma28 - 8.0 * luma16 + luma10) / 12.0;
gy = (luma23 - luma21) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma31 - luma19) / 2.0;
gy = (luma26 - luma24) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma32 - luma20) / 2.0;
gy = (-luma28 + 8.0 * luma27 - 8.0 * luma25 + luma24) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma33 - luma21) / 2.0;
gy = (-luma29 + 8.0 * luma28 - 8.0 * luma26 + luma25) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma34 - luma22) / 2.0;
gy = (luma29 - luma27) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
// Closed-form eigenvalues L1 >= L2 of the symmetric 2x2 matrix [[a, b], [b, d]].
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
// theta: angle from atan(L1 - a, b), shifted by pi and wrapped with mod(…, pi); replaced by 0.0
// when |b| is below 1.192092896e-7 (the single-precision machine epsilon).
// NOTE(review): mix() with a boolean third argument is assumed to pick the second argument
// when the condition is true - confirm in prescalers.hlsli.
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
// mu: (sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), replaced by 0.0 when the denominator is ~0.
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
// Quantise into 24 angle x 4 strength x 3 coherence buckets (288 = 24 * 4 * 3). coord_y is the
// resulting bucket index divided by 288 and is used as the LUT y offset.
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = mix(mix(0.0, 1.0, lambda >= 0.004), mix(2.0, 3.0, lambda >= 0.05), lambda >= 0.016);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
float coord_y = ((angle * 4.0 + strength) * 3.0 + coherence) / 288.0;
vec3 res = vec3(0.0, 0.0, 0.0);
vec4 w;
mat4x3 cg, cg1;
vec3 lo = vec3(0.0, 0.0, 0.0), hi = vec3(0.0, 0.0, 0.0);
vec3 lo2 = vec3(0.0, 0.0, 0.0), hi2 = vec3(0.0, 0.0, 0.0);
// Weighted sum of the 36 taps; each LUT fetch yields four weights. Taps 0-17 are fetched with
// subpix at x offsets 0.0 .. 0.8.
w = texture(ravu_zoom_lut3, vec2(0.0, coord_y) + subpix);
res += sample0 * w[0];
res += sample1 * w[1];
res += sample2 * w[2];
res += sample3 * w[3];
w = texture(ravu_zoom_lut3, vec2(0.2, coord_y) + subpix);
res += sample4 * w[0];
res += sample5 * w[1];
res += sample6 * w[2];
res += sample7 * w[3];
w = texture(ravu_zoom_lut3, vec2(0.4, coord_y) + subpix);
res += sample8 * w[0];
res += sample9 * w[1];
res += sample10 * w[2];
res += sample11 * w[3];
w = texture(ravu_zoom_lut3, vec2(0.6, coord_y) + subpix);
res += sample12 * w[0];
res += sample13 * w[1];
res += sample14 * w[2];
res += sample15 * w[3];
w = texture(ravu_zoom_lut3, vec2(0.8, coord_y) + subpix);
res += sample16 * w[0];
res += sample17 * w[1];
// Taps 35 down to 18 reuse the same LUT x offsets with the mirrored offset subpix_inv.
w = texture(ravu_zoom_lut3, vec2(0.0, coord_y) + subpix_inv);
res += sample35 * w[0];
res += sample34 * w[1];
res += sample33 * w[2];
res += sample32 * w[3];
w = texture(ravu_zoom_lut3, vec2(0.2, coord_y) + subpix_inv);
res += sample31 * w[0];
res += sample30 * w[1];
res += sample29 * w[2];
res += sample28 * w[3];
w = texture(ravu_zoom_lut3, vec2(0.4, coord_y) + subpix_inv);
res += sample27 * w[0];
res += sample26 * w[1];
res += sample25 * w[2];
res += sample24 * w[3];
w = texture(ravu_zoom_lut3, vec2(0.6, coord_y) + subpix_inv);
res += sample23 * w[0];
res += sample22 * w[1];
res += sample21 * w[2];
res += sample20 * w[3];
w = texture(ravu_zoom_lut3, vec2(0.8, coord_y) + subpix_inv);
res += sample19 * w[0];
res += sample18 * w[1];
// Anti-ringing bounds from the inner 4x4 taps (7-10, 13-16, 19-22, 25-28).
// For two taps at a time, cg holds the columns (0.1 + s), (1.1 - s). Five self-multiplications
// raise each component to the 32nd power; the extra multiply by cg1 gives the 33rd power.
// hi / lo accumulate the weighted 32nd powers and hi2 / lo2 the weighted 33rd powers.
// (matrixCompMult is assumed to multiply component-wise, as its GLSL namesake does.)
w = texture(ravu_zoom_lut3_ar, vec2(0.0, coord_y) + subpix_ar);
cg = mat4x3(0.1 + sample7, 1.1 - sample7, 0.1 + sample8, 1.1 - sample8);
cg1 = cg;
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
hi += cg[0] * w[0] + cg[2] * w[1];
lo += cg[1] * w[0] + cg[3] * w[1];
cg = matrixCompMult(cg, cg1);
hi2 += cg[0] * w[0] + cg[2] * w[1];
lo2 += cg[1] * w[0] + cg[3] * w[1];
cg = mat4x3(0.1 + sample9, 1.1 - sample9, 0.1 + sample10, 1.1 - sample10);
cg1 = cg;
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
hi += cg[0] * w[2] + cg[2] * w[3];
lo += cg[1] * w[2] + cg[3] * w[3];
cg = matrixCompMult(cg, cg1);
hi2 += cg[0] * w[2] + cg[2] * w[3];
lo2 += cg[1] * w[2] + cg[3] * w[3];
w = texture(ravu_zoom_lut3_ar, vec2(0.5, coord_y) + subpix_ar);
cg = mat4x3(0.1 + sample13, 1.1 - sample13, 0.1 + sample14, 1.1 - sample14);
cg1 = cg;
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
hi += cg[0] * w[0] + cg[2] * w[1];
lo += cg[1] * w[0] + cg[3] * w[1];
cg = matrixCompMult(cg, cg1);
hi2 += cg[0] * w[0] + cg[2] * w[1];
lo2 += cg[1] * w[0] + cg[3] * w[1];
cg = mat4x3(0.1 + sample15, 1.1 - sample15, 0.1 + sample16, 1.1 - sample16);
cg1 = cg;
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
hi += cg[0] * w[2] + cg[2] * w[3];
lo += cg[1] * w[2] + cg[3] * w[3];
cg = matrixCompMult(cg, cg1);
hi2 += cg[0] * w[2] + cg[2] * w[3];
lo2 += cg[1] * w[2] + cg[3] * w[3];
w = texture(ravu_zoom_lut3_ar, vec2(0.0, coord_y) + subpix_inv_ar);
cg = mat4x3(0.1 + sample28, 1.1 - sample28, 0.1 + sample27, 1.1 - sample27);
cg1 = cg;
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
hi += cg[0] * w[0] + cg[2] * w[1];
lo += cg[1] * w[0] + cg[3] * w[1];
cg = matrixCompMult(cg, cg1);
hi2 += cg[0] * w[0] + cg[2] * w[1];
lo2 += cg[1] * w[0] + cg[3] * w[1];
cg = mat4x3(0.1 + sample26, 1.1 - sample26, 0.1 + sample25, 1.1 - sample25);
cg1 = cg;
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
hi += cg[0] * w[2] + cg[2] * w[3];
lo += cg[1] * w[2] + cg[3] * w[3];
cg = matrixCompMult(cg, cg1);
hi2 += cg[0] * w[2] + cg[2] * w[3];
lo2 += cg[1] * w[2] + cg[3] * w[3];
w = texture(ravu_zoom_lut3_ar, vec2(0.5, coord_y) + subpix_inv_ar);
cg = mat4x3(0.1 + sample22, 1.1 - sample22, 0.1 + sample21, 1.1 - sample21);
cg1 = cg;
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
hi += cg[0] * w[0] + cg[2] * w[1];
lo += cg[1] * w[0] + cg[3] * w[1];
cg = matrixCompMult(cg, cg1);
hi2 += cg[0] * w[0] + cg[2] * w[1];
lo2 += cg[1] * w[0] + cg[3] * w[1];
cg = mat4x3(0.1 + sample20, 1.1 - sample20, 0.1 + sample19, 1.1 - sample19);
cg1 = cg;
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
cg = matrixCompMult(cg, cg);
hi += cg[0] * w[2] + cg[2] * w[3];
lo += cg[1] * w[2] + cg[3] * w[3];
cg = matrixCompMult(cg, cg1);
hi2 += cg[0] * w[2] + cg[2] * w[3];
lo2 += cg[1] * w[2] + cg[3] * w[3];
// Ratio of the 33rd-power sum to the 32nd-power sum, with the 0.1 / 1.1 offsets undone. This
// presumably acts as a smooth upper (hi) and lower (lo) bound over the inner taps.
hi = hi2 / hi - 0.1;
lo = 1.1 - lo2 / lo;
// Move res toward its value clamped to [lo, hi] with an interpolation factor of 0.8.
res = mix(res, clamp(res, lo, hi), 0.800000);
// Store the RGB result with alpha 1.0.
imageStore(out_image, ivec2(gl_GlobalInvocationID), vec4(res, 1.0));
}

View file

@ -0,0 +1,209 @@
// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers
// Please don't edit this file directly.
// Generated by: ravu-zoom.py --target luma --weights-file weights\ravu-zoom_weights-r2.py --float-format float16dx --use-compute-shader --use-magpie --overwrite
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//!MAGPIE EFFECT
//!VERSION 4
//!TEXTURE
Texture2D INPUT;
//!SAMPLER
//!FILTER POINT
SamplerState sam_INPUT;
//!TEXTURE
//
//
Texture2D OUTPUT;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_INPUT_LINEAR;
//!TEXTURE
//!SOURCE ravu_zoom_lut2_f16.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_zoom_lut2;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_zoom_lut2;
//!COMMON
#include "prescalers.hlsli"
#define LAST_PASS 1
//!PASS 1
//!DESC RAVU-Zoom (luma, r2, compute)
//!IN INPUT, ravu_zoom_lut2
//!OUT OUTPUT
//!BLOCK_SIZE 32, 8
//!NUM_THREADS 32, 8
// Interpolates between 0.5 / lut_size and 1.0 - 0.5 / lut_size as x goes from 0 to 1.
// NOTE(review): assumes mix() is a linear interpolation supplied by prescalers.hlsli - confirm.
#define LUTPOS(x, lut_size) mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x))
// Tile of input luma values staged by Pass1. It is indexed there as x + y * 36, and 432 = 36 * 12.
shared float samples[432];
// Compared against LAST_PASS inside Pass1 to enable the output bounds check.
#define CURRENT_PASS 1
// Luma target: fetched texels are reduced to a scalar by dotting .rgb with rgb2y
// (rgb2y is not defined in this file).
#define GET_SAMPLE(x) dot(x.rgb, rgb2y)
// Routes GLSL-style imageStore calls to imageStoreOverride; the out_image argument is dropped
// and only the .x component of the value is forwarded.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x)
// Target of the imageStore macro for the luma pass. The computed luma is combined with chroma
// taken from a linearly filtered fetch of INPUT at the same pixel, converted back to RGB and
// written to OUTPUT with alpha 1.0.
void imageStoreOverride(uint2 pos, float value) {
	const float3 sourceRgb = INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb;
	const float2 chroma = mul(rgb2uv, sourceRgb);
	const float3 rgb = mul(yuv2rgb, float3(value.x, chroma));
	OUTPUT[pos] = float4(rgb, 1.0);
}
// Fetches INPUT at a normalized position and passes the texel through GET_SAMPLE.
// (texture() and vec4 are not defined in this file; presumably they come from prescalers.hlsli.)
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
// Input dimensions and the per-texel step, as reported by GetInputSize() / GetInputPt().
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
// Accessor for the weight lookup texture. Pass1 below calls texture() on it directly.
#define ravu_zoom_lut2_tex(pos) (vec4(texture(ravu_zoom_lut2, pos)))
// The HOOKED_* names used by the pass body are plain aliases of the INPUT_* ones.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// Pass 1 (luma, radius 2).
// Every invocation issues one imageStore for the pixel at gl_GlobalInvocationID. The value is a
// weighted sum of a 4x4 neighbourhood of input luma values staged in `samples`, with weights
// fetched from ravu_zoom_lut2.
// blockStart and threadId are read only by the output bounds check; all other addressing uses
// the gl_* identifiers, which are not defined in this file (presumably prescalers.hlsli).
void Pass1(uint2 blockStart, uint3 threadId) {
// Input-texel rectangle needed by this work group: the group's first and last output pixel are
// mapped into input space (HOOKED_map is defined outside this file) and the result is widened
// by 1 texel before and 2 texels after. The 0.5001 / 0.4999 offsets presumably guard the
// floor() calls against rounding error.
ivec2 group_begin = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
ivec2 group_end = group_begin + ivec2(gl_WorkGroupSize) - ivec2(1, 1);
ivec2 rectl = ivec2(floor(HOOKED_size * HOOKED_map(group_begin) - 0.5001)) - 1;
ivec2 rectr = ivec2(floor(HOOKED_size * HOOKED_map(group_end) - 0.4999)) + 2;
ivec2 rect = rectr - rectl + 1;
// Copy the rectangle into `samples`, one texel per iteration, starting at this invocation's
// local index and stepping by the work-group size. Rows are stored with a fixed stride of 36.
// NOTE(review): nothing here checks that rect fits the 36 x 12 tile (432 entries). Presumably
// this is guaranteed by the scale factors the effect is run at - confirm.
for (int id = int(gl_LocalInvocationIndex); id < rect.x * rect.y;
id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint y = (uint)id / rect.x, x = (uint)id % rect.x;
samples[x + y * 36] = HOOKED_tex(HOOKED_pt * (vec2(rectl + ivec2(x, y)) + vec2(0.5, 0.5))).x;
}
// barrier() is defined outside this file; presumably it synchronizes the work group so the
// staged tile is complete before it is read.
barrier();
// Invocations whose pixel lies outside the output image return without writing. The return is
// placed after barrier().
#if CURRENT_PASS == LAST_PASS
uint2 destPos = blockStart + threadId.xy;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
// Position of this output pixel in input texel units. subpix is the fractional part of
// (pos - 0.5) and is subtracted from pos.
vec2 pos = HOOKED_size * HOOKED_map(ivec2(gl_GlobalInvocationID));
vec2 subpix = fract(pos - 0.5);
pos -= subpix;
// LUT coordinates: subpix is remapped with LUTPOS for a size of 9. subpix_inv is the mirrored
// offset used for the second half of the taps. Both are scaled by 1/2 in x and 1/288 in y.
subpix = LUTPOS(subpix, vec2(9.0, 9.0));
vec2 subpix_inv = 1.0 - subpix;
subpix /= vec2(2.0, 288.0);
subpix_inv /= vec2(2.0, 288.0);
// Base texel inside the staged tile.
ivec2 ipos = ivec2(floor(pos)) - rectl;
int lpos = ipos.x + ipos.y * 36;
// 4x4 neighbourhood around the base texel. sampleN sits at offset (dx, dy) with
// N = 4 * (dx + 1) + (dy + 1) for dx, dy in [-1, 2]; the tile offset is dx + 36 * dy.
float sample0 = samples[-37 + lpos];
float sample1 = samples[-1 + lpos];
float sample2 = samples[35 + lpos];
float sample3 = samples[71 + lpos];
float sample4 = samples[-36 + lpos];
float sample5 = samples[0 + lpos];
float sample6 = samples[36 + lpos];
float sample7 = samples[72 + lpos];
float sample8 = samples[-35 + lpos];
float sample9 = samples[1 + lpos];
float sample10 = samples[37 + lpos];
float sample11 = samples[73 + lpos];
float sample12 = samples[-34 + lpos];
float sample13 = samples[2 + lpos];
float sample14 = samples[38 + lpos];
float sample15 = samples[74 + lpos];
// Accumulate the weighted terms (gx*gx, gx*gy, gy*gy) over all 16 taps. gx and gy are finite
// differences along x and y: a one-sided difference of adjacent taps on the first/last
// column or row of the 4x4, and a two-point central difference (/ 2.0) elsewhere.
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (sample4 - sample0);
gy = (sample1 - sample0);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (sample5 - sample1);
gy = (sample2 - sample0) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (sample6 - sample2);
gy = (sample3 - sample1) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (sample7 - sample3);
gy = (sample3 - sample2);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (sample8 - sample0) / 2.0;
gy = (sample5 - sample4);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (sample9 - sample1) / 2.0;
gy = (sample6 - sample4) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (sample10 - sample2) / 2.0;
gy = (sample7 - sample5) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (sample11 - sample3) / 2.0;
gy = (sample7 - sample6);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (sample12 - sample4) / 2.0;
gy = (sample9 - sample8);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (sample13 - sample5) / 2.0;
gy = (sample10 - sample8) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (sample14 - sample6) / 2.0;
gy = (sample11 - sample9) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (sample15 - sample7) / 2.0;
gy = (sample11 - sample10);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (sample12 - sample8);
gy = (sample13 - sample12);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (sample13 - sample9);
gy = (sample14 - sample12) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (sample14 - sample10);
gy = (sample15 - sample13) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (sample15 - sample11);
gy = (sample15 - sample14);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
// Closed-form eigenvalues L1 >= L2 of the symmetric 2x2 matrix [[a, b], [b, d]].
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
// theta: angle from atan(L1 - a, b), shifted by pi and wrapped with mod(…, pi); replaced by 0.0
// when |b| is below 1.192092896e-7 (the single-precision machine epsilon).
// NOTE(review): mix() with a boolean third argument is assumed to pick the second argument
// when the condition is true - confirm in prescalers.hlsli.
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
// mu: (sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), replaced by 0.0 when the denominator is ~0.
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
// Quantise into 24 angle x 4 strength x 3 coherence buckets (288 = 24 * 4 * 3). coord_y is the
// resulting bucket index divided by 288 and is used as the LUT y offset.
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = mix(mix(0.0, 1.0, lambda >= 0.004), mix(2.0, 3.0, lambda >= 0.05), lambda >= 0.016);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
float coord_y = ((angle * 4.0 + strength) * 3.0 + coherence) / 288.0;
// Weighted sum of the 16 taps; each LUT fetch yields four weights. Taps 0-7 are fetched with
// subpix; taps 15 down to 8 reuse the same LUT x offsets with the mirrored offset subpix_inv.
float res = 0.0;
vec4 w;
w = texture(ravu_zoom_lut2, vec2(0.0, coord_y) + subpix);
res += sample0 * w[0];
res += sample1 * w[1];
res += sample2 * w[2];
res += sample3 * w[3];
w = texture(ravu_zoom_lut2, vec2(0.5, coord_y) + subpix);
res += sample4 * w[0];
res += sample5 * w[1];
res += sample6 * w[2];
res += sample7 * w[3];
w = texture(ravu_zoom_lut2, vec2(0.0, coord_y) + subpix_inv);
res += sample15 * w[0];
res += sample14 * w[1];
res += sample13 * w[2];
res += sample12 * w[3];
w = texture(ravu_zoom_lut2, vec2(0.5, coord_y) + subpix_inv);
res += sample11 * w[0];
res += sample10 * w[1];
res += sample9 * w[2];
res += sample8 * w[3];
// Limit the result to [0, 1] and hand the luma value to imageStore.
res = clamp(res, 0.0, 1.0);
imageStore(out_image, ivec2(gl_GlobalInvocationID), res);
}

View file

@ -0,0 +1,219 @@
// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers
// Please don't edit this file directly.
// Generated by: ravu-zoom.py --target rgb --weights-file weights\ravu-zoom_weights-r2.py --float-format float16dx --use-compute-shader --use-magpie --overwrite
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//!MAGPIE EFFECT
//!VERSION 4
//!TEXTURE
Texture2D INPUT;
//!SAMPLER
//!FILTER POINT
SamplerState sam_INPUT;
//!TEXTURE
//
//
Texture2D OUTPUT;
//!TEXTURE
//!SOURCE ravu_zoom_lut2_f16.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_zoom_lut2;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_zoom_lut2;
//!COMMON
#include "prescalers.hlsli"
#define LAST_PASS 1
//!PASS 1
//!DESC RAVU-Zoom (rgb, r2, compute)
//!IN INPUT, ravu_zoom_lut2
//!OUT OUTPUT
//!BLOCK_SIZE 32, 8
//!NUM_THREADS 32, 8
// RGB weights that Pass1 dots with each sample to obtain the luma used for gradient
// estimation (the values match the Rec. 709 luma coefficients).
static const vec3 color_primary = vec3(0.2126, 0.7152, 0.0722);
// Interpolates between 0.5 / lut_size and 1.0 - 0.5 / lut_size as x goes from 0 to 1.
// NOTE(review): assumes mix() is a linear interpolation supplied by prescalers.hlsli - confirm.
#define LUTPOS(x, lut_size) mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x))
// Tile of input texels staged by Pass1. It is indexed there as x + y * 36, and 432 = 36 * 12.
shared vec3 samples[432];
// Compared against LAST_PASS inside Pass1 to enable the output bounds check.
#define CURRENT_PASS 1
// RGB target: fetched texels are used unchanged.
#define GET_SAMPLE(x) x
// Routes GLSL-style imageStore calls to imageStoreOverride; the out_image argument is dropped.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val)
// Target of the imageStore macro: stores the RGBA value at the given output pixel.
void imageStoreOverride(uint2 pos, float4 value) {
	OUTPUT[pos] = value;
}
// Fetches INPUT at a normalized position and passes the texel through GET_SAMPLE.
// (texture() and vec4 are not defined in this file; presumably they come from prescalers.hlsli.)
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
// Input dimensions and the per-texel step, as reported by GetInputSize() / GetInputPt().
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
// Accessor for the weight lookup texture. Pass1 below calls texture() on it directly.
#define ravu_zoom_lut2_tex(pos) (vec4(texture(ravu_zoom_lut2, pos)))
// The HOOKED_* names used by the pass body are plain aliases of the INPUT_* ones.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// Pass 1 (RGB, radius 2).
// Every invocation issues one imageStore for the pixel at gl_GlobalInvocationID. The value is a
// weighted sum of a 4x4 neighbourhood of input texels staged in `samples`, with weights fetched
// from ravu_zoom_lut2; the weights are selected from luma gradients of the same neighbourhood.
// blockStart and threadId are read only by the output bounds check; all other addressing uses
// the gl_* identifiers, which are not defined in this file (presumably prescalers.hlsli).
void Pass1(uint2 blockStart, uint3 threadId) {
// Input-texel rectangle needed by this work group: the group's first and last output pixel are
// mapped into input space (HOOKED_map is defined outside this file) and the result is widened
// by 1 texel before and 2 texels after. The 0.5001 / 0.4999 offsets presumably guard the
// floor() calls against rounding error.
ivec2 group_begin = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
ivec2 group_end = group_begin + ivec2(gl_WorkGroupSize) - ivec2(1, 1);
ivec2 rectl = ivec2(floor(HOOKED_size * HOOKED_map(group_begin) - 0.5001)) - 1;
ivec2 rectr = ivec2(floor(HOOKED_size * HOOKED_map(group_end) - 0.4999)) + 2;
ivec2 rect = rectr - rectl + 1;
// Copy the rectangle into `samples`, one texel per iteration, starting at this invocation's
// local index and stepping by the work-group size. Rows are stored with a fixed stride of 36.
// NOTE(review): nothing here checks that rect fits the 36 x 12 tile (432 entries). Presumably
// this is guaranteed by the scale factors the effect is run at - confirm.
for (int id = int(gl_LocalInvocationIndex); id < rect.x * rect.y;
id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint y = (uint)id / rect.x, x = (uint)id % rect.x;
samples[x + y * 36] = HOOKED_tex(HOOKED_pt * (vec2(rectl + ivec2(x, y)) + vec2(0.5, 0.5))).xyz;
}
// barrier() is defined outside this file; presumably it synchronizes the work group so the
// staged tile is complete before it is read.
barrier();
// Invocations whose pixel lies outside the output image return without writing. The return is
// placed after barrier().
#if CURRENT_PASS == LAST_PASS
uint2 destPos = blockStart + threadId.xy;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
// Position of this output pixel in input texel units. subpix is the fractional part of
// (pos - 0.5) and is subtracted from pos.
vec2 pos = HOOKED_size * HOOKED_map(ivec2(gl_GlobalInvocationID));
vec2 subpix = fract(pos - 0.5);
pos -= subpix;
// LUT coordinates: subpix is remapped with LUTPOS for a size of 9. subpix_inv is the mirrored
// offset used for the second half of the taps. Both are scaled by 1/2 in x and 1/288 in y.
subpix = LUTPOS(subpix, vec2(9.0, 9.0));
vec2 subpix_inv = 1.0 - subpix;
subpix /= vec2(2.0, 288.0);
subpix_inv /= vec2(2.0, 288.0);
// Base texel inside the staged tile.
ivec2 ipos = ivec2(floor(pos)) - rectl;
int lpos = ipos.x + ipos.y * 36;
// 4x4 neighbourhood around the base texel. sampleN sits at offset (dx, dy) with
// N = 4 * (dx + 1) + (dy + 1) for dx, dy in [-1, 2]; the tile offset is dx + 36 * dy.
vec3 sample0 = samples[-37 + lpos];
vec3 sample1 = samples[-1 + lpos];
vec3 sample2 = samples[35 + lpos];
vec3 sample3 = samples[71 + lpos];
vec3 sample4 = samples[-36 + lpos];
vec3 sample5 = samples[0 + lpos];
vec3 sample6 = samples[36 + lpos];
vec3 sample7 = samples[72 + lpos];
vec3 sample8 = samples[-35 + lpos];
vec3 sample9 = samples[1 + lpos];
vec3 sample10 = samples[37 + lpos];
vec3 sample11 = samples[73 + lpos];
vec3 sample12 = samples[-34 + lpos];
vec3 sample13 = samples[2 + lpos];
vec3 sample14 = samples[38 + lpos];
vec3 sample15 = samples[74 + lpos];
// Luma of every tap; the gradients below are computed on luma only.
float luma0 = dot(sample0, color_primary);
float luma1 = dot(sample1, color_primary);
float luma2 = dot(sample2, color_primary);
float luma3 = dot(sample3, color_primary);
float luma4 = dot(sample4, color_primary);
float luma5 = dot(sample5, color_primary);
float luma6 = dot(sample6, color_primary);
float luma7 = dot(sample7, color_primary);
float luma8 = dot(sample8, color_primary);
float luma9 = dot(sample9, color_primary);
float luma10 = dot(sample10, color_primary);
float luma11 = dot(sample11, color_primary);
float luma12 = dot(sample12, color_primary);
float luma13 = dot(sample13, color_primary);
float luma14 = dot(sample14, color_primary);
float luma15 = dot(sample15, color_primary);
// Accumulate the weighted terms (gx*gx, gx*gy, gy*gy) over all 16 taps. gx and gy are finite
// differences along x and y: a one-sided difference of adjacent taps on the first/last
// column or row of the 4x4, and a two-point central difference (/ 2.0) elsewhere.
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (luma4 - luma0);
gy = (luma1 - luma0);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma5 - luma1);
gy = (luma2 - luma0) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma6 - luma2);
gy = (luma3 - luma1) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma7 - luma3);
gy = (luma3 - luma2);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma8 - luma0) / 2.0;
gy = (luma5 - luma4);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma9 - luma1) / 2.0;
gy = (luma6 - luma4) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma10 - luma2) / 2.0;
gy = (luma7 - luma5) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma11 - luma3) / 2.0;
gy = (luma7 - luma6);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma12 - luma4) / 2.0;
gy = (luma9 - luma8);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma13 - luma5) / 2.0;
gy = (luma10 - luma8) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma14 - luma6) / 2.0;
gy = (luma11 - luma9) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (luma15 - luma7) / 2.0;
gy = (luma11 - luma10);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma12 - luma8);
gy = (luma13 - luma12);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma13 - luma9);
gy = (luma14 - luma12) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma14 - luma10);
gy = (luma15 - luma13) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma15 - luma11);
gy = (luma15 - luma14);
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
// Closed-form eigenvalues L1 >= L2 of the symmetric 2x2 matrix [[a, b], [b, d]].
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
// theta: angle from atan(L1 - a, b), shifted by pi and wrapped with mod(…, pi); replaced by 0.0
// when |b| is below 1.192092896e-7 (the single-precision machine epsilon).
// NOTE(review): mix() with a boolean third argument is assumed to pick the second argument
// when the condition is true - confirm in prescalers.hlsli.
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
// mu: (sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), replaced by 0.0 when the denominator is ~0.
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
// Quantise into 24 angle x 4 strength x 3 coherence buckets (288 = 24 * 4 * 3). coord_y is the
// resulting bucket index divided by 288 and is used as the LUT y offset.
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = mix(mix(0.0, 1.0, lambda >= 0.004), mix(2.0, 3.0, lambda >= 0.05), lambda >= 0.016);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
float coord_y = ((angle * 4.0 + strength) * 3.0 + coherence) / 288.0;
// Weighted sum of the 16 taps; each LUT fetch yields four weights. Taps 0-7 are fetched with
// subpix; taps 15 down to 8 reuse the same LUT x offsets with the mirrored offset subpix_inv.
vec3 res = vec3(0.0, 0.0, 0.0);
vec4 w;
w = texture(ravu_zoom_lut2, vec2(0.0, coord_y) + subpix);
res += sample0 * w[0];
res += sample1 * w[1];
res += sample2 * w[2];
res += sample3 * w[3];
w = texture(ravu_zoom_lut2, vec2(0.5, coord_y) + subpix);
res += sample4 * w[0];
res += sample5 * w[1];
res += sample6 * w[2];
res += sample7 * w[3];
w = texture(ravu_zoom_lut2, vec2(0.0, coord_y) + subpix_inv);
res += sample15 * w[0];
res += sample14 * w[1];
res += sample13 * w[2];
res += sample12 * w[3];
w = texture(ravu_zoom_lut2, vec2(0.5, coord_y) + subpix_inv);
res += sample11 * w[0];
res += sample10 * w[1];
res += sample9 * w[2];
res += sample8 * w[3];
// Limit each channel to [0, 1] and store the RGB result with alpha 1.0.
res = clamp(res, 0.0, 1.0);
imageStore(out_image, ivec2(gl_GlobalInvocationID), vec4(res, 1.0));
}

View file

@ -1,242 +1,255 @@
// Ported from https://raw.githubusercontent.com/bjin/mpv-prescalers/master/compute/ravu-zoom-r3.hook
// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers
// Please don't edit this file directly.
// Generated by: ravu-zoom.py --target luma --weights-file weights\ravu-zoom_weights-r3.py --float-format float16dx --use-compute-shader --use-magpie --overwrite
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//!MAGPIE EFFECT
//!VERSION 3
//!VERSION 4
//!TEXTURE
Texture2D INPUT;
//!TEXTURE
//!SOURCE RAVU_Zoom_R3_Weights.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_zoom_lut3;
//!SAMPLER
//!FILTER POINT
SamplerState sam;
SamplerState sam_INPUT;
//!TEXTURE
//
//
Texture2D OUTPUT;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam1;
SamplerState sam_INPUT_LINEAR;
//!TEXTURE
//!SOURCE ravu_zoom_lut3_f16.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_zoom_lut3;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_zoom_lut3;
//!COMMON
#include "prescalers.hlsli"
#define LAST_PASS 1
//!PASS 1
//!DESC RAVU-Zoom (luma, r3, compute)
//!IN INPUT, ravu_zoom_lut3
//!BLOCK_SIZE 16, 16
//!NUM_THREADS 16, 16
//!OUT OUTPUT
//!BLOCK_SIZE 32, 8
//!NUM_THREADS 32, 8
#define LUTPOS(x, lut_size) mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x))
shared float samples[532];
#define NUM_PIXELS_X (MP_BLOCK_WIDTH + 5)
#define NUM_PIXELS_Y (MP_BLOCK_HEIGHT + 5)
#define CURRENT_PASS 1
groupshared float samples[NUM_PIXELS_X * NUM_PIXELS_Y];
float GetLuma(float3 color) {
return dot(float3(0.299f, 0.587f, 0.114f), color);
#define GET_SAMPLE(x) dot(x.rgb, rgb2y)
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val.x)
void imageStoreOverride(uint2 pos, float value) {
float2 UV = mul(rgb2uv, INPUT.SampleLevel(sam_INPUT_LINEAR, HOOKED_map(pos), 0).rgb);
OUTPUT[pos] = float4(mul(yuv2rgb, float3(value.x, UV)), 1.0);
}
#define PI 3.1415926535897932384626433832795
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
// https://github.com/mpv-player/mpv/issues/9390#issuecomment-961082863
#define LUT_POS(x, lut_size) lerp(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x))
const static float3x3 yuv2rgb = {
1, -0.00093, 1.401687,
1, -0.3437, -0.71417,
1, 1.77216, 0.00099
};
const static float2x3 rgb2uv = {
-0.169, -0.331, 0.5,
0.5, -0.419, -0.081
};
float mod(float x, float y) {
return x - y * floor(x / y);
}
#define ravu_zoom_lut3_tex(pos) (vec4(texture(ravu_zoom_lut3, pos)))
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
void Pass1(uint2 blockStart, uint3 threadId) {
const float2 inputPt = GetInputPt();
const uint2 inputSize = GetInputSize();
const float2 rcpScale = rcp(GetScale());
const int2 rectl = floor(blockStart * rcpScale - 0.5f) - 2;
const int2 rectr = floor((blockStart + uint2(MP_BLOCK_WIDTH, MP_BLOCK_HEIGHT)) * rcpScale - 0.5f) + 3;
const uint2 rect = uint2(rectr - rectl + 1);
const int maxId = int(rect.x * rect.y);
for (int id = int(threadId.y * MP_NUM_THREADS_X + threadId.x); id < maxId; id += MP_NUM_THREADS_X * MP_NUM_THREADS_Y) {
ivec2 group_begin = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
ivec2 group_end = group_begin + ivec2(gl_WorkGroupSize) - ivec2(1, 1);
ivec2 rectl = ivec2(floor(HOOKED_size * HOOKED_map(group_begin) - 0.5001)) - 2;
ivec2 rectr = ivec2(floor(HOOKED_size * HOOKED_map(group_end) - 0.4999)) + 3;
ivec2 rect = rectr - rectl + 1;
for (int id = int(gl_LocalInvocationIndex); id < rect.x * rect.y;
id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint y = (uint)id / rect.x, x = (uint)id % rect.x;
samples[x + y * NUM_PIXELS_X] = GetLuma(INPUT.SampleLevel(sam, inputPt * (rectl + uint2(x, y) + 0.5f), 0).rgb);
samples[x + y * 38] = HOOKED_tex(HOOKED_pt * (vec2(rectl + ivec2(x, y)) + vec2(0.5, 0.5))).x;
}
GroupMemoryBarrierWithGroupSync();
barrier();
#if CURRENT_PASS == LAST_PASS
uint2 destPos = blockStart + threadId.xy;
if (!CheckViewport(destPos)) {
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
float2 pos = (destPos + 0.5f) * rcpScale;
float2 subpix = frac(pos - 0.5f);
#endif
vec2 pos = HOOKED_size * HOOKED_map(ivec2(gl_GlobalInvocationID));
vec2 subpix = fract(pos - 0.5);
pos -= subpix;
subpix = LUT_POS(subpix, 9);
float2 subpix_inv = 1.0 - subpix;
subpix /= float2(5.0, 288.0);
subpix_inv /= float2(5.0, 288.0);
uint2 ipos = uint2(floor(pos) - rectl);
uint lpos = ipos.x + ipos.y * NUM_PIXELS_X;
float sample0 = samples[lpos - 2 * NUM_PIXELS_X - 2];
float sample1 = samples[lpos - NUM_PIXELS_X - 2];
float sample2 = samples[lpos - 2];
float sample3 = samples[lpos + NUM_PIXELS_X - 2];
float sample4 = samples[lpos + 2 * NUM_PIXELS_X - 2];
float sample5 = samples[lpos + 3 * NUM_PIXELS_X - 2];
float sample6 = samples[lpos - 2 * NUM_PIXELS_X - 1];
float sample7 = samples[lpos - NUM_PIXELS_X - 1];
float sample8 = samples[lpos - 1];
float sample9 = samples[lpos + NUM_PIXELS_X - 1];
float sample10 = samples[lpos + 2 * NUM_PIXELS_X - 1];
float sample11 = samples[lpos + 3 * NUM_PIXELS_X - 1];
float sample12 = samples[lpos - 2 * NUM_PIXELS_X];
float sample13 = samples[lpos - NUM_PIXELS_X];
float sample14 = samples[lpos];
float sample15 = samples[lpos + NUM_PIXELS_X];
float sample16 = samples[lpos + 2 * NUM_PIXELS_X];
float sample17 = samples[lpos + 3 * NUM_PIXELS_X];
float sample18 = samples[lpos - 2 * NUM_PIXELS_X + 1];
float sample19 = samples[lpos - NUM_PIXELS_X + 1];
float sample20 = samples[lpos + 1];
float sample21 = samples[lpos + NUM_PIXELS_X + 1];
float sample22 = samples[lpos + 2 * NUM_PIXELS_X + 1];
float sample23 = samples[lpos + 3 * NUM_PIXELS_X + 1];
float sample24 = samples[lpos - 2 * NUM_PIXELS_X + 2];
float sample25 = samples[lpos - NUM_PIXELS_X + 2];
float sample26 = samples[lpos + 2];
float sample27 = samples[lpos + NUM_PIXELS_X + 2];
float sample28 = samples[lpos + 2 * NUM_PIXELS_X + 2];
float sample29 = samples[lpos + 3 * NUM_PIXELS_X + 2];
float sample30 = samples[lpos - 2 * NUM_PIXELS_X + 3];
float sample31 = samples[lpos - NUM_PIXELS_X + 3];
float sample32 = samples[lpos + 3];
float sample33 = samples[lpos + NUM_PIXELS_X + 3];
float sample34 = samples[lpos + 2 * NUM_PIXELS_X + 3];
float sample35 = samples[lpos + 3 * NUM_PIXELS_X + 3];
float3 abd = 0;
subpix = LUTPOS(subpix, vec2(9.0, 9.0));
vec2 subpix_inv = 1.0 - subpix;
subpix /= vec2(5.0, 288.0);
subpix_inv /= vec2(5.0, 288.0);
ivec2 ipos = ivec2(floor(pos)) - rectl;
int lpos = ipos.x + ipos.y * 38;
float sample0 = samples[-78 + lpos];
float sample1 = samples[-40 + lpos];
float sample2 = samples[-2 + lpos];
float sample3 = samples[36 + lpos];
float sample4 = samples[74 + lpos];
float sample5 = samples[112 + lpos];
float sample6 = samples[-77 + lpos];
float sample7 = samples[-39 + lpos];
float sample8 = samples[-1 + lpos];
float sample9 = samples[37 + lpos];
float sample10 = samples[75 + lpos];
float sample11 = samples[113 + lpos];
float sample12 = samples[-76 + lpos];
float sample13 = samples[-38 + lpos];
float sample14 = samples[0 + lpos];
float sample15 = samples[38 + lpos];
float sample16 = samples[76 + lpos];
float sample17 = samples[114 + lpos];
float sample18 = samples[-75 + lpos];
float sample19 = samples[-37 + lpos];
float sample20 = samples[1 + lpos];
float sample21 = samples[39 + lpos];
float sample22 = samples[77 + lpos];
float sample23 = samples[115 + lpos];
float sample24 = samples[-74 + lpos];
float sample25 = samples[-36 + lpos];
float sample26 = samples[2 + lpos];
float sample27 = samples[40 + lpos];
float sample28 = samples[78 + lpos];
float sample29 = samples[116 + lpos];
float sample30 = samples[-73 + lpos];
float sample31 = samples[-35 + lpos];
float sample32 = samples[3 + lpos];
float sample33 = samples[41 + lpos];
float sample34 = samples[79 + lpos];
float sample35 = samples[117 + lpos];
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (sample13 - sample1) / 2.0;
gy = (sample8 - sample6) / 2.0;
abd += float3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (sample14 - sample2) / 2.0;
gy = (-sample10 + 8.0 * sample9 - 8.0 * sample7 + sample6) / 12.0;
abd += float3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (sample15 - sample3) / 2.0;
gy = (-sample11 + 8.0 * sample10 - 8.0 * sample8 + sample7) / 12.0;
abd += float3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (sample16 - sample4) / 2.0;
gy = (sample11 - sample9) / 2.0;
abd += float3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (-sample25 + 8.0 * sample19 - 8.0 * sample7 + sample1) / 12.0;
gy = (sample14 - sample12) / 2.0;
abd += float3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-sample26 + 8.0 * sample20 - 8.0 * sample8 + sample2) / 12.0;
gy = (-sample16 + 8.0 * sample15 - 8.0 * sample13 + sample12) / 12.0;
abd += float3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-sample27 + 8.0 * sample21 - 8.0 * sample9 + sample3) / 12.0;
gy = (-sample17 + 8.0 * sample16 - 8.0 * sample14 + sample13) / 12.0;
abd += float3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-sample28 + 8.0 * sample22 - 8.0 * sample10 + sample4) / 12.0;
gy = (sample17 - sample15) / 2.0;
abd += float3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-sample31 + 8.0 * sample25 - 8.0 * sample13 + sample7) / 12.0;
gy = (sample20 - sample18) / 2.0;
abd += float3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-sample32 + 8.0 * sample26 - 8.0 * sample14 + sample8) / 12.0;
gy = (-sample22 + 8.0 * sample21 - 8.0 * sample19 + sample18) / 12.0;
abd += float3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-sample33 + 8.0 * sample27 - 8.0 * sample15 + sample9) / 12.0;
gy = (-sample23 + 8.0 * sample22 - 8.0 * sample20 + sample19) / 12.0;
abd += float3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-sample34 + 8.0 * sample28 - 8.0 * sample16 + sample10) / 12.0;
gy = (sample23 - sample21) / 2.0;
abd += float3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (sample31 - sample19) / 2.0;
gy = (sample26 - sample24) / 2.0;
abd += float3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (sample32 - sample20) / 2.0;
gy = (-sample28 + 8.0 * sample27 - 8.0 * sample25 + sample24) / 12.0;
abd += float3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (sample33 - sample21) / 2.0;
gy = (-sample29 + 8.0 * sample28 - 8.0 * sample26 + sample25) / 12.0;
abd += float3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (sample34 - sample22) / 2.0;
gy = (sample29 - sample27) / 2.0;
abd += float3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
float theta = lerp(mod(atan2(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
float mu = lerp((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = lerp(lerp(0.0, 1.0, lambda >= 0.004), lerp(2.0, 3.0, lambda >= 0.05), lambda >= 0.016);
float coherence = lerp(lerp(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
float strength = mix(mix(0.0, 1.0, lambda >= 0.004), mix(2.0, 3.0, lambda >= 0.05), lambda >= 0.016);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
float coord_y = ((angle * 4.0 + strength) * 3.0 + coherence) / 288.0;
float res = 0.0;
float4 w;
w = ravu_zoom_lut3.SampleLevel(sam1, float2(0.0, coord_y) + subpix, 0);
vec4 w;
w = texture(ravu_zoom_lut3, vec2(0.0, coord_y) + subpix);
res += sample0 * w[0];
res += sample1 * w[1];
res += sample2 * w[2];
res += sample3 * w[3];
w = ravu_zoom_lut3.SampleLevel(sam1, float2(0.2, coord_y) + subpix, 0);
w = texture(ravu_zoom_lut3, vec2(0.2, coord_y) + subpix);
res += sample4 * w[0];
res += sample5 * w[1];
res += sample6 * w[2];
res += sample7 * w[3];
w = ravu_zoom_lut3.SampleLevel(sam1, float2(0.4, coord_y) + subpix, 0);
w = texture(ravu_zoom_lut3, vec2(0.4, coord_y) + subpix);
res += sample8 * w[0];
res += sample9 * w[1];
res += sample10 * w[2];
res += sample11 * w[3];
w = ravu_zoom_lut3.SampleLevel(sam1, float2(0.6, coord_y) + subpix, 0);
w = texture(ravu_zoom_lut3, vec2(0.6, coord_y) + subpix);
res += sample12 * w[0];
res += sample13 * w[1];
res += sample14 * w[2];
res += sample15 * w[3];
w = ravu_zoom_lut3.SampleLevel(sam1, float2(0.8, coord_y) + subpix, 0);
w = texture(ravu_zoom_lut3, vec2(0.8, coord_y) + subpix);
res += sample16 * w[0];
res += sample17 * w[1];
w = ravu_zoom_lut3.SampleLevel(sam1, float2(0.0, coord_y) + subpix_inv, 0);
w = texture(ravu_zoom_lut3, vec2(0.0, coord_y) + subpix_inv);
res += sample35 * w[0];
res += sample34 * w[1];
res += sample33 * w[2];
res += sample32 * w[3];
w = ravu_zoom_lut3.SampleLevel(sam1, float2(0.2, coord_y) + subpix_inv, 0);
w = texture(ravu_zoom_lut3, vec2(0.2, coord_y) + subpix_inv);
res += sample31 * w[0];
res += sample30 * w[1];
res += sample29 * w[2];
res += sample28 * w[3];
w = ravu_zoom_lut3.SampleLevel(sam1, float2(0.4, coord_y) + subpix_inv, 0);
w = texture(ravu_zoom_lut3, vec2(0.4, coord_y) + subpix_inv);
res += sample27 * w[0];
res += sample26 * w[1];
res += sample25 * w[2];
res += sample24 * w[3];
w = ravu_zoom_lut3.SampleLevel(sam1, float2(0.6, coord_y) + subpix_inv, 0);
w = texture(ravu_zoom_lut3, vec2(0.6, coord_y) + subpix_inv);
res += sample23 * w[0];
res += sample22 * w[1];
res += sample21 * w[2];
res += sample20 * w[3];
w = ravu_zoom_lut3.SampleLevel(sam1, float2(0.8, coord_y) + subpix_inv, 0);
w = texture(ravu_zoom_lut3, vec2(0.8, coord_y) + subpix_inv);
res += sample19 * w[0];
res += sample18 * w[1];
res = saturate(res);
float2 originUV = mul(rgb2uv, INPUT.SampleLevel(sam1, (destPos + 0.5f) * GetOutputPt(), 0).rgb);
WriteToOutput(destPos, mul(yuv2rgb, float3(res, originUV)));
res = clamp(res, 0.0, 1.0);
imageStore(out_image, ivec2(gl_GlobalInvocationID), res);
}

View file

@ -0,0 +1,281 @@
// This file is generated by the scripts available at https://github.com/hauuau/magpie-prescalers
// Please don't edit this file directly.
// Generated by: ravu-zoom.py --target rgb --weights-file weights\ravu-zoom_weights-r3.py --float-format float16dx --use-compute-shader --use-magpie --overwrite
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//!MAGPIE EFFECT
//!VERSION 4
//!TEXTURE
Texture2D INPUT;
//!SAMPLER
//!FILTER POINT
SamplerState sam_INPUT;
//!TEXTURE
//
//
Texture2D OUTPUT;
//!TEXTURE
//!SOURCE ravu_zoom_lut3_f16.dds
//!FORMAT R16G16B16A16_FLOAT
Texture2D ravu_zoom_lut3;
//!SAMPLER
//!FILTER LINEAR
SamplerState sam_ravu_zoom_lut3;
//!COMMON
#include "prescalers.hlsli"
#define LAST_PASS 1
//!PASS 1
//!DESC RAVU-Zoom (rgb, r3, compute)
//!IN INPUT, ravu_zoom_lut3
//!OUT OUTPUT
//!BLOCK_SIZE 32, 8
//!NUM_THREADS 32, 8
// Weights used by Pass1 to reduce an RGB sample to a single luma value (dot product).
static const vec3 color_primary = vec3(0.2126, 0.7152, 0.0722);
// Remaps x from [0, 1] to [0.5 / lut_size, 1 - 0.5 / lut_size], i.e. onto the range spanned by
// the centers of the first and last of lut_size texels.
#define LUTPOS(x, lut_size) mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x))
// Group-shared tile of input samples filled by Pass1; it is indexed with a row stride of 38,
// so 532 elements correspond to 38 x 14 samples.
shared vec3 samples[532];
#define CURRENT_PASS 1
// RGB variant: fetched texels are used as-is (no conversion on load).
#define GET_SAMPLE(x) x
// Redirects the GLSL-style imageStore() call to a plain write into OUTPUT; the image argument is ignored.
#define imageStore(out_image, pos, val) imageStoreOverride(pos, val)
// Writes `value` to the OUTPUT texture at integer pixel position `pos`.
void imageStoreOverride(uint2 pos, float4 value) { OUTPUT[pos] = value; }
// Samples INPUT at `pos` through the texture() macro and passes the result through GET_SAMPLE.
#define INPUT_tex(pos) GET_SAMPLE(vec4(texture(INPUT, pos)))
// Float copies of the values returned by GetInputSize() / GetInputPt()
// (presumably the input size in pixels and the size of one input pixel in UV space -- TODO confirm).
static const float2 INPUT_size = float2(GetInputSize());
static const float2 INPUT_pt = float2(GetInputPt());
// Samples the weights LUT at `pos`.
#define ravu_zoom_lut3_tex(pos) (vec4(texture(ravu_zoom_lut3, pos)))
// The HOOKED_* names used by the shader body are mapped onto the INPUT texture.
#define HOOKED_tex(pos) INPUT_tex(pos)
#define HOOKED_size INPUT_size
#define HOOKED_pt INPUT_pt
// Pass 1 of the RGB RAVU-Zoom (r3) compute shader.
//
// Each thread group first loads the input samples it needs into the group-shared `samples`
// array, then every thread produces one output pixel as a weighted sum of a 6x6 neighborhood
// of input samples. The weights are read from `ravu_zoom_lut3`; the LUT row is chosen from
// quantized gradient statistics of the neighborhood's luma, the LUT column offset from the
// sub-pixel position.
//
// blockStart: origin of the block handled by this thread group (consumed through the
//             gl_WorkGroupID / gl_GlobalInvocationID macros and for destPos).
// threadId:   index of this thread inside the group.
void Pass1(uint2 blockStart, uint3 threadId) {
// First and last invocation coordinates covered by this thread group.
ivec2 group_begin = ivec2(gl_WorkGroupID) * ivec2(gl_WorkGroupSize);
ivec2 group_end = group_begin + ivec2(gl_WorkGroupSize) - ivec2(1, 1);
// Bounds (inclusive) of the input-sample rectangle needed by the group: the mapped positions
// of the first/last invocation, extended by 2 samples on the low side and 3 on the high side.
// NOTE(review): the 0.5001 / 0.4999 biases presumably guard against rounding at exact sample
// boundaries -- confirm against upstream.
ivec2 rectl = ivec2(floor(HOOKED_size * HOOKED_map(group_begin) - 0.5001)) - 2;
ivec2 rectr = ivec2(floor(HOOKED_size * HOOKED_map(group_end) - 0.4999)) + 3;
ivec2 rect = rectr - rectl + 1;
// Cooperative load: the threads of the group stride over the rectangle and copy it into
// shared memory, using a fixed row stride of 38 elements.
for (int id = int(gl_LocalInvocationIndex); id < rect.x * rect.y;
id += int(gl_WorkGroupSize.x * gl_WorkGroupSize.y)) {
uint y = (uint)id / rect.x, x = (uint)id % rect.x;
samples[x + y * 38] = HOOKED_tex(HOOKED_pt * (vec2(rectl + ivec2(x, y)) + vec2(0.5, 0.5))).xyz;
}
// All loads must be complete before any thread reads the shared tile.
barrier();
#if CURRENT_PASS == LAST_PASS
// Threads that fall outside the output are dropped here, after the barrier, so that every
// thread of the group still took part in the load and the synchronization above.
uint2 destPos = blockStart + threadId.xy;
uint2 outputSize = GetOutputSize();
if (destPos.x >= outputSize.x || destPos.y >= outputSize.y) {
return;
}
#endif
// Position of this output pixel in input-sample units, split into the fractional offset
// (subpix) and the remaining base position.
vec2 pos = HOOKED_size * HOOKED_map(ivec2(gl_GlobalInvocationID));
vec2 subpix = fract(pos - 0.5);
pos -= subpix;
// Convert the sub-pixel offset (and its mirror, subpix_inv) into LUT coordinates: remap with
// LUTPOS for a 9-entry axis, then scale by 1/5 horizontally and 1/288 vertically. The x
// offsets 0.0 .. 0.8 and coord_y added further below select the 1/5-wide column group and
// the 1/288-high row of the LUT.
subpix = LUTPOS(subpix, vec2(9.0, 9.0));
vec2 subpix_inv = 1.0 - subpix;
subpix /= vec2(5.0, 288.0);
subpix_inv /= vec2(5.0, 288.0);
// Index of the base sample inside the shared tile.
ivec2 ipos = ivec2(floor(pos)) - rectl;
int lpos = ipos.x + ipos.y * 38;
// 6x6 neighborhood around the base sample. With a row stride of 38, sample index i has the
// horizontal offset (i / 6) - 2 and the vertical offset (i % 6) - 2, e.g. -78 = -2 * 38 - 2.
vec3 sample0 = samples[-78 + lpos];
vec3 sample1 = samples[-40 + lpos];
vec3 sample2 = samples[-2 + lpos];
vec3 sample3 = samples[36 + lpos];
vec3 sample4 = samples[74 + lpos];
vec3 sample5 = samples[112 + lpos];
vec3 sample6 = samples[-77 + lpos];
vec3 sample7 = samples[-39 + lpos];
vec3 sample8 = samples[-1 + lpos];
vec3 sample9 = samples[37 + lpos];
vec3 sample10 = samples[75 + lpos];
vec3 sample11 = samples[113 + lpos];
vec3 sample12 = samples[-76 + lpos];
vec3 sample13 = samples[-38 + lpos];
vec3 sample14 = samples[0 + lpos];
vec3 sample15 = samples[38 + lpos];
vec3 sample16 = samples[76 + lpos];
vec3 sample17 = samples[114 + lpos];
vec3 sample18 = samples[-75 + lpos];
vec3 sample19 = samples[-37 + lpos];
vec3 sample20 = samples[1 + lpos];
vec3 sample21 = samples[39 + lpos];
vec3 sample22 = samples[77 + lpos];
vec3 sample23 = samples[115 + lpos];
vec3 sample24 = samples[-74 + lpos];
vec3 sample25 = samples[-36 + lpos];
vec3 sample26 = samples[2 + lpos];
vec3 sample27 = samples[40 + lpos];
vec3 sample28 = samples[78 + lpos];
vec3 sample29 = samples[116 + lpos];
vec3 sample30 = samples[-73 + lpos];
vec3 sample31 = samples[-35 + lpos];
vec3 sample32 = samples[3 + lpos];
vec3 sample33 = samples[41 + lpos];
vec3 sample34 = samples[79 + lpos];
vec3 sample35 = samples[117 + lpos];
// Luma of each sample that takes part in the gradient computation. The four corner samples
// (0, 5, 30, 35) are not referenced by any gradient below, so no luma is computed for them.
float luma1 = dot(sample1, color_primary);
float luma2 = dot(sample2, color_primary);
float luma3 = dot(sample3, color_primary);
float luma4 = dot(sample4, color_primary);
float luma6 = dot(sample6, color_primary);
float luma7 = dot(sample7, color_primary);
float luma8 = dot(sample8, color_primary);
float luma9 = dot(sample9, color_primary);
float luma10 = dot(sample10, color_primary);
float luma11 = dot(sample11, color_primary);
float luma12 = dot(sample12, color_primary);
float luma13 = dot(sample13, color_primary);
float luma14 = dot(sample14, color_primary);
float luma15 = dot(sample15, color_primary);
float luma16 = dot(sample16, color_primary);
float luma17 = dot(sample17, color_primary);
float luma18 = dot(sample18, color_primary);
float luma19 = dot(sample19, color_primary);
float luma20 = dot(sample20, color_primary);
float luma21 = dot(sample21, color_primary);
float luma22 = dot(sample22, color_primary);
float luma23 = dot(sample23, color_primary);
float luma24 = dot(sample24, color_primary);
float luma25 = dot(sample25, color_primary);
float luma26 = dot(sample26, color_primary);
float luma27 = dot(sample27, color_primary);
float luma28 = dot(sample28, color_primary);
float luma29 = dot(sample29, color_primary);
float luma31 = dot(sample31, color_primary);
float luma32 = dot(sample32, color_primary);
float luma33 = dot(sample33, color_primary);
float luma34 = dot(sample34, color_primary);
// Accumulate the weighted products (gx*gx, gx*gy, gy*gy) of the luma gradient at each of the
// 16 inner samples (7..10, 13..16, 19..22, 25..28). A gradient component uses the two-point
// difference (f(+1) - f(-1)) / 2 or the four-point difference
// (-f(+2) + 8 f(+1) - 8 f(-1) + f(-2)) / 12, depending on the sample's position in the
// 4x4 block. Each term is scaled by a fixed per-position weight.
vec3 abd = vec3(0.0, 0.0, 0.0);
float gx, gy;
gx = (luma13 - luma1) / 2.0;
gy = (luma8 - luma6) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma14 - luma2) / 2.0;
gy = (-luma10 + 8.0 * luma9 - 8.0 * luma7 + luma6) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma15 - luma3) / 2.0;
gy = (-luma11 + 8.0 * luma10 - 8.0 * luma8 + luma7) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma16 - luma4) / 2.0;
gy = (luma11 - luma9) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (-luma25 + 8.0 * luma19 - 8.0 * luma7 + luma1) / 12.0;
gy = (luma14 - luma12) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-luma26 + 8.0 * luma20 - 8.0 * luma8 + luma2) / 12.0;
gy = (-luma16 + 8.0 * luma15 - 8.0 * luma13 + luma12) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma27 + 8.0 * luma21 - 8.0 * luma9 + luma3) / 12.0;
gy = (-luma17 + 8.0 * luma16 - 8.0 * luma14 + luma13) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma28 + 8.0 * luma22 - 8.0 * luma10 + luma4) / 12.0;
gy = (luma17 - luma15) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-luma31 + 8.0 * luma25 - 8.0 * luma13 + luma7) / 12.0;
gy = (luma20 - luma18) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (-luma32 + 8.0 * luma26 - 8.0 * luma14 + luma8) / 12.0;
gy = (-luma22 + 8.0 * luma21 - 8.0 * luma19 + luma18) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma33 + 8.0 * luma27 - 8.0 * luma15 + luma9) / 12.0;
gy = (-luma23 + 8.0 * luma22 - 8.0 * luma20 + luma19) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.07901060453704994;
gx = (-luma34 + 8.0 * luma28 - 8.0 * luma16 + luma10) / 12.0;
gy = (luma23 - luma21) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma31 - luma19) / 2.0;
gy = (luma26 - luma24) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
gx = (luma32 - luma20) / 2.0;
gy = (-luma28 + 8.0 * luma27 - 8.0 * luma25 + luma24) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma33 - luma21) / 2.0;
gy = (-luma29 + 8.0 * luma28 - 8.0 * luma26 + luma25) / 12.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.06153352068439959;
gx = (luma34 - luma22) / 2.0;
gy = (luma29 - luma27) / 2.0;
abd += vec3(gx * gx, gx * gy, gy * gy) * 0.04792235409415088;
// Treat (a, b; b, d) as a symmetric 2x2 matrix: T is its trace, D its determinant, and
// L1 >= L2 are its eigenvalues T/2 +/- delta.
float a = abd.x, b = abd.y, d = abd.z;
float T = a + d, D = a * d - b * b;
float delta = sqrt(max(T * T / 4.0 - D, 0.0));
float L1 = T / 2.0 + delta, L2 = T / 2.0 - delta;
float sqrtL1 = sqrt(L1), sqrtL2 = sqrt(L2);
// theta: angle derived from (L1 - a, b), wrapped by pi; forced to 0 when b is (almost) zero.
float theta = mix(mod(atan(L1 - a, b) + 3.141592653589793, 3.141592653589793), 0.0, abs(b) < 1.192092896e-7);
float lambda = sqrtL1;
// mu: relative difference of the eigenvalue square roots; forced to 0 when both are (almost) zero.
float mu = mix((sqrtL1 - sqrtL2) / (sqrtL1 + sqrtL2), 0.0, sqrtL1 + sqrtL2 < 1.192092896e-7);
// Quantize: angle in steps of pi/24, strength into 4 levels (thresholds 0.004 / 0.016 / 0.05),
// coherence into 3 levels (thresholds 0.25 / 0.5).
float angle = floor(theta * 24.0 / 3.141592653589793);
float strength = mix(mix(0.0, 1.0, lambda >= 0.004), mix(2.0, 3.0, lambda >= 0.05), lambda >= 0.016);
float coherence = mix(mix(0.0, 1.0, mu >= 0.25), 2.0, mu >= 0.5);
// Combine the three quantized values into one LUT row index and normalize by 288 rows.
float coord_y = ((angle * 4.0 + strength) * 3.0 + coherence) / 288.0;
// Weighted sum of the 36 samples. Each LUT fetch yields four weights. Samples 0..17 use
// weights fetched at `subpix`; samples 35..18 (reverse order) use the same LUT columns
// fetched at `subpix_inv`.
// NOTE(review): this looks like it exploits a point symmetry of the kernel so that only half
// of the weights need to be stored -- confirm against upstream.
vec3 res = vec3(0.0, 0.0, 0.0);
vec4 w;
w = texture(ravu_zoom_lut3, vec2(0.0, coord_y) + subpix);
res += sample0 * w[0];
res += sample1 * w[1];
res += sample2 * w[2];
res += sample3 * w[3];
w = texture(ravu_zoom_lut3, vec2(0.2, coord_y) + subpix);
res += sample4 * w[0];
res += sample5 * w[1];
res += sample6 * w[2];
res += sample7 * w[3];
w = texture(ravu_zoom_lut3, vec2(0.4, coord_y) + subpix);
res += sample8 * w[0];
res += sample9 * w[1];
res += sample10 * w[2];
res += sample11 * w[3];
w = texture(ravu_zoom_lut3, vec2(0.6, coord_y) + subpix);
res += sample12 * w[0];
res += sample13 * w[1];
res += sample14 * w[2];
res += sample15 * w[3];
w = texture(ravu_zoom_lut3, vec2(0.8, coord_y) + subpix);
res += sample16 * w[0];
res += sample17 * w[1];
w = texture(ravu_zoom_lut3, vec2(0.0, coord_y) + subpix_inv);
res += sample35 * w[0];
res += sample34 * w[1];
res += sample33 * w[2];
res += sample32 * w[3];
w = texture(ravu_zoom_lut3, vec2(0.2, coord_y) + subpix_inv);
res += sample31 * w[0];
res += sample30 * w[1];
res += sample29 * w[2];
res += sample28 * w[3];
w = texture(ravu_zoom_lut3, vec2(0.4, coord_y) + subpix_inv);
res += sample27 * w[0];
res += sample26 * w[1];
res += sample25 * w[2];
res += sample24 * w[3];
w = texture(ravu_zoom_lut3, vec2(0.6, coord_y) + subpix_inv);
res += sample23 * w[0];
res += sample22 * w[1];
res += sample21 * w[2];
res += sample20 * w[3];
w = texture(ravu_zoom_lut3, vec2(0.8, coord_y) + subpix_inv);
res += sample19 * w[0];
res += sample18 * w[1];
// Clamp to [0, 1] and write the pixel with alpha 1 (imageStore is redirected to OUTPUT).
res = clamp(res, 0.0, 1.0);
imageStore(out_image, ivec2(gl_GlobalInvocationID), vec4(res, 1.0));
}

View file

@ -0,0 +1,73 @@
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// Conversion from GLSL to HLSL is done through defines as much as possible to ease synchronization and comparison with upstream

// GLSL vector / matrix type names mapped onto their HLSL counterparts.
#define ivec2 int2
#define vec2 float2
#define vec3 float3
#define vec4 float4
#define mat4x3 float4x3
// Component-wise matrix product (HLSL's `*` on matrices is per-component).
// Both arguments are parenthesized so that expression arguments such as `a + b`
// keep their grouping after macro expansion.
#define matrixCompMult(mtx1, mtx2) ((mtx1) * (mtx2))
// GLSL storage qualifier for workgroup-shared memory.
#define shared groupshared
// Only the two-argument GLSL atan(y, x) form is supported by this alias.
#define atan atan2
// GLSL barrier() as used by the shaders: synchronize the group and its shared memory.
#define barrier GroupMemoryBarrierWithGroupSync
#define fract frac
#define intBitsToFloat asfloat
#define inversesqrt rsqrt
// mod deals only with positive numbers here and it could be substituted by fmod
#define mod fmod
// HLSL lerp() does not treat a bool third argument the way GLSL mix() does, so the
// bool-selector form gets its own overload: yields b when c is true, a otherwise.
float mix(float a, float b, bool c) {
    float chosen = a;
    if (c) {
        chosen = b;
    }
    return chosen;
}
// Generates a GLSL-style mix(a, b, c) overload that forwards to HLSL lerp().
// type1 is the type of both operands and of the result, type3 the type of the blend factor.
#define MIX_LERP(type1, type3) type1 mix(type1 a, type1 b, type3 c) { return lerp(a, b, c); }
// Overloads provided: scalar/scalar, per-component float2 factor, and float3 / float4
// operands blended with a single scalar factor.
MIX_LERP(float, float)
MIX_LERP(float2, float2)
MIX_LERP(float3, float)
MIX_LERP(float4, float)
// Emulates GLSL texture(): samples mip level 0 of `tex` through the sampler named sam_<tex>,
// so every texture passed to this macro needs a matching `sam_<name>` SamplerState in scope.
#define texture(tex, pos) tex.SampleLevel(sam_##tex, pos, 0.0)
// Value of GetOutputPt() as float2 (presumably the size of one output pixel in UV space -- TODO confirm).
#define OUTPUT_pt float2(GetOutputPt())
// Center of the pixel with integer coordinates `id`, in pixel units.
#define frag_pos(id) (vec2(id) + vec2(0.5, 0.5))
// The same pixel center scaled by OUTPUT_pt.
#define frag_map(id) (OUTPUT_pt * frag_pos(id))
#define HOOKED_map(id) frag_map(id)
// Emulation of the GLSL compute built-ins. These expand to expressions over `threadId` and
// `blockStart`, so they can only be used where variables with those names are in scope.
// Flattened index of the thread within its group (row-major over MP_NUM_THREADS_X).
#define gl_LocalInvocationIndex (threadId.y*MP_NUM_THREADS_X + threadId.x)
#define gl_LocalInvocationID threadId
#define gl_WorkGroupSize (uint2(MP_NUM_THREADS_X, MP_NUM_THREADS_Y))
// Group index recovered from the block origin (integer division by the block size).
#define gl_WorkGroupID (blockStart / uint2(MP_BLOCK_WIDTH, MP_BLOCK_HEIGHT))
#define gl_GlobalInvocationID (gl_WorkGroupID*gl_WorkGroupSize + threadId.xy)
// disable warning about unknown pragma
#pragma warning(disable: 3568)
// disable warning about too many threads (ravu-r4-rgb triggers it)
#pragma warning(disable: 4714)
// https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.709-6-201506-I!!PDF-E.pdf
// Luma weights for (R, G, B): Y = dot(rgb, rgb2y).
static const float3 rgb2y = float3(0.2126, 0.7152, 0.0722);
// RGB -> (U, V) chroma. With Y as above, the rows expand to
//   U = (B - Y) / 1.8556   and   V = (R - Y) / 1.5748,
// where 1.8556 = 2 * (1 - 0.0722) and 1.5748 = 2 * (1 - 0.2126).
static const float2x3 rgb2uv = {
-0.2126/1.8556, -0.7152/1.8556, 0.9278/1.8556,
0.7874/1.5748, -0.7152/1.5748, -0.0722/1.5748
};
// (Y, U, V) -> RGB, the inverse of the two conversions above:
//   R = Y + 1.5748 * V,  G = Y - 0.187324 * U - 0.468124 * V,  B = Y + 1.8556 * U.
static const float3x3 yuv2rgb = {
1, 0, 1.5748,
1, -0.187324, -0.468124,
1, 1.8556, 0
};

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -5,7 +5,7 @@
<Project xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
<ClCompile>
<!-- 使用 MultiThreadedDebug 而不是 MultiThreadedDebugDLL 以避免对 VCRUNTIME140d.dll 和 MSVCP140d.dll 的依赖 -->
<!-- 避免依赖 VCRUNTIME140d.dll 和 MSVCP140d.dll -->
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
</ClCompile>
<Link>
@ -16,7 +16,7 @@
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
<ClCompile>
<!-- 使用 MultiThreaded 而不是 MultiThreadedDLL 以避免对 VCRUNTIME140.dll 和 MSVCP140.dll 的依赖 -->
<!-- 避免依赖 VCRUNTIME140.dll 和 MSVCP140.dll -->
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
</ClCompile>
<Link>

View file

@ -4,6 +4,7 @@
#include "AboutPage.g.cpp"
#endif
#include "Win32Utils.h"
#include "CommonSharedConstants.h"
namespace winrt::Magpie::App::implementation {
@ -12,7 +13,8 @@ void AboutPage::VersionTextBlock_DoubleTapped(IInspectable const&, Input::Double
if (!_viewModel.IsDeveloperMode() && (GetAsyncKeyState(VK_MENU) & 0x8000)) {
_viewModel.IsDeveloperMode(true);
hstring message = ResourceLoader::GetForCurrentView().GetString(L"About_DeveloperModeEnabled");
const hstring message = ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID)
.GetString(L"About_DeveloperModeEnabled");
Application::Current().as<App>().RootPage().ShowToast(message);
}
}

View file

@ -8,6 +8,7 @@
#include "AppSettings.h"
#include "StrUtils.h"
#include "IconHelper.h"
#include "CommonSharedConstants.h"
using namespace winrt;
using namespace Windows::UI::Xaml::Media::Imaging;
@ -56,7 +57,8 @@ AboutViewModel::AboutViewModel() {
}
hstring AboutViewModel::Version() const noexcept {
ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView();
ResourceLoader resourceLoader =
ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID);
return hstring(StrUtils::Concat(
resourceLoader.GetString(L"About_Version_Version"),
#ifdef MAGPIE_VERSION_TAG
@ -208,7 +210,8 @@ hstring AboutViewModel::UpdateCardTitle() const noexcept {
return {};
}
ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView();
ResourceLoader resourceLoader =
ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID);
hstring titleFmt = resourceLoader.GetString(L"Home_UpdateCard_Title");
return hstring(fmt::format(fmt::runtime(std::wstring_view(titleFmt)), updateService.Tag()));
}

View file

@ -126,15 +126,6 @@ void App::IsShowTrayIconChanged(event_token const& token) {
AppSettings::Get().IsShowTrayIconChanged(token);
}
void App::HwndMain(uint64_t value) noexcept {
if (_hwndMain == (HWND)value) {
return;
}
_hwndMain = (HWND)value;
_hwndMainChangedEvent(*this, value);
}
void App::RootPage(Magpie::App::RootPage const& rootPage) noexcept {
// 显示主窗口前等待 EffectsService 完成初始化
EffectsService::Get().WaitForInitialize();

View file

@ -27,14 +27,8 @@ public:
return (uint64_t)_hwndMain;
}
void HwndMain(uint64_t value) noexcept;
event_token HwndMainChanged(EventHandler<uint64_t> const& handler) {
return _hwndMainChangedEvent.add(handler);
}
void HwndMainChanged(event_token const& token) noexcept {
_hwndMainChangedEvent.remove(token);
void HwndMain(uint64_t value) noexcept {
_hwndMain = (HWND)value;
}
// 在由外部源引发的回调中可能返回 nullptr
@ -51,14 +45,8 @@ public:
private:
Hosting::WindowsXamlManager _windowsXamlManager{ nullptr };
HWND _hwndMain{};
event<EventHandler<uint64_t>> _hwndMainChangedEvent;
weak_ref<Magpie::App::RootPage> _rootPage{ nullptr };
event<EventHandler<bool>> _hostWndFocusChangedEvent;
bool _isHostWndFocused = false;
HWND _hwndMain = NULL;
bool _isClosed = false;
////////////////////////////////////////////////////

View file

@ -11,6 +11,8 @@ namespace Magpie.App {
#include "TextBlockHelper.idl"
#include "SimpleStackPanel.idl"
#include "WrapPanel.idl"
#include "CaptionButtonsControl.idl"
#include "TitleBarControl.idl"
#include "PageFrame.idl"
#include "SettingsCard.idl"
#include "SettingsExpander.idl"
@ -35,8 +37,6 @@ namespace Magpie.App {
#include "ScalingConfigurationPage.idl"
#include "ProfilePage.idl"
#include "SettingsPage.idl"
#include "CaptionButtonsControl.idl"
#include "TitleBarControl.idl"
namespace Magpie.App {
enum ShortcutAction {
@ -71,7 +71,6 @@ namespace Magpie.App {
event Windows.Foundation.EventHandler<Boolean> IsShowTrayIconChanged;
UInt64 HwndMain;
event Windows.Foundation.EventHandler<UInt64> HwndMainChanged;
RootPage RootPage;

View file

@ -7,10 +7,10 @@
<ResourceDictionary>
<ResourceDictionary.MergedDictionaries>
<muxc:XamlControlsResources ControlsResourcesVersion="Version2" />
<ResourceDictionary Source="ms-appx:///KeyVisual.Resource.xaml" />
<ResourceDictionary Source="ms-appx:///SettingsCard.Resource.xaml" />
<ResourceDictionary Source="ms-appx:///SettingsExpander.Resource.xaml" />
<ResourceDictionary Source="ms-appx:///SettingsGroup.Resource.xaml" />
<ResourceDictionary Source="ms-appx:///Magpie.App/KeyVisual.Resource.xaml" />
<ResourceDictionary Source="ms-appx:///Magpie.App/SettingsCard.Resource.xaml" />
<ResourceDictionary Source="ms-appx:///Magpie.App/SettingsExpander.Resource.xaml" />
<ResourceDictionary Source="ms-appx:///Magpie.App/SettingsGroup.Resource.xaml" />
</ResourceDictionary.MergedDictionaries>
<local:BoolNegationConverter x:Key="NegationConverter" />

View file

@ -161,8 +161,10 @@ static HRESULT CALLBACK TaskDialogCallback(
}
static void ShowErrorMessage(const wchar_t* mainInstruction, const wchar_t* content) noexcept {
hstring errorStr = ResourceLoader::GetForCurrentView().GetString(L"AppSettings_Dialog_Error");
hstring exitStr = ResourceLoader::GetForCurrentView().GetString(L"AppSettings_Dialog_Exit");
ResourceLoader resourceLoader =
ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID);
const hstring errorStr = resourceLoader.GetString(L"AppSettings_Dialog_Error");
const hstring exitStr = resourceLoader.GetString(L"AppSettings_Dialog_Exit");
TASKDIALOGCONFIG tdc{ sizeof(TASKDIALOGCONFIG) };
tdc.dwFlags = TDF_SIZE_TO_CONTENT;
@ -186,7 +188,8 @@ static bool ShowOkCancelWarningMessage(
) noexcept {
TASKDIALOGCONFIG tdc{ sizeof(TASKDIALOGCONFIG) };
tdc.dwFlags = TDF_SIZE_TO_CONTENT;
hstring warningStr = ResourceLoader::GetForCurrentView().GetString(L"AppSettings_Dialog_Warning");
const hstring warningStr = ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID)
.GetString(L"AppSettings_Dialog_Warning");
tdc.pszWindowTitle = warningStr.c_str();
tdc.pszMainIcon = TD_WARNING_ICON;
tdc.pszMainInstruction = mainInstruction;
@ -229,7 +232,8 @@ bool AppSettings::Initialize() noexcept {
std::string configText;
if (!Win32Utils::ReadTextFile(_configPath.c_str(), configText)) {
logger.Error("读取配置文件失败");
ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView();
ResourceLoader resourceLoader =
ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID);
hstring title = resourceLoader.GetString(L"AppSettings_ErrorDialog_ReadFailed");
hstring content = resourceLoader.GetString(L"AppSettings_ErrorDialog_ConfigLocation");
ShowErrorMessage(title.c_str(), fmt::format(fmt::runtime(std::wstring_view(content)), _configPath).c_str());
@ -248,7 +252,8 @@ bool AppSettings::Initialize() noexcept {
doc.ParseInsitu(configText.data());
if (doc.HasParseError()) {
Logger::Get().Error(fmt::format("解析配置失败\n\t错误码:{}", (int)doc.GetParseError()));
ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView();
ResourceLoader resourceLoader =
ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID);
hstring title = resourceLoader.GetString(L"AppSettings_ErrorDialog_NotValidJson");
hstring content = resourceLoader.GetString(L"AppSettings_ErrorDialog_ConfigLocation");
ShowErrorMessage(title.c_str(), fmt::format(fmt::runtime(std::wstring_view(content)), _configPath).c_str());
@ -257,7 +262,8 @@ bool AppSettings::Initialize() noexcept {
if (!doc.IsObject()) {
Logger::Get().Error("配置文件根元素不是 Object");
ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView();
ResourceLoader resourceLoader =
ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID);
hstring title = resourceLoader.GetString(L"AppSettings_ErrorDialog_ParseFailed");
hstring content = resourceLoader.GetString(L"AppSettings_ErrorDialog_ConfigLocation");
ShowErrorMessage(title.c_str(), fmt::format(fmt::runtime(std::wstring_view(content)), _configPath).c_str());
@ -273,7 +279,8 @@ bool AppSettings::Initialize() noexcept {
if (settingsVersion > SETTINGS_VERSION) {
Logger::Get().Warn("未知的配置文件版本");
ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView();
ResourceLoader resourceLoader =
ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID);
if (_isPortableMode) {
hstring contentStr = resourceLoader.GetString(
L"AppSettings_PortableModeUnkownConfiguration_Content");

View file

@ -64,8 +64,10 @@ void CaptionButtonsControl::ReleaseButton(CaptionButton button) {
switch (_pressedButton.value()) {
case CaptionButton::Minimize:
{
PostMessage(hwndMain, WM_SYSCOMMAND, SC_MINIMIZE, 0);
break;
}
case CaptionButton::Maximize:
{
POINT cursorPos;
@ -80,9 +82,11 @@ void CaptionButtonsControl::ReleaseButton(CaptionButton button) {
break;
}
case CaptionButton::Close:
{
PostMessage(hwndMain, WM_SYSCOMMAND, SC_CLOSE, 0);
break;
}
}
}
_pressedButton.reset();

View file

@ -8,6 +8,7 @@
#include "Win32Utils.h"
#include "StrUtils.h"
#include "UpdateService.h"
#include "CommonSharedConstants.h"
namespace winrt::Magpie::App::implementation {
@ -51,7 +52,8 @@ hstring HomeViewModel::TimerLabelText() const noexcept {
hstring HomeViewModel::TimerButtonText() const noexcept {
ScalingService& ScalingService = ScalingService::Get();
ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView();
ResourceLoader resourceLoader =
ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID);
if (ScalingService.IsTimerOn()) {
return resourceLoader.GetString(L"Home_Timer_Cancel");
} else {
@ -125,7 +127,8 @@ hstring HomeViewModel::RestoreWndDesc() const noexcept {
std::wstring title(GetWindowTextLength(wndToRestore), L'\0');
GetWindowText(wndToRestore, title.data(), (int)title.size() + 1);
ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView();
ResourceLoader resourceLoader =
ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID);
hstring curWindow = resourceLoader.GetString(L"Home_AutoRestore_CurWindow");
if (title.empty()) {
hstring emptyTitle = resourceLoader.GetString(L"Home_AutoRestore_EmptyTitle");
@ -151,7 +154,8 @@ hstring HomeViewModel::UpdateCardTitle() const noexcept {
return {};
}
ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView();
ResourceLoader resourceLoader =
ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID);
hstring titleFmt = resourceLoader.GetString(L"About_Version_UpdateCard_Title");
return hstring(fmt::format(fmt::runtime(std::wstring_view(titleFmt)), updateService.Tag()));
}

View file

@ -41,7 +41,7 @@ void LocalizationService::EarlyInitialize() {
double bestScore = 0.0;
// 没有支持的语言则回落到英语
const wchar_t* bestLanguage = L"en-US";
const wchar_t* bestLanguage = L"en-us";
for (const wchar_t* language : SUPPORTED_LANGUAGES) {
double score = 0.0;
HRESULT hr = GetDistanceOfClosestLanguageInList(language, userLanguages.data(), 0, &score);

View file

@ -1,7 +1,7 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="..\..\packages\Microsoft.Windows.CppWinRT.2.0.240111.5\build\native\Microsoft.Windows.CppWinRT.props" Condition="Exists('..\..\packages\Microsoft.Windows.CppWinRT.2.0.240111.5\build\native\Microsoft.Windows.CppWinRT.props')" />
<Import Project="..\..\packages\Microsoft.UI.Xaml.2.8.6\build\native\Microsoft.UI.Xaml.props" Condition="Exists('..\..\packages\Microsoft.UI.Xaml.2.8.6\build\native\Microsoft.UI.Xaml.props')" />
<Import Project="..\..\packages\Microsoft.Windows.CppWinRT.2.0.230706.1\build\native\Microsoft.Windows.CppWinRT.props" Condition="Exists('..\..\packages\Microsoft.Windows.CppWinRT.2.0.230706.1\build\native\Microsoft.Windows.CppWinRT.props')" />
<PropertyGroup Label="Globals">
<CppWinRTGenerateWindowsMetadata>true</CppWinRTGenerateWindowsMetadata>
<MinimalCoreWin>true</MinimalCoreWin>
@ -14,22 +14,22 @@
<ApplicationTypeRevision>10.0</ApplicationTypeRevision>
<WindowsTargetPlatformVersion>10.0.22621.0</WindowsTargetPlatformVersion>
<WindowsTargetPlatformMinVersion>10.0.18362.0</WindowsTargetPlatformMinVersion>
<WindowsAppContainer>true</WindowsAppContainer>
<AppxGeneratePriEnabled>true</AppxGeneratePriEnabled>
<ProjectPriIndexName>Magpie.App</ProjectPriIndexName>
<AppxPackage>true</AppxPackage>
<AppxBundlePlatforms>$(Platform)</AppxBundlePlatforms>
<AppxDefaultResourceQualifiers>Language=en-US</AppxDefaultResourceQualifiers>
<GenerateAppxPackageOnBuild>false</GenerateAppxPackageOnBuild>
<OutDir>$(SolutionDir)bin\$(Platform)\$(Configuration)\</OutDir>
<AppxBundlePlatforms>x64|arm64</AppxBundlePlatforms>
<!-- 将 xbf 文件打包进 pri -->
<!-- https://github.com/microsoft/terminal/blob/9eb191d5453fed890bad63952e425e1240aa8df5/src/cascadia/TerminalApp/TerminalAppLib.vcxproj#L17-L18 -->
<DisableEmbeddedXbf>false</DisableEmbeddedXbf>
<XamlComponentResourceLocation>nested</XamlComponentResourceLocation>
<!-- 防止编译到子文件夹中 -->
<GenerateProjectSpecificOutputFolder>false</GenerateProjectSpecificOutputFolder>
<!-- 防止 XAML 被拷贝到输出文件夹 -->
<GenerateLibraryLayout>false</GenerateLibraryLayout>
<!-- 链接到桌面版 CRT -->
<DesktopCompatible>true</DesktopCompatible>
<_NoWinAPIFamilyApp>true</_NoWinAPIFamilyApp>
<_VC_Target_Library_Platform>Desktop</_VC_Target_Library_Platform>
<UseCrtSDKReferenceStaticWarning>false</UseCrtSDKReferenceStaticWarning>
<OutDir>$(SolutionDir)bin\$(Platform)\$(Configuration)\</OutDir>
<XamlComponentResourceLocation>nested</XamlComponentResourceLocation>
<!-- 将 xbf 文件打包进 resources.pri -->
<!-- https://github.com/microsoft/terminal/blob/9eb191d5453fed890bad63952e425e1240aa8df5/src/cascadia/TerminalApp/TerminalAppLib.vcxproj#L17-L18 -->
<DisableEmbeddedXbf>false</DisableEmbeddedXbf>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<Import Project="..\Common.Pre.props" />
@ -59,7 +59,7 @@
<Link>
<GenerateWindowsMetadata>false</GenerateWindowsMetadata>
<SubSystem>Console</SubSystem>
<AdditionalDependencies>kernel32.lib;ole32.lib;oleaut32.lib;user32.lib;gdi32.lib;$(OutDir)..\Magpie.Core.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>kernel32.lib;ole32.lib;oleaut32.lib;user32.lib;gdi32.lib;$(OutDir).\Magpie.Core.lib;%(AdditionalDependencies)</AdditionalDependencies>
<ModuleDefinitionFile>Magpie.App.def</ModuleDefinitionFile>
</Link>
</ItemDefinitionGroup>
@ -234,11 +234,6 @@
<SubType>Designer</SubType>
</ApplicationDefinition>
</ItemGroup>
<ItemGroup>
<AppxManifest Include="Package.appxmanifest">
<SubType>Designer</SubType>
</AppxManifest>
</ItemGroup>
<ItemGroup>
<ClCompile Include="AboutPage.cpp">
<DependentUpon>AboutPage.xaml</DependentUpon>
@ -406,6 +401,7 @@
</ItemGroup>
<ItemGroup>
<None Include="Magpie.App.def" />
<None Include="make_resources_pri.py" />
<None Include="SimpleStackPanel.idl">
<SubType>Designer</SubType>
</None>
@ -527,9 +523,6 @@
</None>
</ItemGroup>
<ItemGroup>
<None Include="dummy.exe">
<DeploymentContent>true</DeploymentContent>
</None>
<None Include="packages.config" />
</ItemGroup>
<ItemGroup>
@ -613,19 +606,46 @@
</Text>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<!-- Inside resources.pri, App.xbf must sit at the root of the Files node; otherwise the program cannot run. -->
<!-- Taken from https://github.com/TranslucentTB/TranslucentTB/blob/f84233b10c7b5ac60c8bed68080d93a056d3f712/Xaml/Xaml.vcxproj#L433 -->
<!-- Runs after GetPackagingOutputs and rewrites the TargetPath of the App.xaml / App.xbf packaging outputs. -->
<Target Name="PlaceAppXbfAtRootOfResourceTree" AfterTargets="GetPackagingOutputs">
<ItemGroup>
<!-- Select the packaging outputs whose file name is "App" and whose extension is .xaml or .xbf. -->
<_RelocatedAppXamlData Include="@(PackagingOutputs)" Condition="'%(Filename)' == 'App' and ('%(Extension)' == '.xaml' or '%(Extension)' == '.xbf')" />
<!-- Remove them from PackagingOutputs, then add them back with TargetPath reduced to the bare file name plus extension (no directory part). -->
<PackagingOutputs Remove="@(_RelocatedAppXamlData)" />
<PackagingOutputs Include="@(_RelocatedAppXamlData)">
<TargetPath>%(Filename)%(Extension)</TargetPath>
</PackagingOutputs>
</ItemGroup>
</Target>
<!-- Packaging as AppX (i.e. setting the AppxPackage property) generates resources.pri automatically, but it stops XAML hot reload from working, so it is best avoided. -->
<!-- We generate resources.pri ourselves; it is produced by merging the pri files contained in PackagingOutputs. -->
<!-- Runs after _GenerateProjectPriFileCore. -->
<Target Name="MakeResourcesPri" AfterTargets="_GenerateProjectPriFileCore">
<ItemGroup>
<!-- Collect every .pri file among the packaging outputs. -->
<!-- NOTE(review): KeepMetadata names a key that presumably no item carries, so the copied items end up without metadata; confirm. -->
<_PrisToMerge Include="@(PackagingOutputs)" Condition="'%(Extension)' == '.pri'" KeepMetadata="DoesntExist" />
</ItemGroup>
<Message Text="Generating resources.pri" Importance="high" />
<!-- Invoke make_resources_pri.py (located next to this project file) with the output directory and the collected pri files as arguments. -->
<Exec Command="python $(MSBuildThisFileDirectory)make_resources_pri.py &quot;$(OutDir)\&quot; &quot;@(_PrisToMerge)&quot;" />
<ItemGroup>
<!-- Deleting resources.pri should trigger a rebuild. -->
<FileWrites Include="$(OutDir)resources.pri" />
<!-- Some pri files reference extra payloads (for example WinUI's noise image). The build system copies them to the output folder but does not watch them for changes. -->
<!-- A rebuild must be triggered when a payload changes, because the WinUI payloads are extracted from the AppX package; see extract_winui_runtime.py. -->
<FileReads Include="@(_ExtraPriPayloadFiles)" />
</ItemGroup>
</Target>
<ImportGroup Label="ExtensionTargets">
<Import Project="..\..\packages\Microsoft.Windows.CppWinRT.2.0.230706.1\build\native\Microsoft.Windows.CppWinRT.targets" Condition="Exists('..\..\packages\Microsoft.Windows.CppWinRT.2.0.230706.1\build\native\Microsoft.Windows.CppWinRT.targets')" />
<Import Project="..\..\packages\Microsoft.UI.Xaml.2.8.6\build\native\Microsoft.UI.Xaml.targets" Condition="Exists('..\..\packages\Microsoft.UI.Xaml.2.8.6\build\native\Microsoft.UI.Xaml.targets')" />
<Import Project="..\..\packages\Microsoft.Web.WebView2.1.0.2210.55\build\native\Microsoft.Web.WebView2.targets" Condition="Exists('..\..\packages\Microsoft.Web.WebView2.1.0.2210.55\build\native\Microsoft.Web.WebView2.targets')" />
<Import Project="..\..\packages\Microsoft.Windows.CppWinRT.2.0.240111.5\build\native\Microsoft.Windows.CppWinRT.targets" Condition="Exists('..\..\packages\Microsoft.Windows.CppWinRT.2.0.240111.5\build\native\Microsoft.Windows.CppWinRT.targets')" />
</ImportGroup>
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">
<PropertyGroup>
<ErrorText>这台计算机上缺少此项目引用的 NuGet 程序包。使用“NuGet 程序包还原”可下载这些程序包。有关更多信息,请参见 http://go.microsoft.com/fwlink/?LinkID=322105。缺少的文件是 {0}。</ErrorText>
</PropertyGroup>
<Error Condition="!Exists('..\..\packages\Microsoft.Windows.CppWinRT.2.0.230706.1\build\native\Microsoft.Windows.CppWinRT.props')" Text="$([System.String]::Format('$(ErrorText)', '..\..\packages\Microsoft.Windows.CppWinRT.2.0.230706.1\build\native\Microsoft.Windows.CppWinRT.props'))" />
<Error Condition="!Exists('..\..\packages\Microsoft.Windows.CppWinRT.2.0.230706.1\build\native\Microsoft.Windows.CppWinRT.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\..\packages\Microsoft.Windows.CppWinRT.2.0.230706.1\build\native\Microsoft.Windows.CppWinRT.targets'))" />
<Error Condition="!Exists('..\..\packages\Microsoft.UI.Xaml.2.8.6\build\native\Microsoft.UI.Xaml.props')" Text="$([System.String]::Format('$(ErrorText)', '..\..\packages\Microsoft.UI.Xaml.2.8.6\build\native\Microsoft.UI.Xaml.props'))" />
<Error Condition="!Exists('..\..\packages\Microsoft.UI.Xaml.2.8.6\build\native\Microsoft.UI.Xaml.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\..\packages\Microsoft.UI.Xaml.2.8.6\build\native\Microsoft.UI.Xaml.targets'))" />
<Error Condition="!Exists('..\..\packages\Microsoft.Web.WebView2.1.0.2210.55\build\native\Microsoft.Web.WebView2.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\..\packages\Microsoft.Web.WebView2.1.0.2210.55\build\native\Microsoft.Web.WebView2.targets'))" />
<Error Condition="!Exists('..\..\packages\Microsoft.Windows.CppWinRT.2.0.240111.5\build\native\Microsoft.Windows.CppWinRT.props')" Text="$([System.String]::Format('$(ErrorText)', '..\..\packages\Microsoft.Windows.CppWinRT.2.0.240111.5\build\native\Microsoft.Windows.CppWinRT.props'))" />
<Error Condition="!Exists('..\..\packages\Microsoft.Windows.CppWinRT.2.0.240111.5\build\native\Microsoft.Windows.CppWinRT.targets')" Text="$([System.String]::Format('$(ErrorText)', '..\..\packages\Microsoft.Windows.CppWinRT.2.0.240111.5\build\native\Microsoft.Windows.CppWinRT.targets'))" />
</Target>
</Project>
</Project>

View file

@ -9,9 +9,6 @@
<ItemGroup>
<ClCompile Include="pch.cpp" />
<ClCompile Include="App.cpp" />
<ClCompile Include="$(GeneratedFilesDir)module.g.cpp">
<Filter>Miscellaneous</Filter>
</ClCompile>
<ClCompile Include="ShortcutService.cpp">
<Filter>Services</Filter>
</ClCompile>
@ -58,6 +55,7 @@
<ClCompile Include="FileDialogHelper.cpp">
<Filter>Helpers</Filter>
</ClCompile>
<ClCompile Include="$(GeneratedFilesDir)module.g.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="pch.h" />
@ -127,9 +125,6 @@
<Filter Include="Controls">
<UniqueIdentifier>{ca435698-7c9e-4784-a031-2c9db29b5fcc}</UniqueIdentifier>
</Filter>
<Filter Include="Miscellaneous">
<UniqueIdentifier>{7a45d2bb-695d-4658-8fb9-a3d1f3b5aff8}</UniqueIdentifier>
</Filter>
<Filter Include="Converters">
<UniqueIdentifier>{da249886-cfc3-4502-ac6b-a4a2b71807ce}</UniqueIdentifier>
</Filter>
@ -154,9 +149,6 @@
</ItemGroup>
<ItemGroup>
<None Include="packages.config" />
<None Include="dummy.exe">
<Filter>Miscellaneous</Filter>
</None>
<None Include="KeyVisual.idl">
<Filter>Controls</Filter>
</None>
@ -276,11 +268,6 @@
<Filter>Controls</Filter>
</Page>
</ItemGroup>
<ItemGroup>
<AppxManifest Include="Package.appxmanifest">
<Filter>Miscellaneous</Filter>
</AppxManifest>
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="Magpie.App.rc" />
</ItemGroup>

View file

@ -8,6 +8,7 @@
#include <Psapi.h>
#include "ProfileService.h"
#include "AppXReader.h"
#include "CommonSharedConstants.h"
namespace winrt::Magpie::App::implementation {
@ -143,8 +144,8 @@ void NewProfileViewModel::PrepareForOpen(uint32_t dpi, bool isLightTheme, CoreDi
}
std::vector<IInspectable> profiles;
hstring defaults = ResourceLoader::GetForCurrentView().GetString(L"Root_Defaults/Content");
profiles.push_back(box_value(defaults));
profiles.push_back(box_value(ResourceLoader::GetForCurrentView(
CommonSharedConstants::APP_RESOURCE_MAP_ID).GetString(L"Root_Defaults/Content")));
for (const Profile& profile : AppSettings::Get().Profiles()) {
profiles.push_back(box_value(profile.name));
}

View file

@ -1,27 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- AppX package manifest for Magpie.App. -->
<!-- The logo and executable entries below point at placeholder assets (dummy.png, dummy.exe). -->
<!-- NOTE(review): presumably this manifest exists only to satisfy the AppX packaging tooling rather than to ship a real package; confirm. -->
<Package xmlns="http://schemas.microsoft.com/appx/manifest/foundation/windows10" xmlns:mp="http://schemas.microsoft.com/appx/2014/phone/manifest" xmlns:uap="http://schemas.microsoft.com/appx/manifest/uap/windows10" IgnorableNamespaces="uap mp">
<!-- Package identity: a GUID name, publisher CN=Blinue, fixed version 1.0.0.0. -->
<Identity
Name="95c63e03-47d5-41a3-9dc9-2b82eda2d1fa"
Publisher="CN=Blinue"
Version="1.0.0.0" />
<mp:PhoneIdentity PhoneProductId="95c63e03-47d5-41a3-9dc9-2b82eda2d1fa" PhonePublisherId="00000000-0000-0000-0000-000000000000"/>
<Properties>
<DisplayName>Magpie.App</DisplayName>
<PublisherDisplayName>Blinue</PublisherDisplayName>
<Logo>dummy.png</Logo>
</Properties>
<Dependencies>
<TargetDeviceFamily Name="Windows.Universal" MinVersion="10.0.0.0" MaxVersionTested="10.0.0.0" />
</Dependencies>
<Resources>
<Resource Language="x-generate" />
</Resources>
<Applications>
<!-- Single application entry; its executable and tile logos are the placeholder dummy.exe / dummy.png. -->
<Application Id="Magpie.App" Executable="dummy.exe" EntryPoint="Magpie.App.App">
<uap:VisualElements DisplayName="Magpie.App" Description="Magpie.App"
Square150x150Logo="dummy.png" Square44x44Logo="dummy.png" BackgroundColor="transparent">
<uap:DefaultTile/>
</uap:VisualElements>
</Application>
</Applications>
</Package>

View file

@ -16,6 +16,7 @@
#include <dxgi.h>
#include "ScalingService.h"
#include "FileDialogHelper.h"
#include "CommonSharedConstants.h"
using namespace winrt;
using namespace Windows::Graphics::Display;
@ -93,7 +94,8 @@ ProfileViewModel::ProfileViewModel(int profileIdx) : _isDefaultProfile(profileId
_LoadIcon(rootPage);
}
ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView();
ResourceLoader resourceLoader =
ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID);
{
std::vector<IInspectable> scalingModes;
scalingModes.push_back(box_value(resourceLoader.GetString(L"Profile_General_ScalingMode_None")));
@ -207,10 +209,12 @@ void ProfileViewModel::ChangeExeForLaunching() const noexcept {
return;
}
static std::wstring titleStr(ResourceLoader::GetForCurrentView().GetString(L"SelectLauncherDialog_Title"));
ResourceLoader resourceLoader =
ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID);
static std::wstring titleStr(resourceLoader.GetString(L"SelectLauncherDialog_Title"));
fileDialog->SetTitle(titleStr.c_str());
static std::wstring exeFileStr(ResourceLoader::GetForCurrentView().GetString(L"FileDialog_ExeFile"));
static std::wstring exeFileStr(resourceLoader.GetString(L"FileDialog_ExeFile"));
const COMDLG_FILTERSPEC fileType{ exeFileStr.c_str(), L"*.exe"};
fileDialog->SetFileTypes(1, &fileType);
fileDialog->SetDefaultExtension(L"exe");
@ -249,7 +253,8 @@ void ProfileViewModel::ChangeExeForLaunching() const noexcept {
hstring ProfileViewModel::Name() const noexcept {
if (_data->name.empty()) {
return ResourceLoader::GetForCurrentView().GetString(L"Root_Defaults/Content");
return ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID)
.GetString(L"Root_Defaults/Content");
} else {
return hstring(_data->name);
}
@ -501,7 +506,8 @@ IVector<IInspectable> ProfileViewModel::GraphicsCards() const noexcept {
std::vector<IInspectable> graphicsCards;
graphicsCards.reserve(_graphicsCards.size() + 1);
ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView();
ResourceLoader resourceLoader =
ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID);
hstring defaultStr = resourceLoader.GetString(L"Profile_General_CaptureMethod_Default");
graphicsCards.push_back(box_value(defaultStr));

View file

@ -68,7 +68,7 @@
<value>Aktivieren</value>
</data>
<data name="Home_AutoRestore_CurWindow" xml:space="preserve">
<value>Jetziges Fenster</value>
<value>Aktuelles Fenster</value>
</data>
<data name="Home_PageFrame.Title" xml:space="preserve">
<value>Startseite</value>
@ -77,19 +77,19 @@
<value>Säubern</value>
</data>
<data name="Home_Timer.Description" xml:space="preserve">
<value>Fenster im Vordergrund skalieren, sobald der Timer ended</value>
<value>Fenster im Vordergrund skalieren, sobald der Timer endet</value>
</data>
<data name="Home_AutoRestore.Description" xml:space="preserve">
<value>Automatisch skalieren, sobald das Fenster zum Vordergrund zurück kehrt.</value>
<value>Automatisch skalieren, sobald das Fenster zum Vordergrund zurückkehrt</value>
</data>
<data name="Home_Timer.Header" xml:space="preserve">
<value>Verspätetes skalieren</value>
<value>Verzögertes Skalieren</value>
</data>
<data name="Home_Timer_Cancel" xml:space="preserve">
<value>Abbrechen</value>
</data>
<data name="ShortcutDialog_Description.Text" xml:space="preserve">
<value>Drücke eine Tastenkombination, um die Verknüpfung zu verändern</value>
<value>Drücke eine Tastenkombination, um diese Tastenkombination zu ändern</value>
</data>
<data name="ShortcutDialog_Save" xml:space="preserve">
<value>Speichern</value>
@ -116,7 +116,7 @@
<value>Neues Profil</value>
</data>
<data name="ShortcutDialog_Tip.Text" xml:space="preserve">
<value>Nur Verknüpfungen, die mit Windows Key, Ctrl, Alt oder Shift starten, sind gültig.</value>
<value>Nur Verknüpfungen, die mit der Windows-, Strg-, Alt- oder Umschalttaste starten, sind gültig.</value>
</data>
<data name="ShortcutDialog_Cancel" xml:space="preserve">
<value>Abbrechen</value>
@ -142,4 +142,626 @@
<data name="Settings_General_Theme_Light.Content" xml:space="preserve">
<value>Hell</value>
</data>
<data name="Home_AutoRestore.Header" xml:space="preserve">
<value>Automatisch wiederherstellen</value>
</data>
<data name="Home_Timer_ButtonText" xml:space="preserve">
<value>Skaliert nach {}s</value>
</data>
<data name="Home_Timer_Delay.Header" xml:space="preserve">
<value>Verzögerung in Sekunden</value>
</data>
<data name="ScalingConfiguration_PageFrame.Title" xml:space="preserve">
<value>Skalierungskonfiguration</value>
</data>
<data name="Settings_Advanced.Header" xml:space="preserve">
<value>Erweitert</value>
</data>
<data name="Settings_General.Header" xml:space="preserve">
<value>Allgemein</value>
</data>
<data name="Settings_General_PortableMode_Locate.Content" xml:space="preserve">
<value>Speicherort der Konfigurationsdatei öffnen</value>
</data>
<data name="Settings_General_Theme_Dark.Content" xml:space="preserve">
<value>Dunkel</value>
</data>
<data name="Home_Shortcuts_Scale.Header" xml:space="preserve">
<value>Skalierung</value>
</data>
<data name="ScalingConfiguration_General_MoreOptions.[using:Windows.UI.Xaml.Controls]ToolTipService.ToolTip" xml:space="preserve">
<value>Weitere Optionen</value>
</data>
<data name="ScalingConfiguration_ScalingModes.Header" xml:space="preserve">
<value>Skalierungsmodus</value>
</data>
<data name="Settings_Launch_AlwaysRunAsAdmin.Description" xml:space="preserve">
<value>Du brauchst Administrationsrechte, um diese Einstellung zu nutzen</value>
</data>
<data name="Settings_Launch_AlwaysRunAsAdmin.Header" xml:space="preserve">
<value>Immer als Administrator ausführen</value>
</data>
<data name="Home_UpdateCard_DownloadAndInstall.Content" xml:space="preserve">
<value>Herunterladen und installieren</value>
</data>
<data name="Root_NewProfileFlyout_CopyFrom.Text" xml:space="preserve">
<value>Kopiere von</value>
</data>
<data name="Settings_General_Language.Header" xml:space="preserve">
<value>Sprache</value>
</data>
<data name="Settings_General_ShowTrayIcon.Description" xml:space="preserve">
<value>Magpie wird im Hintergrund weiterlaufen, nachdem das Fenster geschlossen wurde</value>
</data>
<data name="Settings_General_ShowTrayIcon.Header" xml:space="preserve">
<value>App in der Taskleiste anzeigen</value>
</data>
<data name="Settings_General_Theme.Header" xml:space="preserve">
<value>Design</value>
</data>
<data name="Home_UpdateCard_AutoCheckForUpdates.Content" xml:space="preserve">
<value>Regelmäßig nach Aktualisierungen suchen</value>
</data>
<data name="Profile_General_3DGameMode.Header" xml:space="preserve">
<value>3D Spielmodus</value>
</data>
<data name="Profile_General_AutoScale.Header" xml:space="preserve">
<value>Automatisch skalieren, wenn im Vordergrund</value>
</data>
<data name="Profile_General_CaptureMethod.Header" xml:space="preserve">
<value>Aufnahmemethode</value>
</data>
<data name="Profile_General_Multimonitor.Header" xml:space="preserve">
<value>Bevorzugte Bildschirme</value>
</data>
<data name="Profile_General_Multimonitor_All.Content" xml:space="preserve">
<value>Alle Bildschirme</value>
</data>
<data name="Profile_General_Multimonitor_Closest.Content" xml:space="preserve">
<value>Bildschirm, der dem Quellfenster am nächsten ist</value>
</data>
<data name="Profile_General_Multimonitor_Intersected.Content" xml:space="preserve">
<value>Bildschirme, die vom Quellfenster durchschnitten werden</value>
</data>
<data name="Profile_General_ScalingMode.Header" xml:space="preserve">
<value>Skalierungsmodus</value>
</data>
<data name="Profile_Launch.[using:Windows.UI.Xaml.Controls]ToolTipService.ToolTip" xml:space="preserve">
<value>Starten</value>
</data>
<data name="Profile_MoreOptions.[using:Windows.UI.Xaml.Controls]ToolTipService.ToolTip" xml:space="preserve">
<value>Weitere Optionen</value>
</data>
<data name="Profile_MoreOptions_Delete.Text" xml:space="preserve">
<value>Löschen</value>
</data>
<data name="Profile_MoreOptions_DeleteFlyout_Delete.Text" xml:space="preserve">
<value>Löschen</value>
</data>
<data name="Profile_MoreOptions_OpenProgramLocation.Text" xml:space="preserve">
<value>Datenpfad öffnen</value>
</data>
<data name="Profile_MoreOptions_DeleteFlyout_Title.Text" xml:space="preserve">
<value>Sind Sie sicher, dass Sie dieses Profil löschen möchten?</value>
</data>
<data name="Profile_MoreOptions_RenameFlyout_OK.Content" xml:space="preserve">
<value>OK</value>
</data>
<data name="Profile_MoreOptions_RenameFlyout_Title.Text" xml:space="preserve">
<value>Umbenennen</value>
</data>
<data name="Profile_Performance_GraphicsCard.Header" xml:space="preserve">
<value>Grafikkarte</value>
</data>
<data name="Settings_Launch_RunAtStartup_MinimizeAtStartup.Description" xml:space="preserve">
<value>Um diese Einstellung zu verwenden, müssen Sie die Option "Anwendung in der Taskleiste anzeigen" aktivieren</value>
</data>
<data name="Settings_Launch_RunAtStartup_MinimizeAtStartup.Header" xml:space="preserve">
<value>Beim Starten in die Taskleiste minimieren</value>
</data>
<data name="Profile_Performance_ShowFPS.Header" xml:space="preserve">
<value>FPS Anzeigen</value>
</data>
<data name="AppSettings_Dialog_Error" xml:space="preserve">
<value>Fehler</value>
</data>
<data name="AppSettings_Dialog_Warning" xml:space="preserve">
<value>Warnung</value>
</data>
<data name="AppSettings_UnkownConfiguration_Continue" xml:space="preserve">
<value>Fortfahren</value>
</data>
<data name="About_Version_UpdateCard_ReleaseNotes.Content" xml:space="preserve">
<value>Versionshinweise</value>
</data>
<data name="About_Version_UpdateToDate_DownloadFailed.Text" xml:space="preserve">
<value>Download fehlgeschlagen</value>
</data>
<data name="About_Version_UpdateCard_DownloadAndInstall.Content" xml:space="preserve">
<value>Downloaden und installieren</value>
</data>
<data name="About_DeveloperModeEnabled" xml:space="preserve">
<value>Entwicklermodus ist aktiviert.</value>
</data>
<data name="AppSettings_PortableModeUnkownConfiguration_Content" xml:space="preserve">
<value>Die lokale Konfigurationsdatei stammt von einer unbekannten Version und wird möglicherweise nicht korrekt geparst.</value>
</data>
<data name="AppSettings_PortableModeUnkownConfiguration_Continue" xml:space="preserve">
<value>Fortfahren</value>
</data>
<data name="AppSettings_PortableModeUnkownConfiguration_Exit" xml:space="preserve">
<value>Schließen</value>
</data>
<data name="AppSettings_UnkownConfiguration_Content" xml:space="preserve">
<value>Die globale Konfigurationsdatei stammt von einer unbekannten Version und wird möglicherweise nicht korrekt geparst.</value>
</data>
<data name="Profile_Cursor_DrawCursor_ScalingFactor.Header" xml:space="preserve">
<value>Skalierungsfaktor</value>
</data>
<data name="Profile_Cursor_DrawCursor_Interpolation_Bilinear.Content" xml:space="preserve">
<value>Bilinear</value>
</data>
<data name="Profile_Cursor_DrawCursor_Interpolation_NearestNeighbor.Content" xml:space="preserve">
<value>Nearest-neighbor</value>
</data>
<data name="Profile_Cursor_DrawCursor_ScalingFactor_NoScaling.Content" xml:space="preserve">
<value>Keine Skalierung</value>
</data>
<data name="Profile_Cursor_DrawCursor_ScalingFactor_SameAsSourceWindow.Content" xml:space="preserve">
<value>Wie Quellfenster</value>
</data>
<data name="Profile_General_CaptureMethod_Default" xml:space="preserve">
<value>Standard</value>
</data>
<data name="Profile_SourceWindow.Header" xml:space="preserve">
<value>Quellfenster</value>
</data>
<data name="ShortcutDialog_InUse" xml:space="preserve">
<value>Bereits in Nutzung</value>
</data>
<data name="Settings_General_Language_System" xml:space="preserve">
<value>Windows Standard</value>
</data>
<data name="Settings_General_PortableMode.Header" xml:space="preserve">
<value>Portable mode</value>
</data>
<data name="Settings_Launch_RunAtStartup.Header" xml:space="preserve">
<value>Beim Starten öffnen</value>
</data>
<data name="Home_Shortcuts.Header" xml:space="preserve">
<value>Tastaturkürzel</value>
</data>
<data name="Home_Shortcuts_Scale_ShortcutControl.Title" xml:space="preserve">
<value>Skalierungs Tastaturkürzel</value>
</data>
<data name="Settings_Launch.Header" xml:space="preserve">
<value>Starten</value>
</data>
<data name="ScalingConfiguration_General_Export.Text" xml:space="preserve">
<value>Exportieren</value>
</data>
<data name="ScalingConfiguration_General_Import.Text" xml:space="preserve">
<value>Importieren</value>
</data>
<data name="ScalingConfiguration_General_ImportLegacy.Text" xml:space="preserve">
<value>Importiere ScaleModels.json</value>
</data>
<data name="Home_UpdateCard_ReleaseNotes.Content" xml:space="preserve">
<value>Versionshinweise</value>
</data>
<data name="Home_UpdateCard_RemindMeLater.Content" xml:space="preserve">
<value>Erinnere mich später</value>
</data>
<data name="About_OtherLinks_Repository.Text" xml:space="preserve">
<value>GitHub-Repository</value>
</data>
<data name="ScalingConfiguration_Parameters.[using:Windows.UI.Xaml.Controls]ToolTipService.ToolTip" xml:space="preserve">
<value>Parameter</value>
</data>
<data name="ScalingConfiguration_ScalingModes_MoreOptions.[using:Windows.UI.Xaml.Controls]ToolTipService.ToolTip" xml:space="preserve">
<value>Weitere Optionen</value>
</data>
<data name="ScalingConfiguration_ScalingModes_MoreOptionsFlyout_Delete.Text" xml:space="preserve">
<value>Löschen</value>
</data>
<data name="ScalingConfiguration_ScalingModes_MoreOptionsFlyout_MoveDown.Text" xml:space="preserve">
<value>Runter</value>
</data>
<data name="ScalingConfiguration_ScalingModes_Rename.[using:Windows.UI.Xaml.Controls]ToolTipService.ToolTip" xml:space="preserve">
<value>Umbenennen</value>
</data>
<data name="ScalingConfiguration_ScalingModes_RenameFlyout_OK.Content" xml:space="preserve">
<value>OK</value>
</data>
<data name="ScalingConfiguration_ScalingModes_RenameFlyout_Title.Text" xml:space="preserve">
<value>Umbenennen</value>
</data>
<data name="ScalingConfiguration_ScalingModes_AddEffect.Text" xml:space="preserve">
<value>Effekt hinzufügen</value>
</data>
<data name="ScalingConfiguration_ScalingModes_DeleteFlyout_Delete.Text" xml:space="preserve">
<value>Löschen</value>
</data>
<data name="ScalingConfiguration_ScalingModes_DeleteFlyout_Description.Text" xml:space="preserve">
<value>Es wird von den folgenden Profilen verwendet:</value>
</data>
<data name="ScalingConfiguration_ScalingModes_DeleteFlyout_Title.Text" xml:space="preserve">
<value>Sind Sie sicher, dass Sie diesen Skalierungsmodus löschen wollen?</value>
</data>
<data name="ScalingConfiguration_ScalingModes_MoreOptionsFlyout_MoveUp.Text" xml:space="preserve">
<value>Aufwärts bewegen</value>
</data>
<data name="ScalingConfiguration_ScalingModes_DragToReorder.Text" xml:space="preserve">
<value>Ziehen zum Neuordnen</value>
</data>
<data name="ScalingConfiguration_ScalingModes_NewScalingMode.Text" xml:space="preserve">
<value>Neuer Skalierungsmodus</value>
</data>
<data name="ScalingConfiguration_ScalingModes_NewScalingModeFlyout_Title.Text" xml:space="preserve">
<value>Neuer Skalierungsmodus</value>
</data>
<data name="ScalingConfiguration_ScalingModes_Scale.[using:Windows.UI.Xaml.Controls]ToolTipService.ToolTip" xml:space="preserve">
<value>Skalierung</value>
</data>
<data name="ScalingConfiguration_ScalingModes_NewScalingModeFlyout_CopyFrom.Text" xml:space="preserve">
<value>Kopieren von</value>
</data>
<data name="ScalingConfiguration_ScalingModes_NewScalingModeFlyout_CopyFrom_None" xml:space="preserve">
<value>Keiner</value>
</data>
<data name="ScalingConfiguration_ScalingModes_NewScalingModeFlyout_Create.Content" xml:space="preserve">
<value>Erstellen</value>
</data>
<data name="ScalingConfiguration_ScalingModes_NewScalingModeFlyout_Name.Text" xml:space="preserve">
<value>Name</value>
</data>
<data name="ScalingConfiguration_ScalingModes_ScaleFlyout_HeightFactor.Text" xml:space="preserve">
<value>Höhe (Skalierungsfaktor)</value>
</data>
<data name="ScalingConfiguration_ScalingModes_ScaleFlyout_HeightPixels.Text" xml:space="preserve">
<value>Höhe (Pixel)</value>
</data>
<data name="ScalingConfiguration_ScalingModes_ScaleFlyout_WidthFactor.Text" xml:space="preserve">
<value>Breite (Skalierungsfaktor)</value>
</data>
<data name="ScalingConfiguration_ScalingModes_ScaleFlyout_WidthPixels.Text" xml:space="preserve">
<value>Breite (Pixel)</value>
</data>
<data name="ScalingConfiguration_ScalingModes_ScaleFlyout_Type.Text" xml:space="preserve">
<value>Typ</value>
</data>
<data name="ScalingConfiguration_ScalingModes_ScaleFlyout_Type_Absolute" xml:space="preserve">
<value>Absolut</value>
</data>
<data name="ScalingConfiguration_ScalingModes_ScaleFlyout_Type_Absolute_Description" xml:space="preserve">
<value>Einstellen der Größe nach der Skalierung</value>
</data>
<data name="ScalingConfiguration_ScalingModes_ScaleFlyout_Type_Factor" xml:space="preserve">
<value>Faktor</value>
</data>
<data name="ScalingConfiguration_ScalingModes_ScaleFlyout_Type_Fill" xml:space="preserve">
<value>Füllen</value>
</data>
<data name="ScalingConfiguration_ScalingModes_ScaleFlyout_Type_Fill_Description" xml:space="preserve">
<value>Füllt den Bildschirm aus, das Bild könnte gestreckt werden</value>
</data>
<data name="ScalingConfiguration_ScalingModes_ScaleFlyout_Type_Fit_Description" xml:space="preserve">
<value>Einstellen des Skalierungsfaktors nach dem proportionalen Ausfüllen des Bildschirms</value>
</data>
<data name="Profile_General.Header" xml:space="preserve">
<value>Allgemein</value>
</data>
<data name="Profile_MoreOptions_Rename.Text" xml:space="preserve">
<value>Umbenennen</value>
</data>
<data name="ScalingConfiguration_ScalingModes_ScaleFlyout_Type_Fit" xml:space="preserve">
<value>Anpassen</value>
</data>
<data name="Profile_MoreOptions_Reorder.Text" xml:space="preserve">
<value>Umsortieren</value>
</data>
<data name="Profile_MoreOptions_ReorderFlyout_MoveDown.Text" xml:space="preserve">
<value>Runter</value>
</data>
<data name="Profile_MoreOptions_ReorderFlyout_MoveUp.Text" xml:space="preserve">
<value>Hoch</value>
</data>
<data name="Profile_MoreOptions_ReorderFlyout_Title.Text" xml:space="preserve">
<value>Umsortieren</value>
</data>
<data name="ScalingConfiguration_ParsingFailed.Title" xml:space="preserve">
<value>Parsing fehlgeschlagen</value>
</data>
<data name="Profile_General_ScalingMode_None" xml:space="preserve">
<value>Keiner</value>
</data>
<data name="Profile_Performance.Header" xml:space="preserve">
<value>Leistung</value>
</data>
<data name="About_Feedback.Header" xml:space="preserve">
<value>Feedback</value>
</data>
<data name="About_Feedback_Discussion.Header" xml:space="preserve">
<value>Diskussionen</value>
</data>
<data name="About_Feedback_ReportBug.Header" xml:space="preserve">
<value>Einen Fehler melden</value>
</data>
<data name="About_Feedback_RequestFeature.Header" xml:space="preserve">
<value>Eine Funktion vorschlagen</value>
</data>
<data name="About_Version_UpdateSettings_CheckForPreviewUpdates.Header" xml:space="preserve">
<value>Prüfen Sie auf Aktualisierungen der Vorabversion</value>
</data>
<data name="About_Version_UpdateSettings_AutoCheckForUpdates.Header" xml:space="preserve">
<value>Automatisch nach Updates suchen</value>
</data>
<data name="About_Version_CheckForUpdates.Text" xml:space="preserve">
<value>Nach Updates suchen</value>
</data>
<data name="About_Version_CheckForUpdatesFailed.Title" xml:space="preserve">
<value>Suche nach Updates fehlgeschlagen, bitte versuchen Sie es später noch einmal</value>
</data>
<data name="About_Version_CheckingForUpdates.Text" xml:space="preserve">
<value>Suche nach Updates</value>
</data>
<data name="About_Version_UpdateCard_Cancel.Content" xml:space="preserve">
<value>Abbrechen</value>
</data>
<data name="About_Version_UpdateCard_Retry.Content" xml:space="preserve">
<value>Erneut versuchen</value>
</data>
<data name="About_Version_UpdateCard_Title" xml:space="preserve">
<value>{} ist verfügbar</value>
</data>
<data name="About_Version_UpdateToDate.Title" xml:space="preserve">
<value>Magpie ist auf dem neuesten Stand</value>
</data>
<data name="About_Version_UpdateToDate_Downloading.Text" xml:space="preserve">
<value>Herunterladen</value>
</data>
<data name="About_Version_UpdateToDate_Installing.Text" xml:space="preserve">
<value>Installieren</value>
</data>
<data name="Profile_Advanced.Header" xml:space="preserve">
<value>Erweitert</value>
</data>
<data name="Profile_Advanced_DisableDirectFlip.Header" xml:space="preserve">
<value>DirectFlip deaktivieren</value>
</data>
<data name="Profile_Cursor.Header" xml:space="preserve">
<value>Mauszeiger</value>
</data>
<data name="Profile_Cursor_DrawCursor.Header" xml:space="preserve">
<value>Mauszeiger anzeigen</value>
</data>
<data name="Profile_Cursor_DrawCursor_AdjustCursorSpeed.Header" xml:space="preserve">
<value>Anpassen der Mauszeigergeschwindigkeit während der Skalierung</value>
</data>
<data name="Profile_Cursor_DrawCursor_Interpolation.Header" xml:space="preserve">
<value>Interpolationsalgorithmus</value>
</data>
<data name="Profile_Cursor_DrawCursor_ScalingFactor_Custom.Content" xml:space="preserve">
<value>Benutzerdefiniert</value>
</data>
<data name="AppSettings_UnkownConfiguration_EnablePortableMode" xml:space="preserve">
<value>Portablen Modus aktivieren</value>
</data>
<data name="Profile_SourceWindow_CaptureTitleBar.Description" xml:space="preserve">
<value>Beschränkt auf Grafikerfassung und Desktopduplikation</value>
</data>
<data name="Profile_SourceWindow_CaptureTitleBar.Header" xml:space="preserve">
<value>Titelleiste aufnehmen</value>
</data>
<data name="Profile_SourceWindow_CustomCropping.Header" xml:space="preserve">
<value>Benutzerdefiniertes Zuschneiden</value>
</data>
<data name="Profile_SourceWindow_CustomCropping_Bottom.Header" xml:space="preserve">
<value>Unten</value>
</data>
<data name="Profile_SourceWindow_CustomCropping_Left.Header" xml:space="preserve">
<value>Links</value>
</data>
<data name="Profile_SourceWindow_CustomCropping_Pixels.Text" xml:space="preserve">
<value>px</value>
</data>
<data name="Profile_SourceWindow_CustomCropping_Right.Header" xml:space="preserve">
<value>Rechts</value>
</data>
<data name="Profile_SourceWindow_CustomCropping_Top.Header" xml:space="preserve">
<value>Oben</value>
</data>
<data name="Profile_SourceWindow_DisableWindowResizing.Header" xml:space="preserve">
<value>Fenstergrößenänderung bei Skalierung deaktivieren</value>
</data>
<data name="Settings_Advanced_InlineParams.Description" xml:space="preserve">
<value>Bringt eine kleine Leistungssteigerung. Allerdings müssen die Effekte jedes Mal neu kompiliert werden, wenn ihre Parameter geändert werden</value>
</data>
<data name="Settings_Advanced_InlineParams.Header" xml:space="preserve">
<value>Effektparameter inline machen</value>
</data>
<data name="Settings_Advanced_SimulateExclusiveFullscreen.Description" xml:space="preserve">
<value>Benachrichtigungen und Pop-ups von bestimmten Anwendungen werden blockiert</value>
</data>
<data name="Settings_Advanced_SimulateExclusiveFullscreen.Header" xml:space="preserve">
<value>Bei Skalierung exklusiven Vollbild simulieren</value>
</data>
<data name="Settings_DeveloperOptions.Description" xml:space="preserve">
<value>Diese Einstellungen sind nur für Entwicklung gedacht</value>
</data>
<data name="Settings_DeveloperOptions.Header" xml:space="preserve">
<value>Entwickleroptionen</value>
</data>
<data name="Settings_DeveloperOptions_DebugMode.Content" xml:space="preserve">
<value>Debug-Modus</value>
</data>
<data name="Settings_DeveloperOptions_DisableEffectCache.Content" xml:space="preserve">
<value>Effektcache deaktivieren</value>
</data>
<data name="AppSettings_Dialog_Exit" xml:space="preserve">
<value>Schließen</value>
</data>
<data name="Settings_General_RequireRestart_ActionButton.Content" xml:space="preserve">
<value>Magpie neu starten</value>
</data>
<data name="ScalingConfiguration_ScalingModes_Description_UnknownEffect" xml:space="preserve">
<value>Unbekannter Effekt</value>
</data>
<data name="ScalingConfiguration_ScalingModes_HasUnkownEffects.Title" xml:space="preserve">
<value>Einige Effekte können nicht geparst werden</value>
</data>
<data name="ExportDialog_Title" xml:space="preserve">
<value>Skalierungsmodi exportieren</value>
</data>
<data name="ImportDialog_Title" xml:space="preserve">
<value>Skalierungsmodi importieren</value>
</data>
<data name="About_Version_UpdateSettings.Header" xml:space="preserve">
<value>Einstellungen aktualisieren</value>
</data>
<data name="Profile_Advanced_LaunchParameters.Header" xml:space="preserve">
<value>Startparameter</value>
</data>
<data name="Overlay_Profiler" xml:space="preserve">
<value>Profiler</value>
</data>
<data name="Overlay_Profiler_CaptureMethod" xml:space="preserve">
<value>Aufnahmemethode</value>
</data>
<data name="Overlay_FPS_Lock" xml:space="preserve">
<value>Begrenzen</value>
</data>
<data name="Overlay_FPS_Opacity" xml:space="preserve">
<value>Deckkraft</value>
</data>
<data name="Overlay_Profiler_FrameStatistics" xml:space="preserve">
<value>Bildstatistiken</value>
</data>
<data name="Overlay_Profiler_FrameStatistics_SwitchToFrameRates" xml:space="preserve">
<value>Umschalten auf Bildraten</value>
</data>
<data name="Overlay_Profiler_Timings_SwitchToEffects" xml:space="preserve">
<value>Zu Effekten wechseln</value>
</data>
<data name="Overlay_Profiler_Timings_SwitchToPasses" xml:space="preserve">
<value>Zu passes wechseln</value>
</data>
<data name="Overlay_Profiler_Timings_Total" xml:space="preserve">
<value>Gesamt</value>
</data>
<data name="Settings_DeveloperOptions_DisableFontCache.Content" xml:space="preserve">
<value>Schrift-Cache deaktivieren</value>
</data>
<data name="Settings_Advanced_AllowScalingMaximized.Header" xml:space="preserve">
<value>Skalieren von maximierten oder ausfüllenden Fenstern zulassen</value>
</data>
<data name="FileDialog_JsonFile" xml:space="preserve">
<value>JSON-Datei</value>
</data>
<data name="Profile_MoreOptions_ChangeExecutableForLaunching.Text" xml:space="preserve">
<value>Ausführbare Datei für den Start austauschen</value>
</data>
<data name="FileDialog_ExeFile" xml:space="preserve">
<value>Ausführbare Datei</value>
</data>
<data name="SelectLauncherDialog_Title" xml:space="preserve">
<value>Wählen Sie die ausführbare Datei, um das Programm zu starten</value>
</data>
<data name="Home_Shortcuts_Overlay.Description" xml:space="preserve">
<value>Überwachen Sie die Rendering-Leistung während der Skalierung</value>
</data>
<data name="Home_Shortcuts_Scale.Description" xml:space="preserve">
<value>Skalieren des Vordergrundfensters oder Beenden der Skalierung</value>
</data>
<data name="Home_AutoRestore_EmptyTitle" xml:space="preserve">
<value>Titel ist leer</value>
</data>
<data name="Settings_DeveloperOptions_SaveEffectSources.Content" xml:space="preserve">
<value>Quellcode beim Parsen von Effekten speichern</value>
</data>
<data name="Settings_DeveloperOptions_WarningsAreErrors.Content" xml:space="preserve">
<value>Warnungen beim Kompilieren von Effekten als Fehler betrachten</value>
</data>
<data name="AppSettings_ErrorDialog_ConfigLocation" xml:space="preserve">
<value>Speicherort der Konfigurationsdatei:
{}</value>
</data>
<data name="AppSettings_ErrorDialog_NotValidJson" xml:space="preserve">
<value>Die Konfigurationsdatei hat kein gültiges JSON-Format</value>
</data>
<data name="AppSettings_ErrorDialog_ParseFailed" xml:space="preserve">
<value>Konfigurationsdatei konnte nicht geparst werden</value>
</data>
<data name="TrayIcon_MainWindow" xml:space="preserve">
<value>Hauptfenster</value>
</data>
<data name="AppSettings_ErrorDialog_ReadFailed" xml:space="preserve">
<value>Konfigurationsdatei konnte nicht gelesen werden</value>
</data>
<data name="TrayIcon_Exit" xml:space="preserve">
<value>Schließen</value>
</data>
<data name="Settings_General_Theme_System.Content" xml:space="preserve">
<value>Windows-Standard</value>
</data>
<data name="Settings_General_RequireRestart.Title" xml:space="preserve">
<value>Damit die Änderung wirksam wird, ist ein Neustart erforderlich</value>
</data>
<data name="Home_Shortcuts_Overlay.Header" xml:space="preserve">
<value>Anzeige im Spiel</value>
</data>
<data name="ImportLegacyDialog_Title" xml:space="preserve">
<value>ScaleModels.json importieren</value>
</data>
<data name="About_Version_Version" xml:space="preserve">
<value>Version</value>
</data>
<data name="Home_Shortcuts_Overlay_ShortcutControl.Title" xml:space="preserve">
<value>Tastenkürzel für die Anzeige im Spiel</value>
</data>
<data name="Home_UpdateCard_Title" xml:space="preserve">
<value>{} ist verfügbar</value>
</data>
<data name="About_OtherLinks.Header" xml:space="preserve">
<value>Andere Links</value>
</data>
<data name="About_OtherLinks_ContributionGuidelines.Text" xml:space="preserve">
<value>Beitragsrichtlinien</value>
</data>
<data name="About_OtherLinks_FAQ.Text" xml:space="preserve">
<value>FAQ</value>
</data>
<data name="About_OtherLinks_License.Text" xml:space="preserve">
<value>Lizenz</value>
</data>
<data name="ScalingConfiguration_ScalingModes_Delete.[using:Windows.UI.Xaml.Controls]ToolTipService.ToolTip" xml:space="preserve">
<value>Löschen</value>
</data>
<data name="ScalingConfiguration_ScalingModes_DragNotSupported.Text" xml:space="preserve">
<value>Drag and drop wird bei der Ausführung als Administrator nicht unterstützt</value>
</data>
<data name="ScalingConfiguration_ScalingModes_MoveDown.[using:Windows.UI.Xaml.Controls]ToolTipService.ToolTip" xml:space="preserve">
<value>Runter</value>
</data>
<data name="ScalingConfiguration_ScalingModes_MoveUp.[using:Windows.UI.Xaml.Controls]ToolTipService.ToolTip" xml:space="preserve">
<value>Hoch</value>
</data>
<data name="ScalingConfiguration_ScalingModes_ScaleFlyout_Type_Factor_Description" xml:space="preserve">
<value>Einstellen des Skalierungsfaktors relativ zum Eingabebild</value>
</data>
<data name="Root_Defaults.Content" xml:space="preserve">
<value>Voreinstellungen</value>
</data>
<data name="Overlay_FPS_Unlock" xml:space="preserve">
<value>Freischalten</value>
</data>
<data name="Overlay_Profiler_FrameStatistics_SwitchToFrameTimings" xml:space="preserve">
<value>Umschalten auf Frame-Timings</value>
</data>
<data name="Overlay_Profiler_Timings" xml:space="preserve">
<value>Timings</value>
</data>
<data name="About_Version_CommitId" xml:space="preserve">
<value>Commit</value>
</data>
</root>

View file

@ -286,7 +286,7 @@
<value>スケーリングのショートカット</value>
</data>
<data name="Settings_Launch.Header" xml:space="preserve">
<value>立ち上げ</value>
<value>起動</value>
</data>
<data name="ScalingConfiguration_General_Export.Text" xml:space="preserve">
<value>エクスポート</value>
@ -412,7 +412,7 @@
<value>スケーリングモード</value>
</data>
<data name="Profile_Launch.[using:Windows.UI.Xaml.Controls]ToolTipService.ToolTip" xml:space="preserve">
<value>立ち上げ</value>
<value>起動</value>
</data>
<data name="Profile_MoreOptions_Delete.Text" xml:space="preserve">
<value>削除</value>
@ -823,4 +823,4 @@
<data name="About_DeveloperModeEnabled" xml:space="preserve">
<value>デベロッパーモードが有効。</value>
</data>
</root>
</root>

View file

@ -310,7 +310,7 @@
<value>더 많은 옵션</value>
</data>
<data name="Home_UpdateCard_ReleaseNotes.Content" xml:space="preserve">
<value>릴리 노트</value>
<value>릴리 노트</value>
</data>
<data name="Home_UpdateCard_RemindMeLater.Content" xml:space="preserve">
<value>나중에 다시 알림</value>
@ -683,7 +683,7 @@
<value>Magpie가 최신 상태입니다</value>
</data>
<data name="About_Version_UpdateCard_ReleaseNotes.Content" xml:space="preserve">
<value>릴리 노트</value>
<value>릴리 노트</value>
</data>
<data name="Profile_Cursor.Header" xml:space="preserve">
<value>커서</value>

View file

@ -820,4 +820,7 @@
<data name="About_DeveloperModeEnabled" xml:space="preserve">
<value>Modo desenvolvedor está habilitado.</value>
</data>
<data name="About_Version_CommitId" xml:space="preserve">
<value>Commit</value>
</data>
</root>

View file

@ -817,4 +817,10 @@
<data name="Home_Shortcuts_Scale.Description" xml:space="preserve">
<value>Масштабувати вікно переднього плану або зупинити масштабування</value>
</data>
<data name="About_DeveloperModeEnabled" xml:space="preserve">
<value>Режим розробника увімкнено.</value>
</data>
<data name="About_Version_CommitId" xml:space="preserve">
<value>Номер коміту</value>
</data>
</root>

View file

@ -171,7 +171,8 @@ void RootPage::NavigationView_DisplayModeChanged(MUXC::NavigationView const& nv,
// !!! HACK !!!
// 使导航栏的可滚动区域不会覆盖标题栏
FrameworkElement menuItemsScrollViewer = nv.GetTemplateChild(L"MenuItemsScrollViewer").as<FrameworkElement>();
FrameworkElement menuItemsScrollViewer = nv.as<IControlProtected>()
.GetTemplateChild(L"MenuItemsScrollViewer").as<FrameworkElement>();
menuItemsScrollViewer.Margin({ 0,isExpanded ? TitleBar().ActualHeight() : 0.0,0,0});
XamlUtils::UpdateThemeOfTooltips(*this, ActualTheme());
@ -237,13 +238,15 @@ fire_and_forget RootPage::ShowToast(const hstring& message) {
return;
}
IControlProtected protectedAccessor = toastTeachingTip.as<IControlProtected>();
// 隐藏关闭按钮
if (DependencyObject closeButton = toastTeachingTip.GetTemplateChild(L"AlternateCloseButton")) {
if (DependencyObject closeButton = protectedAccessor.GetTemplateChild(L"AlternateCloseButton")) {
closeButton.as<FrameworkElement>().Visibility(Visibility::Collapsed);
}
// 减小 Flyout 尺寸
if (DependencyObject container = toastTeachingTip.GetTemplateChild(L"TailOcclusionGrid")) {
if (DependencyObject container = protectedAccessor.GetTemplateChild(L"TailOcclusionGrid")) {
container.as<FrameworkElement>().MinWidth(0.0);
}
});

View file

@ -11,6 +11,7 @@
#include "Win32Utils.h"
#include "ScalingMode.h"
#include "FileDialogHelper.h"
#include "CommonSharedConstants.h"
using namespace ::Magpie::Core;
@ -33,7 +34,9 @@ ScalingConfigurationViewModel::ScalingConfigurationViewModel() {
}
static std::optional<std::wstring> OpenFileDialogForJson(IFileDialog* fileDialog) noexcept {
static std::wstring jsonFileStr(ResourceLoader::GetForCurrentView().GetString(L"FileDialog_JsonFile"));
static std::wstring jsonFileStr(
ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID)
.GetString(L"FileDialog_JsonFile"));
const COMDLG_FILTERSPEC fileType{ jsonFileStr.c_str(), L"*.json"};
fileDialog->SetFileTypes(1, &fileType);
@ -50,7 +53,9 @@ void ScalingConfigurationViewModel::Export() const noexcept {
}
fileDialog->SetFileName(L"ScalingModes");
static std::wstring title(ResourceLoader::GetForCurrentView().GetString(L"ExportDialog_Title"));
static std::wstring title(
ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID)
.GetString(L"ExportDialog_Title"));
fileDialog->SetTitle(title.c_str());
std::optional<std::wstring> fileName = OpenFileDialogForJson(fileDialog.get());
@ -74,7 +79,8 @@ static bool ImportImpl(bool legacy) noexcept {
return false;
}
ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView();
ResourceLoader resourceLoader =
ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID);
hstring title = resourceLoader.GetString(legacy ? L"ImportLegacyDialog_Title" : L"ImportDialog_Title");
fileDialog->SetTitle(title.c_str());
@ -120,7 +126,8 @@ void ScalingConfigurationViewModel::_Import(bool legacy) {
void ScalingConfigurationViewModel::PrepareForAdd() {
std::vector<IInspectable> copyFromList;
ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView();
ResourceLoader resourceLoader =
ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID);
copyFromList.push_back(box_value(resourceLoader.GetString(
L"ScalingConfiguration_ScalingModes_NewScalingModeFlyout_CopyFrom_None")));

View file

@ -11,6 +11,7 @@
#include "Logger.h"
#include "ScalingMode.h"
#include "StrUtils.h"
#include "CommonSharedConstants.h"
using namespace ::Magpie::Core;
namespace MagpieCore = ::Magpie::Core;
@ -28,7 +29,8 @@ ScalingModeEffectItem::ScalingModeEffectItem(uint32_t scalingModeIdx, uint32_t e
_name = EffectHelper::GetDisplayName(data.name);
_parametersViewModel = EffectParametersViewModel(scalingModeIdx, effectIdx);
} else {
ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView();
ResourceLoader resourceLoader =
ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID);
_name = StrUtils::Concat(
resourceLoader.GetString(L"ScalingConfiguration_ScalingModes_Description_UnknownEffect"),
L" (",
@ -64,7 +66,8 @@ bool ScalingModeEffectItem::HasParameters() const noexcept {
IVector<IInspectable> ScalingModeEffectItem::ScalingTypes() noexcept {
using Windows::ApplicationModel::Resources::ResourceLoader;
ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView();
ResourceLoader resourceLoader =
ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID);
return single_threaded_vector(std::vector<IInspectable>{
Magpie::App::ScalingType(

View file

@ -9,6 +9,7 @@
#include "AppSettings.h"
#include "EffectsService.h"
#include "EffectHelper.h"
#include "CommonSharedConstants.h"
using namespace Magpie::Core;
@ -21,7 +22,8 @@ ScalingModeItem::ScalingModeItem(uint32_t index, bool isInitialExpanded)
std::vector<IInspectable> linkedProfiles;
const Profile& defaultProfile = AppSettings::Get().DefaultProfile();
if (defaultProfile.scalingMode == (int)index) {
hstring defaults = ResourceLoader::GetForCurrentView().GetString(L"Root_Defaults/Content");
hstring defaults = ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID)
.GetString(L"Root_Defaults/Content");
linkedProfiles.push_back(box_value(defaults));
}
for (const Profile& profile : AppSettings::Get().Profiles()) {
@ -229,7 +231,8 @@ hstring ScalingModeItem::Description() const noexcept {
if (const EffectInfo* effectInfo = EffectsService::Get().GetEffect(effect.name)) {
result += EffectHelper::GetDisplayName(effect.name);
} else {
ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView();
ResourceLoader resourceLoader =
ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID);
result += L'(';
result += resourceLoader.GetString(L"ScalingConfiguration_ScalingModes_Description_UnknownEffect");
result += L')';

View file

@ -5,6 +5,7 @@
#endif
#include "XamlUtils.h"
#include "ComboBoxHelper.h"
#include "CommonSharedConstants.h"
using namespace winrt;
using namespace Windows::UI::Xaml::Input;
@ -14,8 +15,10 @@ namespace winrt::Magpie::App::implementation {
void SettingsPage::InitializeComponent() {
SettingsPageT::InitializeComponent();
ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView();
hstring versionStr = resourceLoader.GetString(L"ms-resource://Magpie.App/Microsoft.UI.Xaml/Resources/SettingsButtonName");
ResourceLoader resourceLoader =
ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID);
hstring versionStr = resourceLoader.
GetString(L"ms-resource://Magpie.App/Microsoft.UI.Xaml/Resources/SettingsButtonName");
SettingsPageFrame().Title(versionStr);
}

View file

@ -22,7 +22,8 @@ IVector<IInspectable> SettingsViewModel::Languages() const {
std::vector<IInspectable> languages;
languages.reserve(tags.size() + 1);
ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView();
ResourceLoader resourceLoader =
ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID);
languages.push_back(box_value(resourceLoader.GetString(L"Settings_General_Language_System")));
for (const wchar_t* tag : tags) {
Windows::Globalization::Language language(tag);

View file

@ -9,6 +9,7 @@
#include "XamlUtils.h"
#include "ContentDialogHelper.h"
#include "Logger.h"
#include "CommonSharedConstants.h"
using namespace winrt;
using namespace Windows::UI::Xaml::Controls;
@ -79,7 +80,8 @@ fire_and_forget ShortcutControl::EditButton_Click(IInspectable const&, RoutedEve
_shortcutDialog.Language(Language());
_shortcutDialog.Title(GetValue(TitleProperty));
_shortcutDialog.Content(_ShortcutDialogContent);
ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView();
ResourceLoader resourceLoader =
ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID);
_shortcutDialog.PrimaryButtonText(resourceLoader.GetString(L"ShortcutDialog_Save"));
_shortcutDialog.CloseButtonText(resourceLoader.GetString(L"ShortcutDialog_Cancel"));
_shortcutDialog.DefaultButton(ContentDialogButton::Primary);

View file

@ -3,6 +3,7 @@
#if __has_include("ShortcutDialog.g.cpp")
#include "ShortcutDialog.g.cpp"
#endif
#include "CommonSharedConstants.h"
namespace winrt::Magpie::App::implementation {
@ -16,14 +17,16 @@ void ShortcutDialog::Error(ShortcutError value) {
case ShortcutError::Invalid:
{
WarningBanner().Visibility(Visibility::Visible);
ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView();
ResourceLoader resourceLoader =
ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID);
InvalidShortcutWarningLabel().Text(resourceLoader.GetString(L"ShortcutDialog_InvalidShortcut"));
break;
}
case ShortcutError::Occupied:
{
WarningBanner().Visibility(Visibility::Visible);
ResourceLoader resourceLoader = ResourceLoader::GetForCurrentView();
ResourceLoader resourceLoader =
ResourceLoader::GetForCurrentView(CommonSharedConstants::APP_RESOURCE_MAP_ID);
InvalidShortcutWarningLabel().Text(resourceLoader.GetString(L"ShortcutDialog_InUse"));
break;
}

View file

@ -1,12 +1,12 @@
[requires]
fmt/10.1.1
spdlog/1.12.0
fmt/10.2.1
spdlog/1.13.0
parallel-hashmap/1.37
rapidjson/cci.20230929
kuba-zip/0.3.0
kuba-zip/0.3.1
muparser/2.3.4
yas/7.1.0
imgui/1.90
imgui/1.90.4
[generators]
MSBuildDeps

View file

@ -1 +0,0 @@
Visual Studio 需要此占位可执行文件才能正确编译项目

View file

@ -0,0 +1,48 @@
import sys
import os
import glob
import subprocess
# Merges several PRI inputs into a single resources.pri by generating a
# temporary makepri configuration and running "makepri New" on it.
#
# Expected arguments (the script is meant to be invoked by the build):
#   argv[1]  directory to work in; resources.pri is written there
#   argv[2]  ';'-separated list of PRI paths to index
if len(sys.argv) != 3:
    raise Exception("请勿直接运行此脚本")

# Locate makepri.exe in the highest-sorting Windows 10 SDK bin directory.
# An absent SDK is reported with the same error as a missing makepri.exe
# instead of an opaque "max() arg is an empty sequence".
sdkBinDirs = glob.glob(
    os.environ["ProgramFiles(x86)"] + "\\Windows Kits\\10\\bin\\10.*"
)
if not sdkBinDirs:
    raise Exception("未找到 makepri")
windowsSdkDir = max(sdkBinDirs)

makepriPath = windowsSdkDir + "\\x64\\makepri.exe"
if not os.access(makepriPath, os.X_OK):
    raise Exception("未找到 makepri")

os.chdir(sys.argv[1])

try:
    # The file declares encoding="utf-8", so it must actually be written as
    # UTF-8; the platform default encoding would corrupt non-ASCII paths.
    with open("priconfig.xml", "w", encoding="utf-8") as priConfig:
        priConfig.write(
            '<?xml version="1.0" encoding="utf-8"?>\n<resources targetOsVersion="10.0.0" majorVersion="1">'
        )

        # One <index> element per input PRI path.
        # NOTE(review): in this non-raw string `\"` is just an escaped quote,
        # so the emitted attribute is root="" — confirm that an empty root
        # (rather than a literal backslash) is what makepri should receive.
        for priPath in sys.argv[2].split(";"):
            priConfig.write(
                f"""
<index root="\" startIndexAt="{priPath}">
<default>
<qualifier name="Language" value="en-US" />
<qualifier name="Contrast" value="standard" />
<qualifier name="Scale" value="200" />
<qualifier name="HomeRegion" value="001" />
<qualifier name="TargetSize" value="256" />
<qualifier name="LayoutDirection" value="LTR" />
<qualifier name="DXFeatureLevel" value="DX9" />
<qualifier name="Configuration" value="" />
<qualifier name="AlternateForm" value="" />
<qualifier name="Platform" value="UAP" />
</default>
<indexer-config type="PRI" />
<indexer-config type="RESFILES" qualifierDelimiter="." />
</index>"""
            )

        priConfig.write("\n</resources>")

    # Output stays captured to keep the build log quiet, but a non-zero exit
    # status now raises CalledProcessError instead of being silently ignored.
    subprocess.run(
        f'"{makepriPath}" New /pr . /cf priconfig.xml /of resources.pri /in Magpie.App /o',
        capture_output=True,
        check=True,
    )
finally:
    # Always clean up the temporary configuration, even when writing it or
    # running makepri failed.
    if os.path.exists("priconfig.xml"):
        os.remove("priconfig.xml")

View file

@ -2,5 +2,5 @@
<packages>
<package id="Microsoft.UI.Xaml" version="2.8.6" targetFramework="native" />
<package id="Microsoft.Web.WebView2" version="1.0.2210.55" targetFramework="native" />
<package id="Microsoft.Windows.CppWinRT" version="2.0.230706.1" targetFramework="native" />
</packages>
<package id="Microsoft.Windows.CppWinRT" version="2.0.240111.5" targetFramework="native" />
</packages>

View file

@ -121,11 +121,21 @@ bool DeviceResources::_ObtainAdapterAndDevice(int adapterIdx) noexcept {
// https://docs.microsoft.com/en-us/windows/win32/direct3darticles/directx-warp
HRESULT hr = _dxgiFactory->EnumWarpAdapter(IID_PPV_ARGS(&adapter));
if (FAILED(hr)) {
Logger::Get().ComError("EnumWarpAdapter 失败", hr);
return false;
}
if (!_TryCreateD3DDevice(adapter)) {
Logger::Get().ComError("创建 WARP 设备失败", hr);
return false;
}
Logger::Get().Info("已创建 WARP 设备");
DXGI_ADAPTER_DESC1 desc;
hr = adapter->GetDesc1(&desc);
if (SUCCEEDED(hr)) {
LogAdapter(desc);
}
return true;
}

View file

@ -1373,8 +1373,11 @@ static uint32_t CompilePasses(
cbHlsl.append("cbuffer __CB2 : register(b1) { uint __frameCount; };\n\n");
}
if ((flags & EffectCompilerFlags::SaveSources) && !Win32Utils::DirExists(CommonSharedConstants::SOURCES_DIR)) {
if (!CreateDirectory(CommonSharedConstants::SOURCES_DIR, nullptr)) {
std::wstring sourcesPathName = StrUtils::Concat(CommonSharedConstants::SOURCES_DIR, StrUtils::UTF8ToUTF16(desc.name));
std::wstring sourcesPath = sourcesPathName.substr(0, sourcesPathName.find_last_of(L'\\'));
if ((flags & EffectCompilerFlags::SaveSources) && !Win32Utils::DirExists(sourcesPath.c_str())) {
if (!Win32Utils::CreateDir(sourcesPath, true)) {
Logger::Get().Win32Error("创建 sources 文件夹失败");
}
}
@ -1395,8 +1398,8 @@ static uint32_t CompilePasses(
if (flags & EffectCompilerFlags::SaveSources) {
std::wstring fileName = desc.passes.size() == 1
? fmt::format(L"{}{}.hlsl", CommonSharedConstants::SOURCES_DIR, StrUtils::UTF8ToUTF16(desc.name))
: fmt::format(L"{}{}_Pass{}.hlsl", CommonSharedConstants::SOURCES_DIR, StrUtils::UTF8ToUTF16(desc.name), id + 1);
? StrUtils::Concat(sourcesPathName, L".hlsl")
: fmt::format(L"{}_Pass{}.hlsl", sourcesPathName, id + 1);
if (!Win32Utils::WriteFile(fileName.c_str(), source.data(), source.size())) {
Logger::Get().Error(fmt::format("保存 Pass{} 源码失败", id + 1));

Some files were not shown because too many files have changed in this diff Show more