diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index a0b543cd3..48f3cee22 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -724,6 +724,114 @@ void FilteredSampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords, float4 uv_limi #undef P +)"; + } + else if (texture_filter == GPUTextureFilter::MMPX) + { + ss << "#define src(xoffs, yoffs) packUnorm4x8(SampleFromVRAM(texpage, clamp(bcoords + float2((xoffs), (yoffs)), " + "uv_limits.xy, uv_limits.zw)))\n"; + + /* + * This part of the shader is from MMPX.glc from https://casual-effects.com/research/McGuire2021PixelArt/index.html + * Copyright 2020 Morgan McGuire & Mara Gagiu. + * Provided under the Open Source MIT license https://opensource.org/licenses/MIT + */ + ss << R"( +uint luma(uint C) { + uint alpha = (C & 0xFF000000u) >> 24; + return (((C & 0x00FF0000u) >> 16) + ((C & 0x0000FF00u) >> 8) + (C & 0x000000FFu) + 1u) * (256u - alpha); +} + +bool all_eq2(uint B, uint A0, uint A1) { + return ((B ^ A0) | (B ^ A1)) == 0u; +} + +bool all_eq3(uint B, uint A0, uint A1, uint A2) { + return ((B ^ A0) | (B ^ A1) | (B ^ A2)) == 0u; +} + +bool all_eq4(uint B, uint A0, uint A1, uint A2, uint A3) { + return ((B ^ A0) | (B ^ A1) | (B ^ A2) | (B ^ A3)) == 0u; +} + +bool any_eq3(uint B, uint A0, uint A1, uint A2) { + return B == A0 || B == A1 || B == A2; +} + +bool none_eq2(uint B, uint A0, uint A1) { + return (B != A0) && (B != A1); +} + +bool none_eq4(uint B, uint A0, uint A1, uint A2, uint A3) { + return B != A0 && B != A1 && B != A2 && B != A3; +} + +void FilteredSampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords, float4 uv_limits, out float4 texcol, out float ialpha) +{ + float2 bcoords = floor(coords); + + uint A = src(-1, -1), B = src(+0, -1), C = src(+1, -1); + uint D = src(-1, +0), E = src(+0, +0), F = src(+1, +0); + uint G = src(-1, +1), H = src(+0, +1), I = src(+1, +1); + + uint J = E, K = E, L = E, M = E; + + if (((A ^ E) | (B ^ E) | (C ^ E) | (D ^ E) | (F ^ E) | (G ^ E) | (H ^ E) | (I ^ E)) != 0u) { + uint P = src(+0, -2), S = src(+0, +2); + uint Q = src(-2, +0), R = src(+2, +0); + uint Bl = luma(B), Dl = luma(D), El = luma(E), Fl = luma(F), Hl = luma(H); + + // 1:1 slope rules + if ((D == B && D != H && D != F) && (El >= Dl || E == A) && any_eq3(E, A, C, G) && ((El < Dl) || A != D || E != P || E != Q)) J = D; + if ((B == F && B != D && B != H) && (El >= Bl || E == C) && any_eq3(E, A, C, I) && ((El < Bl) || C != B || E != P || E != R)) K = B; + if ((H == D && H != F && H != B) && (El >= Hl || E == G) && any_eq3(E, A, G, I) && ((El < Hl) || G != H || E != S || E != Q)) L = H; + if ((F == H && F != B && F != D) && (El >= Fl || E == I) && any_eq3(E, C, G, I) && ((El < Fl) || I != H || E != R || E != S)) M = F; + + // Intersection rules + if ((E != F && all_eq4(E, C, I, D, Q) && all_eq2(F, B, H)) && (F != src(+3, +0))) K = M = F; + if ((E != D && all_eq4(E, A, G, F, R) && all_eq2(D, B, H)) && (D != src(-3, +0))) J = L = D; + if ((E != H && all_eq4(E, G, I, B, P) && all_eq2(H, D, F)) && (H != src(+0, +3))) L = M = H; + if ((E != B && all_eq4(E, A, C, H, S) && all_eq2(B, D, F)) && (B != src(+0, -3))) J = K = B; + if (Bl < El && all_eq4(E, G, H, I, S) && none_eq4(E, A, D, C, F)) J = K = B; + if (Hl < El && all_eq4(E, A, B, C, P) && none_eq4(E, D, G, I, F)) L = M = H; + if (Fl < El && all_eq4(E, A, D, G, Q) && none_eq4(E, B, C, I, H)) K = M = F; + if (Dl < El && all_eq4(E, C, F, I, R) && none_eq4(E, B, A, G, H)) J = L = D; + + // 2:1 slope rules + if (H != B) { + if (H != A && H != E && H != C) { + if (all_eq3(H, G, F, R) && none_eq2(H, D, src(+2, -1))) L = M; + if (all_eq3(H, I, D, Q) && none_eq2(H, F, src(-2, -1))) M = L; + } + + if (B != I && B != G && B != E) { + if (all_eq3(B, A, F, R) && none_eq2(B, D, src(+2, +1))) J = K; + if (all_eq3(B, C, D, Q) && none_eq2(B, F, src(-2, +1))) K = J; + } + } // H !== B + + if (F != D) { + if (D != I && D != E && D != C) { + if (all_eq3(D, A, H, S) && none_eq2(D, B, src(+1, +2))) J = L; + if (all_eq3(D, G, B, P) && none_eq2(D, H, src(+1, -2))) L = J; + } + + if (F != E && F != A && F != G) { + if (all_eq3(F, C, H, S) && none_eq2(F, B, src(-1, +2))) K = M; + if (all_eq3(F, I, B, P) && none_eq2(F, H, src(-1, -2))) M = K; + } + } // F !== D + } // not constant + + // select quadrant based on fractional part of texture coordinates + float2 fpart = frac(coords); + uint res = (fpart.x < 0.5f) ? ((fpart.y < 0.5f) ? J : L) : ((fpart.y < 0.5f) ? K : M); + + ialpha = float(res != 0u); + texcol = unpackUnorm4x8(res); +} + +#undef src )"; } } diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 6990e04ae..3b04be3f5 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -1531,7 +1531,7 @@ GPURenderer Settings::GetAutomaticRenderer() } static constexpr const std::array s_texture_filter_names = { - "Nearest", "Bilinear", "BilinearBinAlpha", "JINC2", "JINC2BinAlpha", "xBR", "xBRBinAlpha", + "Nearest", "Bilinear", "BilinearBinAlpha", "JINC2", "JINC2BinAlpha", "xBR", "xBRBinAlpha", "MMPX", }; static constexpr const std::array s_texture_filter_display_names = { TRANSLATE_DISAMBIG_NOOP("Settings", "Nearest-Neighbor", "GPUTextureFilter"), @@ -1541,6 +1541,7 @@ static constexpr const std::array s_texture_filter_display_names = { TRANSLATE_DISAMBIG_NOOP("Settings", "JINC2 (Slow, No Edge Blending)", "GPUTextureFilter"), TRANSLATE_DISAMBIG_NOOP("Settings", "xBR (Very Slow)", "GPUTextureFilter"), TRANSLATE_DISAMBIG_NOOP("Settings", "xBR (Very Slow, No Edge Blending)", "GPUTextureFilter"), + TRANSLATE_DISAMBIG_NOOP("Settings", "MMPX (Slow)", "GPUTextureFilter"), }; std::optional Settings::ParseTextureFilterName(const char* str) diff --git a/src/core/shader_cache_version.h b/src/core/shader_cache_version.h index fed074bcc..30121cf2c 100644 --- a/src/core/shader_cache_version.h +++ b/src/core/shader_cache_version.h @@ -5,4 +5,4 @@ #include "common/types.h" -static constexpr u32 SHADER_CACHE_VERSION = 29; +static constexpr u32 SHADER_CACHE_VERSION = 30; diff --git a/src/core/types.h b/src/core/types.h index 9a55e0a83..d2f16fb86 100644 --- a/src/core/types.h +++ b/src/core/types.h @@ -97,6 +97,7 @@ enum class GPUTextureFilter : u8 JINC2BinAlpha, xBR, xBRBinAlpha, + MMPX, Count }; diff --git a/src/util/shadergen.cpp b/src/util/shadergen.cpp index 282ef9d7a..bee236766 100644 --- a/src/util/shadergen.cpp +++ b/src/util/shadergen.cpp @@ -300,10 +300,10 @@ void ShaderGen::WriteHeader(std::stringstream& ss, bool enable_rov /* = false */ ss << "#define END_ARRAY )\n"; ss << "#define VECTOR_BROADCAST(type, value) (type(value))\n"; - ss << "float saturate(float value) { return clamp(value, 0.0, 1.0); }\n"; - ss << "float2 saturate(float2 value) { return clamp(value, float2(0.0, 0.0), float2(1.0, 1.0)); }\n"; - ss << "float3 saturate(float3 value) { return clamp(value, float3(0.0, 0.0, 0.0), float3(1.0, 1.0, 1.0)); }\n"; - ss << "float4 saturate(float4 value) { return clamp(value, float4(0.0, 0.0, 0.0, 0.0), float4(1.0, 1.0, 1.0, " + ss << "float saturate(float value) { return clamp(value, 0.0, 1.0); }\n" + "float2 saturate(float2 value) { return clamp(value, float2(0.0, 0.0), float2(1.0, 1.0)); }\n" + "float3 saturate(float3 value) { return clamp(value, float3(0.0, 0.0, 0.0), float3(1.0, 1.0, 1.0)); }\n" + "float4 saturate(float4 value) { return clamp(value, float4(0.0, 0.0, 0.0, 0.0), float4(1.0, 1.0, 1.0, " "1.0)); }\n"; } else @@ -346,6 +346,15 @@ void ShaderGen::WriteHeader(std::stringstream& ss, bool enable_rov /* = false */ ss << "#define BEGIN_ARRAY(type, size) {\n"; ss << "#define END_ARRAY }\n"; ss << "#define VECTOR_BROADCAST(type, value) ((type)(value))\n"; + ss << "uint packUnorm4x8(float4 value) {\n" + " uint4 packed = uint4(round(saturate(value) * 255.0));\n" + " return packed.x | (packed.y << 8) | (packed.z << 16) | (packed.w << 24);\n" + "}\n" + "\n" + "float4 unpackUnorm4x8(uint value) {\n" + " uint4 packed = uint4(value & 0xffu, (value >> 8) & 0xffu, (value >> 16) & 0xffu, value >> 24);\n" + " return float4(packed) / 255.0;\n" + "}\n"; } ss << "\n";