GPU: Implement MMPX texture filter

This commit is contained in:
Stenzek 2025-04-06 23:11:55 +10:00
parent 054e96f46b
commit dfa96e080d
No known key found for this signature in database
5 changed files with 125 additions and 6 deletions

View File

@ -724,6 +724,114 @@ void FilteredSampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords, float4 uv_limi
#undef P
)";
}
else if (texture_filter == GPUTextureFilter::MMPX)
{
ss << "#define src(xoffs, yoffs) packUnorm4x8(SampleFromVRAM(texpage, clamp(bcoords + float2((xoffs), (yoffs)), "
"uv_limits.xy, uv_limits.zw)))\n";
/*
* This part of the shader is from MMPX.glc from https://casual-effects.com/research/McGuire2021PixelArt/index.html
* Copyright 2020 Morgan McGuire & Mara Gagiu.
* Provided under the Open Source MIT license https://opensource.org/licenses/MIT
*/
ss << R"(
uint luma(uint C) {
uint alpha = (C & 0xFF000000u) >> 24;
return (((C & 0x00FF0000u) >> 16) + ((C & 0x0000FF00u) >> 8) + (C & 0x000000FFu) + 1u) * (256u - alpha);
}
bool all_eq2(uint B, uint A0, uint A1) {
return ((B ^ A0) | (B ^ A1)) == 0u;
}
bool all_eq3(uint B, uint A0, uint A1, uint A2) {
return ((B ^ A0) | (B ^ A1) | (B ^ A2)) == 0u;
}
bool all_eq4(uint B, uint A0, uint A1, uint A2, uint A3) {
return ((B ^ A0) | (B ^ A1) | (B ^ A2) | (B ^ A3)) == 0u;
}
bool any_eq3(uint B, uint A0, uint A1, uint A2) {
return B == A0 || B == A1 || B == A2;
}
bool none_eq2(uint B, uint A0, uint A1) {
return (B != A0) && (B != A1);
}
bool none_eq4(uint B, uint A0, uint A1, uint A2, uint A3) {
return B != A0 && B != A1 && B != A2 && B != A3;
}
void FilteredSampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords, float4 uv_limits, out float4 texcol, out float ialpha)
{
float2 bcoords = floor(coords);
uint A = src(-1, -1), B = src(+0, -1), C = src(+1, -1);
uint D = src(-1, +0), E = src(+0, +0), F = src(+1, +0);
uint G = src(-1, +1), H = src(+0, +1), I = src(+1, +1);
uint J = E, K = E, L = E, M = E;
if (((A ^ E) | (B ^ E) | (C ^ E) | (D ^ E) | (F ^ E) | (G ^ E) | (H ^ E) | (I ^ E)) != 0u) {
uint P = src(+0, -2), S = src(+0, +2);
uint Q = src(-2, +0), R = src(+2, +0);
uint Bl = luma(B), Dl = luma(D), El = luma(E), Fl = luma(F), Hl = luma(H);
// 1:1 slope rules
if ((D == B && D != H && D != F) && (El >= Dl || E == A) && any_eq3(E, A, C, G) && ((El < Dl) || A != D || E != P || E != Q)) J = D;
if ((B == F && B != D && B != H) && (El >= Bl || E == C) && any_eq3(E, A, C, I) && ((El < Bl) || C != B || E != P || E != R)) K = B;
if ((H == D && H != F && H != B) && (El >= Hl || E == G) && any_eq3(E, A, G, I) && ((El < Hl) || G != H || E != S || E != Q)) L = H;
if ((F == H && F != B && F != D) && (El >= Fl || E == I) && any_eq3(E, C, G, I) && ((El < Fl) || I != H || E != R || E != S)) M = F;
// Intersection rules
if ((E != F && all_eq4(E, C, I, D, Q) && all_eq2(F, B, H)) && (F != src(+3, +0))) K = M = F;
if ((E != D && all_eq4(E, A, G, F, R) && all_eq2(D, B, H)) && (D != src(-3, +0))) J = L = D;
if ((E != H && all_eq4(E, G, I, B, P) && all_eq2(H, D, F)) && (H != src(+0, +3))) L = M = H;
if ((E != B && all_eq4(E, A, C, H, S) && all_eq2(B, D, F)) && (B != src(+0, -3))) J = K = B;
if (Bl < El && all_eq4(E, G, H, I, S) && none_eq4(E, A, D, C, F)) J = K = B;
if (Hl < El && all_eq4(E, A, B, C, P) && none_eq4(E, D, G, I, F)) L = M = H;
if (Fl < El && all_eq4(E, A, D, G, Q) && none_eq4(E, B, C, I, H)) K = M = F;
if (Dl < El && all_eq4(E, C, F, I, R) && none_eq4(E, B, A, G, H)) J = L = D;
// 2:1 slope rules
if (H != B) {
if (H != A && H != E && H != C) {
if (all_eq3(H, G, F, R) && none_eq2(H, D, src(+2, -1))) L = M;
if (all_eq3(H, I, D, Q) && none_eq2(H, F, src(-2, -1))) M = L;
}
if (B != I && B != G && B != E) {
if (all_eq3(B, A, F, R) && none_eq2(B, D, src(+2, +1))) J = K;
if (all_eq3(B, C, D, Q) && none_eq2(B, F, src(-2, +1))) K = J;
}
} // H !== B
if (F != D) {
if (D != I && D != E && D != C) {
if (all_eq3(D, A, H, S) && none_eq2(D, B, src(+1, +2))) J = L;
if (all_eq3(D, G, B, P) && none_eq2(D, H, src(+1, -2))) L = J;
}
if (F != E && F != A && F != G) {
if (all_eq3(F, C, H, S) && none_eq2(F, B, src(-1, +2))) K = M;
if (all_eq3(F, I, B, P) && none_eq2(F, H, src(-1, -2))) M = K;
}
} // F !== D
} // not constant
// select quadrant based on fractional part of texture coordinates
float2 fpart = frac(coords);
uint res = (fpart.x < 0.5f) ? ((fpart.y < 0.5f) ? J : L) : ((fpart.y < 0.5f) ? K : M);
ialpha = float(res != 0u);
texcol = unpackUnorm4x8(res);
}
#undef src
)";
}
}

View File

@ -1531,7 +1531,7 @@ GPURenderer Settings::GetAutomaticRenderer()
}
static constexpr const std::array s_texture_filter_names = {
"Nearest", "Bilinear", "BilinearBinAlpha", "JINC2", "JINC2BinAlpha", "xBR", "xBRBinAlpha",
"Nearest", "Bilinear", "BilinearBinAlpha", "JINC2", "JINC2BinAlpha", "xBR", "xBRBinAlpha", "MMPX",
};
static constexpr const std::array s_texture_filter_display_names = {
TRANSLATE_DISAMBIG_NOOP("Settings", "Nearest-Neighbor", "GPUTextureFilter"),
@ -1541,6 +1541,7 @@ static constexpr const std::array s_texture_filter_display_names = {
TRANSLATE_DISAMBIG_NOOP("Settings", "JINC2 (Slow, No Edge Blending)", "GPUTextureFilter"),
TRANSLATE_DISAMBIG_NOOP("Settings", "xBR (Very Slow)", "GPUTextureFilter"),
TRANSLATE_DISAMBIG_NOOP("Settings", "xBR (Very Slow, No Edge Blending)", "GPUTextureFilter"),
TRANSLATE_DISAMBIG_NOOP("Settings", "MMPX (Slow)", "GPUTextureFilter"),
};
std::optional<GPUTextureFilter> Settings::ParseTextureFilterName(const char* str)

View File

@ -5,4 +5,4 @@
#include "common/types.h"
static constexpr u32 SHADER_CACHE_VERSION = 29;
static constexpr u32 SHADER_CACHE_VERSION = 30;

View File

@ -97,6 +97,7 @@ enum class GPUTextureFilter : u8
JINC2BinAlpha,
xBR,
xBRBinAlpha,
MMPX,
Count
};

View File

@ -300,10 +300,10 @@ void ShaderGen::WriteHeader(std::stringstream& ss, bool enable_rov /* = false */
ss << "#define END_ARRAY )\n";
ss << "#define VECTOR_BROADCAST(type, value) (type(value))\n";
ss << "float saturate(float value) { return clamp(value, 0.0, 1.0); }\n";
ss << "float2 saturate(float2 value) { return clamp(value, float2(0.0, 0.0), float2(1.0, 1.0)); }\n";
ss << "float3 saturate(float3 value) { return clamp(value, float3(0.0, 0.0, 0.0), float3(1.0, 1.0, 1.0)); }\n";
ss << "float4 saturate(float4 value) { return clamp(value, float4(0.0, 0.0, 0.0, 0.0), float4(1.0, 1.0, 1.0, "
ss << "float saturate(float value) { return clamp(value, 0.0, 1.0); }\n"
"float2 saturate(float2 value) { return clamp(value, float2(0.0, 0.0), float2(1.0, 1.0)); }\n"
"float3 saturate(float3 value) { return clamp(value, float3(0.0, 0.0, 0.0), float3(1.0, 1.0, 1.0)); }\n"
"float4 saturate(float4 value) { return clamp(value, float4(0.0, 0.0, 0.0, 0.0), float4(1.0, 1.0, 1.0, "
"1.0)); }\n";
}
else
@ -346,6 +346,15 @@ void ShaderGen::WriteHeader(std::stringstream& ss, bool enable_rov /* = false */
ss << "#define BEGIN_ARRAY(type, size) {\n";
ss << "#define END_ARRAY }\n";
ss << "#define VECTOR_BROADCAST(type, value) ((type)(value))\n";
ss << "uint packUnorm4x8(float4 value) {\n"
" uint4 packed = uint4(round(saturate(value) * 255.0));\n"
" return packed.x | (packed.y << 8) | (packed.z << 16) | (packed.w << 24);\n"
"}\n"
"\n"
"float4 unpackUnorm4x8(uint value) {\n"
" uint4 packed = uint4(value & 0xffu, (value >> 8) & 0xffu, (value >> 16) & 0xffu, value >> 24);\n"
" return float4(packed) / 255.0;\n"
"}\n";
}
ss << "\n";