GPU: Implement MMPX texture filter

2025-06-06 19:45:33 +00:00 · 2025-04-06 23:11:55 +10:00 · 2025-04-06 23:11:55 +10:00 · dfa96e080d
commit dfa96e080d
parent 054e96f46b
5 changed files with 125 additions and 6 deletions
--- a/src/core/gpu_hw_shadergen.cpp
+++ b/src/core/gpu_hw_shadergen.cpp
@ -724,6 +724,114 @@ void FilteredSampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords, float4 uv_limi

 #undef P

+)";
+  }
+  else if (texture_filter == GPUTextureFilter::MMPX)
+  {
+    ss << "#define src(xoffs, yoffs) packUnorm4x8(SampleFromVRAM(texpage, clamp(bcoords + float2((xoffs), (yoffs)), "
+          "uv_limits.xy, uv_limits.zw)))\n";
+
+    /*
+     * This part of the shader is from MMPX.glc from https://casual-effects.com/research/McGuire2021PixelArt/index.html
+     * Copyright 2020 Morgan McGuire & Mara Gagiu.
+     * Provided under the Open Source MIT license https://opensource.org/licenses/MIT
+     */
+    ss << R"(
+uint luma(uint C) { // Weight used to rank packed 32-bit colours: grows with the three low bytes, shrinks as the top (alpha) byte grows.
+    uint alpha = (C & 0xFF000000u) >> 24; // bits 24-31 are treated as alpha
+    return (((C & 0x00FF0000u) >> 16) + ((C & 0x0000FF00u) >> 8) + (C & 0x000000FFu) + 1u) * (256u - alpha); // (byte sum + 1) * (256 - alpha); both factors are >= 1, so the result is never zero
+}
+
+bool all_eq2(uint B, uint A0, uint A1) { // True only when A0 and A1 both equal B.
+    return ((B ^ A0) | (B ^ A1)) == 0u; // each XOR is zero iff its operands match; OR-ing them checks both at once
+}
+
+bool all_eq3(uint B, uint A0, uint A1, uint A2) { // True only when A0, A1 and A2 all equal B.
+    return ((B ^ A0) | (B ^ A1) | (B ^ A2)) == 0u; // any differing bit in any pair makes the OR non-zero
+}
+
+bool all_eq4(uint B, uint A0, uint A1, uint A2, uint A3) { // True only when A0..A3 all equal B.
+    return ((B ^ A0) | (B ^ A1) | (B ^ A2) | (B ^ A3)) == 0u; // any differing bit in any pair makes the OR non-zero
+}
+
+bool any_eq3(uint B, uint A0, uint A1, uint A2) { // True when B equals at least one of A0, A1, A2.
+    return B == A0 || B == A1 || B == A2;
+}
+
+bool none_eq2(uint B, uint A0, uint A1) { // True when B differs from both A0 and A1.
+    return (B != A0) && (B != A1);
+}
+
+bool none_eq4(uint B, uint A0, uint A1, uint A2, uint A3) { // True when B differs from every one of A0..A3.
+    return B != A0 && B != A1 && B != A2 && B != A3;
+}
+
+void FilteredSampleFromVRAM(TEXPAGE_VALUE texpage, float2 coords, float4 uv_limits, out float4 texcol, out float ialpha)
+{ // MMPX variant: derives four sub-texel colours (J, K, L, M) for the texel containing coords and outputs the one matching the fractional position.
+  float2 bcoords = floor(coords); // whole-texel position; every src() offset is relative to this
+
+  uint A = src(-1, -1), B = src(+0, -1), C = src(+1, -1); // 3x3 neighbourhood as packed colours, row y-1
+  uint D = src(-1, +0), E = src(+0, +0), F = src(+1, +0); // row y; E is the centre texel
+  uint G = src(-1, +1), H = src(+0, +1), I = src(+1, +1); // row y+1
+
+  uint J = E, K = E, L = E, M = E; // outputs default to the centre colour; J: low x/low y, K: high x/low y, L: low x/high y, M: high x/high y
+
+  if (((A ^ E) | (B ^ E) | (C ^ E) | (D ^ E) | (F ^ E) | (G ^ E) | (H ^ E) | (I ^ E)) != 0u) { // skip every rule when all eight neighbours equal E
+    uint P = src(+0, -2), S = src(+0, +2); // extra taps two texels above / below
+    uint Q = src(-2, +0), R = src(+2, +0); // extra taps two texels left / right
+    uint Bl = luma(B), Dl = luma(D), El = luma(E), Fl = luma(F), Hl = luma(H); // luma() weights of the centre and its four edge neighbours
+
+    // 1:1 slope rules
+    if ((D == B && D != H && D != F) && (El >= Dl || E == A) && any_eq3(E, A, C, G) && ((El < Dl) || A != D || E != P || E != Q)) J = D;
+    if ((B == F && B != D && B != H) && (El >= Bl || E == C) && any_eq3(E, A, C, I) && ((El < Bl) || C != B || E != P || E != R)) K = B;
+    if ((H == D && H != F && H != B) && (El >= Hl || E == G) && any_eq3(E, A, G, I) && ((El < Hl) || G != H || E != S || E != Q)) L = H;
+    if ((F == H && F != B && F != D) && (El >= Fl || E == I) && any_eq3(E, C, G, I) && ((El < Fl) || I != H || E != R || E != S)) M = F;
+
+    // Intersection rules
+    if ((E != F && all_eq4(E, C, I, D, Q) && all_eq2(F, B, H)) && (F != src(+3, +0))) K = M = F;
+    if ((E != D && all_eq4(E, A, G, F, R) && all_eq2(D, B, H)) && (D != src(-3, +0))) J = L = D;
+    if ((E != H && all_eq4(E, G, I, B, P) && all_eq2(H, D, F)) && (H != src(+0, +3))) L = M = H;
+    if ((E != B && all_eq4(E, A, C, H, S) && all_eq2(B, D, F)) && (B != src(+0, -3))) J = K = B;
+    if (Bl < El && all_eq4(E, G, H, I, S) && none_eq4(E, A, D, C, F)) J = K = B;
+    if (Hl < El && all_eq4(E, A, B, C, P) && none_eq4(E, D, G, I, F)) L = M = H;
+    if (Fl < El && all_eq4(E, A, D, G, Q) && none_eq4(E, B, C, I, H)) K = M = F;
+    if (Dl < El && all_eq4(E, C, F, I, R) && none_eq4(E, B, A, G, H)) J = L = D;
+
+    // 2:1 slope rules
+    if (H != B) {
+      if (H != A && H != E && H != C) {
+        if (all_eq3(H, G, F, R) && none_eq2(H, D, src(+2, -1))) L = M; // copies the current M, including any change made by the rules above
+        if (all_eq3(H, I, D, Q) && none_eq2(H, F, src(-2, -1))) M = L;
+      }
+
+      if (B != I && B != G && B != E) {
+        if (all_eq3(B, A, F, R) && none_eq2(B, D, src(+2, +1))) J = K;
+        if (all_eq3(B, C, D, Q) && none_eq2(B, F, src(-2, +1))) K = J;
+      }
+    } // H != B
+
+    if (F != D) {
+      if (D != I && D != E && D != C) {
+        if (all_eq3(D, A, H, S) && none_eq2(D, B, src(+1, +2))) J = L;
+        if (all_eq3(D, G, B, P) && none_eq2(D, H, src(+1, -2))) L = J;
+      }
+
+      if (F != E && F != A && F != G) {
+        if (all_eq3(F, C, H, S) && none_eq2(F, B, src(-1, +2))) K = M;
+        if (all_eq3(F, I, B, P) && none_eq2(F, H, src(-1, -2))) M = K;
+      }
+    } // F != D
+  } // neighbourhood not constant
+
+  // select quadrant based on fractional part of texture coordinates
+  float2 fpart = frac(coords);
+  uint res = (fpart.x < 0.5f) ? ((fpart.y < 0.5f) ? J : L) : ((fpart.y < 0.5f) ? K : M);
+
+  ialpha = float(res != 0u); // 0.0 only when the chosen packed colour is all-zero, otherwise 1.0
+  texcol = unpackUnorm4x8(res); // back to a float4 colour
+}
+
+#undef src
 )";
  }
 }
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@ -1531,7 +1531,7 @@ GPURenderer Settings::GetAutomaticRenderer()
 }

 static constexpr const std::array s_texture_filter_names = {
-  "Nearest", "Bilinear", "BilinearBinAlpha", "JINC2", "JINC2BinAlpha", "xBR", "xBRBinAlpha",
+  "Nearest", "Bilinear", "BilinearBinAlpha", "JINC2", "JINC2BinAlpha", "xBR", "xBRBinAlpha", "MMPX",
 };
 static constexpr const std::array s_texture_filter_display_names = {
  TRANSLATE_DISAMBIG_NOOP("Settings", "Nearest-Neighbor", "GPUTextureFilter"),
@ -1541,6 +1541,7 @@ static constexpr const std::array s_texture_filter_display_names = {
  TRANSLATE_DISAMBIG_NOOP("Settings", "JINC2 (Slow, No Edge Blending)", "GPUTextureFilter"),
  TRANSLATE_DISAMBIG_NOOP("Settings", "xBR (Very Slow)", "GPUTextureFilter"),
  TRANSLATE_DISAMBIG_NOOP("Settings", "xBR (Very Slow, No Edge Blending)", "GPUTextureFilter"),
+  TRANSLATE_DISAMBIG_NOOP("Settings", "MMPX (Slow)", "GPUTextureFilter"),
 };

 std::optional<GPUTextureFilter> Settings::ParseTextureFilterName(const char* str)
--- a/src/core/shader_cache_version.h
+++ b/src/core/shader_cache_version.h
@ -5,4 +5,4 @@

 #include "common/types.h"

-static constexpr u32 SHADER_CACHE_VERSION = 29;
+static constexpr u32 SHADER_CACHE_VERSION = 30;
--- a/src/core/types.h
+++ b/src/core/types.h
@ -97,6 +97,7 @@ enum class GPUTextureFilter : u8
  JINC2BinAlpha,
  xBR,
  xBRBinAlpha,
+  MMPX,
  Count
 };

--- a/src/util/shadergen.cpp
+++ b/src/util/shadergen.cpp
@ -300,10 +300,10 @@ void ShaderGen::WriteHeader(std::stringstream& ss, bool enable_rov /* = false */
    ss << "#define END_ARRAY )\n";
    ss << "#define VECTOR_BROADCAST(type, value) (type(value))\n";

-    ss << "float saturate(float value) { return clamp(value, 0.0, 1.0); }\n";
-    ss << "float2 saturate(float2 value) { return clamp(value, float2(0.0, 0.0), float2(1.0, 1.0)); }\n";
-    ss << "float3 saturate(float3 value) { return clamp(value, float3(0.0, 0.0, 0.0), float3(1.0, 1.0, 1.0)); }\n";
-    ss << "float4 saturate(float4 value) { return clamp(value, float4(0.0, 0.0, 0.0, 0.0), float4(1.0, 1.0, 1.0, "
+    ss << "float saturate(float value) { return clamp(value, 0.0, 1.0); }\n"
+          "float2 saturate(float2 value) { return clamp(value, float2(0.0, 0.0), float2(1.0, 1.0)); }\n"
+          "float3 saturate(float3 value) { return clamp(value, float3(0.0, 0.0, 0.0), float3(1.0, 1.0, 1.0)); }\n"
+          "float4 saturate(float4 value) { return clamp(value, float4(0.0, 0.0, 0.0, 0.0), float4(1.0, 1.0, 1.0, "
          "1.0)); }\n";
  }
  else
@ -346,6 +346,15 @@ void ShaderGen::WriteHeader(std::stringstream& ss, bool enable_rov /* = false */
    ss << "#define BEGIN_ARRAY(type, size) {\n";
    ss << "#define END_ARRAY }\n";
    ss << "#define VECTOR_BROADCAST(type, value) ((type)(value))\n";
+    ss << "uint packUnorm4x8(float4 value) {\n" // emits shader-side pack/unpack helpers; presumably this is the HLSL path, where they are not built in -- confirm
+          "  uint4 packed = uint4(round(saturate(value) * 255.0));\n" // saturate, scale each component to 0..255, round to integer
+          "  return packed.x | (packed.y << 8) | (packed.z << 16) | (packed.w << 24);\n" // x in bits 0-7, y 8-15, z 16-23, w 24-31
+          "}\n"
+          "\n"
+          "float4 unpackUnorm4x8(uint value) {\n" // inverse of the packing above
+          "  uint4 packed = uint4(value & 0xffu, (value >> 8) & 0xffu, (value >> 16) & 0xffu, value >> 24);\n" // split into four bytes, lowest byte first
+          "  return float4(packed) / 255.0;\n" // rescale each byte to 0..1
+          "}\n";
  }

  ss << "\n";