From e1c876671ad4cbc34b90fda6f66128004b808dc1 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 22 Sep 2024 14:15:06 +1000 Subject: [PATCH] Misc: More iNN => sNN --- src/common/gsvector_neon.h | 64 +++++++++++++++++----------------- src/common/gsvector_nosimd.h | 64 +++++++++++++++++----------------- src/common/gsvector_sse.h | 64 +++++++++++++++++----------------- src/core/cdrom.cpp | 2 +- src/core/gpu.h | 6 ++-- src/core/gpu_hw.cpp | 6 ++-- src/core/gpu_sw.cpp | 16 ++++----- src/core/gpu_sw_rasterizer.inl | 8 ++--- 8 files changed, 115 insertions(+), 115 deletions(-) diff --git a/src/common/gsvector_neon.h b/src/common/gsvector_neon.h index 24b03cf84..39365a66e 100644 --- a/src/common/gsvector_neon.h +++ b/src/common/gsvector_neon.h @@ -93,17 +93,17 @@ public: ALWAYS_INLINE operator int32x2_t() const { return v2s; } - ALWAYS_INLINE GSVector2i sat_i8(const GSVector2i& min, const GSVector2i& max) const + ALWAYS_INLINE GSVector2i sat_s8(const GSVector2i& min, const GSVector2i& max) const { - return max_i8(min).min_i8(max); + return max_s8(min).min_s8(max); } - ALWAYS_INLINE GSVector2i sat_i16(const GSVector2i& min, const GSVector2i& max) const + ALWAYS_INLINE GSVector2i sat_s16(const GSVector2i& min, const GSVector2i& max) const { - return max_i16(min).min_i16(max); + return max_s16(min).min_s16(max); } - ALWAYS_INLINE GSVector2i sat_i32(const GSVector2i& min, const GSVector2i& max) const + ALWAYS_INLINE GSVector2i sat_s32(const GSVector2i& min, const GSVector2i& max) const { - return max_i32(min).min_i32(max); + return max_s32(min).min_s32(max); } ALWAYS_INLINE GSVector2i sat_u8(const GSVector2i& min, const GSVector2i& max) const @@ -119,29 +119,29 @@ public: return max_u32(min).min_u32(max); } - ALWAYS_INLINE GSVector2i min_i8(const GSVector2i& v) const + ALWAYS_INLINE GSVector2i min_s8(const GSVector2i& v) const { return GSVector2i(vreinterpret_s32_s8(vmin_s8(vreinterpret_s8_s32(v2s), vreinterpret_s8_s32(v.v2s)))); } - ALWAYS_INLINE GSVector2i max_i8(const 
GSVector2i& v) const + ALWAYS_INLINE GSVector2i max_s8(const GSVector2i& v) const { return GSVector2i(vreinterpret_s32_s8(vmax_s8(vreinterpret_s8_s32(v2s), vreinterpret_s8_s32(v.v2s)))); } - ALWAYS_INLINE GSVector2i min_i16(const GSVector2i& v) const + ALWAYS_INLINE GSVector2i min_s16(const GSVector2i& v) const { return GSVector2i(vreinterpret_s32_s16(vmin_s16(vreinterpret_s16_s32(v2s), vreinterpret_s16_s32(v.v2s)))); } - ALWAYS_INLINE GSVector2i max_i16(const GSVector2i& v) const + ALWAYS_INLINE GSVector2i max_s16(const GSVector2i& v) const { return GSVector2i(vreinterpret_s32_s16(vmax_s16(vreinterpret_s16_s32(v2s), vreinterpret_s16_s32(v.v2s)))); } - ALWAYS_INLINE GSVector2i min_i32(const GSVector2i& v) const { return GSVector2i(vmin_s32(v2s, v.v2s)); } + ALWAYS_INLINE GSVector2i min_s32(const GSVector2i& v) const { return GSVector2i(vmin_s32(v2s, v.v2s)); } - ALWAYS_INLINE GSVector2i max_i32(const GSVector2i& v) const { return GSVector2i(vmax_s32(v2s, v.v2s)); } + ALWAYS_INLINE GSVector2i max_s32(const GSVector2i& v) const { return GSVector2i(vmax_s32(v2s, v.v2s)); } ALWAYS_INLINE GSVector2i min_u8(const GSVector2i& v) const { @@ -1136,37 +1136,37 @@ public: #endif } - ALWAYS_INLINE GSVector4i runion(const GSVector4i& a) const { return min_i32(a).upl64(max_i32(a).srl<8>()); } + ALWAYS_INLINE GSVector4i runion(const GSVector4i& a) const { return min_s32(a).upl64(max_s32(a).srl<8>()); } - ALWAYS_INLINE GSVector4i rintersect(const GSVector4i& a) const { return sat_i32(a); } + ALWAYS_INLINE GSVector4i rintersect(const GSVector4i& a) const { return sat_s32(a); } ALWAYS_INLINE bool rintersects(const GSVector4i& v) const { return !rintersect(v).rempty(); } ALWAYS_INLINE bool rcontains(const GSVector4i& v) const { return rintersect(v).eq(v); } ALWAYS_INLINE u32 rgba32() const { return static_cast<u32>(ps32().pu16().extract32<0>()); } - ALWAYS_INLINE GSVector4i sat_i8(const GSVector4i& min, const GSVector4i& max) const + ALWAYS_INLINE GSVector4i sat_s8(const GSVector4i& min, 
const GSVector4i& max) const { - return max_i8(min).min_i8(max); + return max_s8(min).min_s8(max); } - ALWAYS_INLINE GSVector4i sat_i8(const GSVector4i& minmax) const + ALWAYS_INLINE GSVector4i sat_s8(const GSVector4i& minmax) const { - return max_i8(minmax.xyxy()).min_i8(minmax.zwzw()); + return max_s8(minmax.xyxy()).min_s8(minmax.zwzw()); } - ALWAYS_INLINE GSVector4i sat_i16(const GSVector4i& min, const GSVector4i& max) const + ALWAYS_INLINE GSVector4i sat_s16(const GSVector4i& min, const GSVector4i& max) const { - return max_i16(min).min_i16(max); + return max_s16(min).min_s16(max); } - ALWAYS_INLINE GSVector4i sat_i16(const GSVector4i& minmax) const + ALWAYS_INLINE GSVector4i sat_s16(const GSVector4i& minmax) const { - return max_i16(minmax.xyxy()).min_i16(minmax.zwzw()); + return max_s16(minmax.xyxy()).min_s16(minmax.zwzw()); } - ALWAYS_INLINE GSVector4i sat_i32(const GSVector4i& min, const GSVector4i& max) const + ALWAYS_INLINE GSVector4i sat_s32(const GSVector4i& min, const GSVector4i& max) const { - return max_i32(min).min_i32(max); + return max_s32(min).min_s32(max); } - ALWAYS_INLINE GSVector4i sat_i32(const GSVector4i& minmax) const + ALWAYS_INLINE GSVector4i sat_s32(const GSVector4i& minmax) const { - return max_i32(minmax.xyxy()).min_i32(minmax.zwzw()); + return max_s32(minmax.xyxy()).min_s32(minmax.zwzw()); } ALWAYS_INLINE GSVector4i sat_u8(const GSVector4i& min, const GSVector4i& max) const @@ -1194,29 +1194,29 @@ public: return max_u32(minmax.xyxy()).min_u32(minmax.zwzw()); } - ALWAYS_INLINE GSVector4i min_i8(const GSVector4i& v) const + ALWAYS_INLINE GSVector4i min_s8(const GSVector4i& v) const { return GSVector4i(vreinterpretq_s32_s8(vminq_s8(vreinterpretq_s8_s32(v4s), vreinterpretq_s8_s32(v.v4s)))); } - ALWAYS_INLINE GSVector4i max_i8(const GSVector4i& v) const + ALWAYS_INLINE GSVector4i max_s8(const GSVector4i& v) const { return GSVector4i(vreinterpretq_s32_s8(vmaxq_s8(vreinterpretq_s8_s32(v4s), vreinterpretq_s8_s32(v.v4s)))); } - ALWAYS_INLINE 
GSVector4i min_i16(const GSVector4i& v) const + ALWAYS_INLINE GSVector4i min_s16(const GSVector4i& v) const { return GSVector4i(vreinterpretq_s32_s16(vminq_s16(vreinterpretq_s16_s32(v4s), vreinterpretq_s16_s32(v.v4s)))); } - ALWAYS_INLINE GSVector4i max_i16(const GSVector4i& v) const + ALWAYS_INLINE GSVector4i max_s16(const GSVector4i& v) const { return GSVector4i(vreinterpretq_s32_s16(vmaxq_s16(vreinterpretq_s16_s32(v4s), vreinterpretq_s16_s32(v.v4s)))); } - ALWAYS_INLINE GSVector4i min_i32(const GSVector4i& v) const { return GSVector4i(vminq_s32(v4s, v.v4s)); } + ALWAYS_INLINE GSVector4i min_s32(const GSVector4i& v) const { return GSVector4i(vminq_s32(v4s, v.v4s)); } - ALWAYS_INLINE GSVector4i max_i32(const GSVector4i& v) const { return GSVector4i(vmaxq_s32(v4s, v.v4s)); } + ALWAYS_INLINE GSVector4i max_s32(const GSVector4i& v) const { return GSVector4i(vmaxq_s32(v4s, v.v4s)); } ALWAYS_INLINE GSVector4i min_u8(const GSVector4i& v) const { diff --git a/src/common/gsvector_nosimd.h b/src/common/gsvector_nosimd.h index 0ddc9d9f7..3d268e9d7 100644 --- a/src/common/gsvector_nosimd.h +++ b/src/common/gsvector_nosimd.h @@ -130,17 +130,17 @@ public: y = i; } - ALWAYS_INLINE GSVector2i sat_i8(const GSVector2i& min, const GSVector2i& max) const + ALWAYS_INLINE GSVector2i sat_s8(const GSVector2i& min, const GSVector2i& max) const { - return max_i8(min).min_i8(max); + return max_s8(min).min_s8(max); } - ALWAYS_INLINE GSVector2i sat_i16(const GSVector2i& min, const GSVector2i& max) const + ALWAYS_INLINE GSVector2i sat_s16(const GSVector2i& min, const GSVector2i& max) const { - return max_i16(min).min_i16(max); + return max_s16(min).min_s16(max); } - ALWAYS_INLINE GSVector2i sat_i32(const GSVector2i& min, const GSVector2i& max) const + ALWAYS_INLINE GSVector2i sat_s32(const GSVector2i& min, const GSVector2i& max) const { - return max_i32(min).min_i32(max); + return max_s32(min).min_s32(max); } ALWAYS_INLINE GSVector2i sat_u8(const GSVector2i& min, const GSVector2i& max) const 
@@ -156,12 +156,12 @@ public: return max_u32(min).min_u32(max); } - GSVector2i min_i8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = std::min(S8[i], v.S8[i])); } - GSVector2i max_i8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = std::max(S8[i], v.S8[i])); } - GSVector2i min_i16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = std::min(S16[i], v.S16[i])); } - GSVector2i max_i16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = std::max(S16[i], v.S16[i])); } - GSVector2i min_i32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = std::min(S32[i], v.S32[i])); } - GSVector2i max_i32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = std::max(S32[i], v.S32[i])); } + GSVector2i min_s8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = std::min(S8[i], v.S8[i])); } + GSVector2i max_s8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = std::max(S8[i], v.S8[i])); } + GSVector2i min_s16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = std::min(S16[i], v.S16[i])); } + GSVector2i max_s16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = std::max(S16[i], v.S16[i])); } + GSVector2i min_s32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = std::min(S32[i], v.S32[i])); } + GSVector2i max_s32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = std::max(S32[i], v.S32[i])); } GSVector2i min_u8(const GSVector2i& v) const { ALL_LANES_8(ret.U8[i] = std::min(U8[i], v.U8[i])); } GSVector2i max_u8(const GSVector2i& v) const { ALL_LANES_8(ret.U8[i] = std::max(U8[i], v.U8[i])); } @@ -952,37 +952,37 @@ public: ALWAYS_INLINE bool rempty() const { return lt32(zwzw()).mask() != 0x00ff; } // TODO: Optimize for no-simd, this generates crap code. 
- ALWAYS_INLINE GSVector4i runion(const GSVector4i& v) const { return min_i32(v).upl64(max_i32(v).srl<8>()); } + ALWAYS_INLINE GSVector4i runion(const GSVector4i& v) const { return min_s32(v).upl64(max_s32(v).srl<8>()); } - ALWAYS_INLINE GSVector4i rintersect(const GSVector4i& v) const { return sat_i32(v); } + ALWAYS_INLINE GSVector4i rintersect(const GSVector4i& v) const { return sat_s32(v); } ALWAYS_INLINE bool rintersects(const GSVector4i& v) const { return !rintersect(v).rempty(); } ALWAYS_INLINE bool rcontains(const GSVector4i& v) const { return rintersect(v).eq(v); } ALWAYS_INLINE u32 rgba32() const { return static_cast<u32>(ps32().pu16().extract32<0>()); } - ALWAYS_INLINE GSVector4i sat_i8(const GSVector4i& min, const GSVector4i& max) const + ALWAYS_INLINE GSVector4i sat_s8(const GSVector4i& min, const GSVector4i& max) const { - return max_i8(min).min_i8(max); + return max_s8(min).min_s8(max); } - ALWAYS_INLINE GSVector4i sat_i8(const GSVector4i& minmax) const + ALWAYS_INLINE GSVector4i sat_s8(const GSVector4i& minmax) const { - return max_i8(minmax.xyxy()).min_i8(minmax.zwzw()); + return max_s8(minmax.xyxy()).min_s8(minmax.zwzw()); } - ALWAYS_INLINE GSVector4i sat_i16(const GSVector4i& min, const GSVector4i& max) const + ALWAYS_INLINE GSVector4i sat_s16(const GSVector4i& min, const GSVector4i& max) const { - return max_i16(min).min_i16(max); + return max_s16(min).min_s16(max); } - ALWAYS_INLINE GSVector4i sat_i16(const GSVector4i& minmax) const + ALWAYS_INLINE GSVector4i sat_s16(const GSVector4i& minmax) const { - return max_i16(minmax.xyxy()).min_i16(minmax.zwzw()); + return max_s16(minmax.xyxy()).min_s16(minmax.zwzw()); } - ALWAYS_INLINE GSVector4i sat_i32(const GSVector4i& min, const GSVector4i& max) const + ALWAYS_INLINE GSVector4i sat_s32(const GSVector4i& min, const GSVector4i& max) const { - return max_i32(min).min_i32(max); + return max_s32(min).min_s32(max); } - ALWAYS_INLINE GSVector4i sat_i32(const GSVector4i& minmax) const + ALWAYS_INLINE GSVector4i 
sat_s32(const GSVector4i& minmax) const { - return max_i32(minmax.xyxy()).min_i32(minmax.zwzw()); + return max_s32(minmax.xyxy()).min_s32(minmax.zwzw()); } ALWAYS_INLINE GSVector4i sat_u8(const GSVector4i& min, const GSVector4i& max) const @@ -1010,12 +1010,12 @@ public: return max_u32(minmax.xyxy()).min_u32(minmax.zwzw()); } - GSVector4i min_i8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = std::min(S8[i], v.S8[i])); } - GSVector4i max_i8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = std::max(S8[i], v.S8[i])); } - GSVector4i min_i16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = std::min(S16[i], v.S16[i])); } - GSVector4i max_i16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = std::max(S16[i], v.S16[i])); } - GSVector4i min_i32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = std::min(S32[i], v.S32[i])); } - GSVector4i max_i32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = std::max(S32[i], v.S32[i])); } + GSVector4i min_s8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = std::min(S8[i], v.S8[i])); } + GSVector4i max_s8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = std::max(S8[i], v.S8[i])); } + GSVector4i min_s16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = std::min(S16[i], v.S16[i])); } + GSVector4i max_s16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = std::max(S16[i], v.S16[i])); } + GSVector4i min_s32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = std::min(S32[i], v.S32[i])); } + GSVector4i max_s32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = std::max(S32[i], v.S32[i])); } GSVector4i min_u8(const GSVector4i& v) const { ALL_LANES_8(ret.U8[i] = std::min(U8[i], v.U8[i])); } GSVector4i max_u8(const GSVector4i& v) const { ALL_LANES_8(ret.U8[i] = std::max(U8[i], v.U8[i])); } diff --git a/src/common/gsvector_sse.h b/src/common/gsvector_sse.h index 33eeb4992..d01332116 100644 --- a/src/common/gsvector_sse.h +++ b/src/common/gsvector_sse.h @@ -102,17 +102,17 @@ public: 
ALWAYS_INLINE operator __m128i() const { return m; } - ALWAYS_INLINE GSVector2i sat_i8(const GSVector2i& min, const GSVector2i& max) const + ALWAYS_INLINE GSVector2i sat_s8(const GSVector2i& min, const GSVector2i& max) const { - return max_i8(min).min_i8(max); + return max_s8(min).min_s8(max); } - ALWAYS_INLINE GSVector2i sat_i16(const GSVector2i& min, const GSVector2i& max) const + ALWAYS_INLINE GSVector2i sat_s16(const GSVector2i& min, const GSVector2i& max) const { - return max_i16(min).min_i16(max); + return max_s16(min).min_s16(max); } - ALWAYS_INLINE GSVector2i sat_i32(const GSVector2i& min, const GSVector2i& max) const + ALWAYS_INLINE GSVector2i sat_s32(const GSVector2i& min, const GSVector2i& max) const { - return max_i32(min).min_i32(max); + return max_s32(min).min_s32(max); } ALWAYS_INLINE GSVector2i sat_u8(const GSVector2i& min, const GSVector2i& max) const @@ -128,12 +128,12 @@ public: return max_u32(min).min_u32(max); } - ALWAYS_INLINE GSVector2i min_i8(const GSVector2i& v) const { return GSVector2i(_mm_min_epi8(m, v)); } - ALWAYS_INLINE GSVector2i max_i8(const GSVector2i& v) const { return GSVector2i(_mm_max_epi8(m, v)); } - ALWAYS_INLINE GSVector2i min_i16(const GSVector2i& v) const { return GSVector2i(_mm_min_epi16(m, v)); } - ALWAYS_INLINE GSVector2i max_i16(const GSVector2i& v) const { return GSVector2i(_mm_max_epi16(m, v)); } - ALWAYS_INLINE GSVector2i min_i32(const GSVector2i& v) const { return GSVector2i(_mm_min_epi32(m, v)); } - ALWAYS_INLINE GSVector2i max_i32(const GSVector2i& v) const { return GSVector2i(_mm_max_epi32(m, v)); } + ALWAYS_INLINE GSVector2i min_s8(const GSVector2i& v) const { return GSVector2i(_mm_min_epi8(m, v)); } + ALWAYS_INLINE GSVector2i max_s8(const GSVector2i& v) const { return GSVector2i(_mm_max_epi8(m, v)); } + ALWAYS_INLINE GSVector2i min_s16(const GSVector2i& v) const { return GSVector2i(_mm_min_epi16(m, v)); } + ALWAYS_INLINE GSVector2i max_s16(const GSVector2i& v) const { return GSVector2i(_mm_max_epi16(m, v)); } 
+ ALWAYS_INLINE GSVector2i min_s32(const GSVector2i& v) const { return GSVector2i(_mm_min_epi32(m, v)); } + ALWAYS_INLINE GSVector2i max_s32(const GSVector2i& v) const { return GSVector2i(_mm_max_epi32(m, v)); } ALWAYS_INLINE GSVector2i min_u8(const GSVector2i& v) const { return GSVector2i(_mm_min_epu8(m, v)); } ALWAYS_INLINE GSVector2i max_u8(const GSVector2i& v) const { return GSVector2i(_mm_max_epu8(m, v)); } @@ -873,37 +873,37 @@ public: ALWAYS_INLINE bool rempty() const { return lt32(zwzw()).mask() != 0x00ff; } - ALWAYS_INLINE GSVector4i runion(const GSVector4i& v) const { return min_i32(v).blend32<0xc>(max_i32(v)); } + ALWAYS_INLINE GSVector4i runion(const GSVector4i& v) const { return min_s32(v).blend32<0xc>(max_s32(v)); } - ALWAYS_INLINE GSVector4i rintersect(const GSVector4i& v) const { return sat_i32(v); } + ALWAYS_INLINE GSVector4i rintersect(const GSVector4i& v) const { return sat_s32(v); } ALWAYS_INLINE bool rintersects(const GSVector4i& v) const { return !rintersect(v).rempty(); } ALWAYS_INLINE bool rcontains(const GSVector4i& v) const { return rintersect(v).eq(v); } ALWAYS_INLINE u32 rgba32() const { return static_cast<u32>(ps32().pu16().extract32<0>()); } - ALWAYS_INLINE GSVector4i sat_i8(const GSVector4i& min, const GSVector4i& max) const + ALWAYS_INLINE GSVector4i sat_s8(const GSVector4i& min, const GSVector4i& max) const { - return max_i8(min).min_i8(max); + return max_s8(min).min_s8(max); } - ALWAYS_INLINE GSVector4i sat_i8(const GSVector4i& minmax) const + ALWAYS_INLINE GSVector4i sat_s8(const GSVector4i& minmax) const { - return max_i8(minmax.xyxy()).min_i8(minmax.zwzw()); + return max_s8(minmax.xyxy()).min_s8(minmax.zwzw()); } - ALWAYS_INLINE GSVector4i sat_i16(const GSVector4i& min, const GSVector4i& max) const + ALWAYS_INLINE GSVector4i sat_s16(const GSVector4i& min, const GSVector4i& max) const { - return max_i16(min).min_i16(max); + return max_s16(min).min_s16(max); } - ALWAYS_INLINE GSVector4i sat_i16(const GSVector4i& minmax) const + 
ALWAYS_INLINE GSVector4i sat_s16(const GSVector4i& minmax) const { - return max_i16(minmax.xyxy()).min_i16(minmax.zwzw()); + return max_s16(minmax.xyxy()).min_s16(minmax.zwzw()); } - ALWAYS_INLINE GSVector4i sat_i32(const GSVector4i& min, const GSVector4i& max) const + ALWAYS_INLINE GSVector4i sat_s32(const GSVector4i& min, const GSVector4i& max) const { - return max_i32(min).min_i32(max); + return max_s32(min).min_s32(max); } - ALWAYS_INLINE GSVector4i sat_i32(const GSVector4i& minmax) const + ALWAYS_INLINE GSVector4i sat_s32(const GSVector4i& minmax) const { - return max_i32(minmax.xyxy()).min_i32(minmax.zwzw()); + return max_s32(minmax.xyxy()).min_s32(minmax.zwzw()); } ALWAYS_INLINE GSVector4i sat_u8(const GSVector4i& min, const GSVector4i& max) const @@ -931,12 +931,12 @@ public: return max_u32(minmax.xyxy()).min_u32(minmax.zwzw()); } - ALWAYS_INLINE GSVector4i min_i8(const GSVector4i& v) const { return GSVector4i(_mm_min_epi8(m, v)); } - ALWAYS_INLINE GSVector4i max_i8(const GSVector4i& v) const { return GSVector4i(_mm_max_epi8(m, v)); } - ALWAYS_INLINE GSVector4i min_i16(const GSVector4i& v) const { return GSVector4i(_mm_min_epi16(m, v)); } - ALWAYS_INLINE GSVector4i max_i16(const GSVector4i& v) const { return GSVector4i(_mm_max_epi16(m, v)); } - ALWAYS_INLINE GSVector4i min_i32(const GSVector4i& v) const { return GSVector4i(_mm_min_epi32(m, v)); } - ALWAYS_INLINE GSVector4i max_i32(const GSVector4i& v) const { return GSVector4i(_mm_max_epi32(m, v)); } + ALWAYS_INLINE GSVector4i min_s8(const GSVector4i& v) const { return GSVector4i(_mm_min_epi8(m, v)); } + ALWAYS_INLINE GSVector4i max_s8(const GSVector4i& v) const { return GSVector4i(_mm_max_epi8(m, v)); } + ALWAYS_INLINE GSVector4i min_s16(const GSVector4i& v) const { return GSVector4i(_mm_min_epi16(m, v)); } + ALWAYS_INLINE GSVector4i max_s16(const GSVector4i& v) const { return GSVector4i(_mm_max_epi16(m, v)); } + ALWAYS_INLINE GSVector4i min_s32(const GSVector4i& v) const { return 
GSVector4i(_mm_min_epi32(m, v)); } + ALWAYS_INLINE GSVector4i max_s32(const GSVector4i& v) const { return GSVector4i(_mm_max_epi32(m, v)); } ALWAYS_INLINE GSVector4i min_u8(const GSVector4i& v) const { return GSVector4i(_mm_min_epu8(m, v)); } ALWAYS_INLINE GSVector4i max_u8(const GSVector4i& v) const { return GSVector4i(_mm_max_epu8(m, v)); } diff --git a/src/core/cdrom.cpp b/src/core/cdrom.cpp index 9cb19d617..1dc08f458 100644 --- a/src/core/cdrom.cpp +++ b/src/core/cdrom.cpp @@ -3525,7 +3525,7 @@ static s16 GetPeakVolume(const u8* raw_sector, u8 channel) GSVector4i v_peak = GSVector4i::zero(); for (u32 i = 0; i < NUM_SAMPLES; i += 8) { - v_peak = v_peak.max_i16(GSVector4i::load(current_ptr)); + v_peak = v_peak.max_s16(GSVector4i::load(current_ptr)); current_ptr += sizeof(v_peak); } diff --git a/src/core/gpu.h b/src/core/gpu.h index d1d183ee9..fa76559f8 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -341,9 +341,9 @@ protected: // TODO: Coordinates are exclusive, so off by one here... 
const GSVector2i clamp_min = GSVector2i::load(&m_clamped_drawing_area.x); const GSVector2i clamp_max = GSVector2i::load(&m_clamped_drawing_area.z); - v1 = v1.sat_i32(clamp_min, clamp_max); - v2 = v2.sat_i32(clamp_min, clamp_max); - v3 = v3.sat_i32(clamp_min, clamp_max); + v1 = v1.sat_s32(clamp_min, clamp_max); + v2 = v2.sat_s32(clamp_min, clamp_max); + v3 = v3.sat_s32(clamp_min, clamp_max); TickCount pixels = std::abs((v1.x * v2.y + v2.x * v3.y + v3.x * v1.y - v1.x * v3.y - v2.x * v1.y - v3.x * v2.y) / 2); diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 87a96e05c..745f0f088 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -2713,7 +2713,7 @@ void GPU_HW::LoadVertices() const GSVector2i vstart_pos = GSVector2i(start_pos.x + m_drawing_offset.x, start_pos.y + m_drawing_offset.y); const GSVector2i vend_pos = GSVector2i(end_pos.x + m_drawing_offset.x, end_pos.y + m_drawing_offset.y); const GSVector4i bounds = GSVector4i::xyxy(vstart_pos, vend_pos); - const GSVector4i rect = GSVector4i::xyxy(vstart_pos.min_i32(vend_pos), vstart_pos.max_i32(vend_pos)) + const GSVector4i rect = GSVector4i::xyxy(vstart_pos.min_s32(vend_pos), vstart_pos.max_s32(vend_pos)) .add32(GSVector4i::cxpr(0, 0, 1, 1)); const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); @@ -2773,7 +2773,7 @@ void GPU_HW::LoadVertices() const GSVector2i end_pos = GSVector2i(m_drawing_offset.x + vp.x, m_drawing_offset.y + vp.y); const GSVector4i bounds = GSVector4i::xyxy(start_pos, end_pos); const GSVector4i rect = - GSVector4i::xyxy(start_pos.min_i32(end_pos), start_pos.max_i32(end_pos)).add32(GSVector4i::cxpr(0, 0, 1, 1)); + GSVector4i::xyxy(start_pos.min_s32(end_pos), start_pos.max_s32(end_pos)).add32(GSVector4i::cxpr(0, 0, 1, 1)); const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) { @@ -2861,7 +2861,7 @@ ALWAYS_INLINE_RELEASE void 
GPU_HW::CheckForTexPageOverlap(GSVector4i uv_rect) uv_rect = ((uv_rect & twin.xyxy()) | twin.zwzw()); // Min could be greater than max after applying window, correct for it. - uv_rect = uv_rect.min_i32(uv_rect.zwzw()).max_i32(uv_rect.xyxy()); + uv_rect = uv_rect.min_s32(uv_rect.zwzw()).max_s32(uv_rect.xyxy()); } const GPUTextureMode tmode = m_draw_mode.mode_reg.texture_mode; diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index 95cef32d5..6dd0493c4 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -533,10 +533,10 @@ void GPU_SW::DispatchRenderCommand() } // Cull polygons which are too large. - const GSVector2i min_pos_12 = positions[1].min_i32(positions[2]); - const GSVector2i max_pos_12 = positions[1].max_i32(positions[2]); - const GSVector4i draw_rect_012 = GSVector4i(min_pos_12.min_i32(positions[0])) - .upl64(GSVector4i(max_pos_12.max_i32(positions[0]))) + const GSVector2i min_pos_12 = positions[1].min_s32(positions[2]); + const GSVector2i max_pos_12 = positions[1].max_s32(positions[2]); + const GSVector4i draw_rect_012 = GSVector4i(min_pos_12.min_s32(positions[0])) + .upl64(GSVector4i(max_pos_12.max_s32(positions[0]))) .add32(GSVector4i::cxpr(0, 0, 1, 1)); const bool first_tri_culled = (draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT || @@ -558,8 +558,8 @@ void GPU_SW::DispatchRenderCommand() // quads if (rc.quad_polygon) { - const GSVector4i draw_rect_123 = GSVector4i(min_pos_12.min_i32(positions[3])) - .upl64(GSVector4i(max_pos_12.max_i32(positions[3]))) + const GSVector4i draw_rect_123 = GSVector4i(min_pos_12.min_s32(positions[3])) + .upl64(GSVector4i(max_pos_12.max_s32(positions[3]))) .add32(GSVector4i::cxpr(0, 0, 1, 1)); // Cull polygons which are too large. 
@@ -680,7 +680,7 @@ void GPU_SW::DispatchRenderCommand() const GSVector4i v0 = GSVector4i::loadl(&cmd->vertices[0].x); const GSVector4i v1 = GSVector4i::loadl(&cmd->vertices[1].x); - const GSVector4i rect = v0.min_i32(v1).xyxy(v0.max_i32(v1)).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const GSVector4i rect = v0.min_s32(v1).xyxy(v0.max_s32(v1)).add32(GSVector4i::cxpr(0, 0, 1, 1)); const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) @@ -718,7 +718,7 @@ void GPU_SW::DispatchRenderCommand() const GSVector4i v0 = GSVector4i::loadl(&cmd->vertices[0].x); const GSVector4i v1 = GSVector4i::loadl(&cmd->vertices[1].x); - const GSVector4i rect = v0.min_i32(v1).xyxy(v0.max_i32(v1)).add32(GSVector4i::cxpr(0, 0, 1, 1)); + const GSVector4i rect = v0.min_s32(v1).xyxy(v0.max_s32(v1)).add32(GSVector4i::cxpr(0, 0, 1, 1)); const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) diff --git a/src/core/gpu_sw_rasterizer.inl b/src/core/gpu_sw_rasterizer.inl index be4ad94b2..f1a8930cc 100644 --- a/src/core/gpu_sw_rasterizer.inl +++ b/src/core/gpu_sw_rasterizer.inl @@ -580,8 +580,8 @@ ShadePixel(const PixelVectors& pv, GPUTextureMode texture_mode, // Convert to 5bit. if constexpr (dithering_enable) { - rg = rg.sra16<4>().add16(dither).max_i16(GSVectorNi::zero()).sra16<3>(); - ba = ba.sra16<4>().add16(dither).max_i16(GSVectorNi::zero()).sra16<3>(); + rg = rg.sra16<4>().add16(dither).max_s16(GSVectorNi::zero()).sra16<3>(); + ba = ba.sra16<4>().add16(dither).max_s16(GSVectorNi::zero()).sra16<3>(); } else { @@ -606,8 +606,8 @@ ShadePixel(const PixelVectors& pv, GPUTextureMode texture_mode, // Non-textured transparent polygons don't set bit 15, but are treated as transparent. 
if constexpr (dithering_enable) { - GSVectorNi rg = vertex_color_rg.add16(dither).max_i16(GSVectorNi::zero()).sra16<3>(); - GSVectorNi ba = vertex_color_ba.add16(dither).max_i16(GSVectorNi::zero()).sra16<3>(); + GSVectorNi rg = vertex_color_rg.add16(dither).max_s16(GSVectorNi::zero()).sra16<3>(); + GSVectorNi ba = vertex_color_ba.add16(dither).max_s16(GSVectorNi::zero()).sra16<3>(); // Clamp to 5bit. We use 32bit for BA to set a to zero. rg = rg.min_u16(GSVectorNi::cxpr16(0x1F));