diff --git a/src/common/gsvector_neon.h b/src/common/gsvector_neon.h index 42d5c5fcb..cb0112a4a 100644 --- a/src/common/gsvector_neon.h +++ b/src/common/gsvector_neon.h @@ -1161,12 +1161,13 @@ public: { } + ALWAYS_INLINE explicit GSVector4i(const GSVector2i& v) : v4s(vcombine_s32(v.v2s, vcreate_s32(0))) {} + ALWAYS_INLINE explicit GSVector4i(s32 i) { *this = i; } - ALWAYS_INLINE explicit GSVector4i(int32x2_t m) : v4s(vcombine_s32(m, vcreate_s32(0))) {} ALWAYS_INLINE constexpr explicit GSVector4i(int32x4_t m) : v4s(m) {} - ALWAYS_INLINE explicit GSVector4i(const GSVector2& v); + ALWAYS_INLINE explicit GSVector4i(const GSVector2& v) : v4s(vcombine_s32(vcvt_s32_f32(v.v2s), vcreate_s32(0))) {} ALWAYS_INLINE explicit GSVector4i(const GSVector4& v); ALWAYS_INLINE static GSVector4i cast(const GSVector4& v); @@ -2205,6 +2206,8 @@ public: return GSVector4i(vcombine_s32(vld1_s32((const int32_t*)p), vcreate_s32(0))); } + ALWAYS_INLINE static GSVector4i loadl(const GSVector2i& v) { return GSVector4i(vcombine_s32(v.v2s, vcreate_s32(0))); } + template ALWAYS_INLINE static GSVector4i loadh(const void* p) { diff --git a/src/common/gsvector_nosimd.h b/src/common/gsvector_nosimd.h index 8146738cf..ed03504b2 100644 --- a/src/common/gsvector_nosimd.h +++ b/src/common/gsvector_nosimd.h @@ -941,6 +941,9 @@ public: } ALWAYS_INLINE GSVector4i(const GSVector4i& v) { std::memcpy(S32, v.S32, sizeof(S32)); } + + ALWAYS_INLINE explicit GSVector4i(const GSVector2& v) : S32{static_cast(v.x), static_cast(v.y), 0, 0} {} + ALWAYS_INLINE explicit GSVector4i(const GSVector2i& v) : S32{v.S32[0], v.S32[1], 0, 0} {} ALWAYS_INLINE explicit GSVector4i(s32 i) { *this = i; } @@ -1556,6 +1559,8 @@ public: return ret; } + ALWAYS_INLINE static GSVector4i loadl(const GSVector2i& v) { return loadl(&v); } + template ALWAYS_INLINE static GSVector4i loadh(const void* p) { @@ -1565,10 +1570,7 @@ public: return ret; } - ALWAYS_INLINE static GSVector4i loadh(const GSVector2i& v) - { - return loadh(&v); - } + ALWAYS_INLINE static GSVector4i loadh(const GSVector2i& v) { return loadh(&v); } template ALWAYS_INLINE static GSVector4i load(const void* p) diff --git a/src/common/gsvector_sse.h b/src/common/gsvector_sse.h index f51f17ec7..615558727 100644 --- a/src/common/gsvector_sse.h +++ b/src/common/gsvector_sse.h @@ -721,7 +721,7 @@ public: #endif } - ALWAYS_INLINE explicit GSVector2(const GSVector2i& v); + ALWAYS_INLINE explicit GSVector2(const GSVector2i& v) : m(_mm_cvtepi32_ps(v)) {} ALWAYS_INLINE GSVector2& operator=(float f) { @@ -1056,12 +1056,15 @@ public: { } - ALWAYS_INLINE explicit GSVector4i(const GSVector2i& v) { m = v.m; } + ALWAYS_INLINE explicit GSVector4i(const GSVector2i& v) { m = _mm_unpacklo_epi64(v.m, _mm_setzero_si128()); } + + ALWAYS_INLINE explicit GSVector4i(const GSVector2& v) + : m(_mm_unpacklo_epi64(_mm_cvttps_epi32(v), _mm_setzero_si128())) + { + } ALWAYS_INLINE explicit GSVector4i(s32 i) { *this = i; } - ALWAYS_INLINE explicit GSVector4i(const GSVector2& v); - ALWAYS_INLINE explicit GSVector4i(const GSVector4& v); ALWAYS_INLINE constexpr explicit GSVector4i(__m128i m) : m(m) {} @@ -1739,6 +1742,11 @@ public: return GSVector4i(_mm_loadl_epi64(static_cast(p))); } + ALWAYS_INLINE static GSVector4i loadl(const GSVector2i& v) + { + return GSVector4i(_mm_unpacklo_epi64(v.m, _mm_setzero_si128())); + } + template ALWAYS_INLINE static GSVector4i loadh(const void* p) { @@ -1958,7 +1966,10 @@ public: m = _mm_cvtepi32_ps(_mm_unpacklo_epi32(_mm_cvtsi32_si128(x), _mm_cvtsi32_si128(y))); } - ALWAYS_INLINE explicit GSVector4(const GSVector2& v) : m(v.m) {} + ALWAYS_INLINE explicit GSVector4(const GSVector2& v) + : m(_mm_castpd_ps(_mm_unpacklo_pd(_mm_castps_pd(v.m), _mm_setzero_pd()))) + { + } ALWAYS_INLINE explicit GSVector4(const GSVector2i& v) : m(_mm_castpd_ps(_mm_unpacklo_pd(_mm_castps_pd(_mm_cvtepi32_ps(v.m)), _mm_setzero_pd()))) { @@ -1979,7 +1990,7 @@ public: #endif } - ALWAYS_INLINE explicit GSVector4(const GSVector4i& v); + ALWAYS_INLINE explicit GSVector4(const GSVector4i& v) : m(_mm_cvtepi32_ps(v)) {} ALWAYS_INLINE static GSVector4 f64(double x, double y) { return GSVector4(_mm_castpd_ps(_mm_set_pd(y, x))); } ALWAYS_INLINE static GSVector4 f64(double x) { return GSVector4(_mm_castpd_ps(_mm_set1_pd(x))); } @@ -2428,10 +2439,7 @@ public: return GSVector4(_mm_movelh_ps(l.m, h.m)); } - ALWAYS_INLINE static GSVector4 xyxy(const GSVector2& l) - { - return GSVector4(_mm_movelh_ps(l.m, l.m)); - } + ALWAYS_INLINE static GSVector4 xyxy(const GSVector2& l) { return GSVector4(_mm_movelh_ps(l.m, l.m)); } #define VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \ ALWAYS_INLINE GSVector4 xs##ys##zs##ws() const \ @@ -2491,11 +2499,6 @@ ALWAYS_INLINE GSVector2i::GSVector2i(const GSVector2& v) m = _mm_cvttps_epi32(v); } -ALWAYS_INLINE GSVector2::GSVector2(const GSVector2i& v) -{ - m = _mm_cvtepi32_ps(v); -} - ALWAYS_INLINE GSVector2i GSVector2i::cast(const GSVector2& v) { return GSVector2i(_mm_castps_si128(v.m)); @@ -2511,11 +2514,6 @@ ALWAYS_INLINE GSVector4i::GSVector4i(const GSVector4& v) m = _mm_cvttps_epi32(v); } -ALWAYS_INLINE GSVector4::GSVector4(const GSVector4i& v) -{ - m = _mm_cvtepi32_ps(v); -} - ALWAYS_INLINE GSVector4i GSVector4i::cast(const GSVector4& v) { return GSVector4i(_mm_castps_si128(v.m));