mirror of
https://github.com/stenzek/duckstation.git
synced 2025-06-06 19:45:33 +00:00
Common: Further optimize alltrue()/allfalse() on ARM
This commit is contained in:
parent
0189e1ef81
commit
57f3fee28c
@ -6,6 +6,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
|
||||
#define GSVECTOR_HAS_FAST_INT_SHUFFLE8 1
|
||||
#define GSVECTOR_HAS_SRLV 1
|
||||
@ -646,25 +647,10 @@ public:
|
||||
|
||||
/// Returns true only when all 64 bits of the vector are set.
///
/// The two 32-bit lanes held in v2s are reinterpreted as a single 64-bit lane and compared against
/// all-ones with one scalar test, so no per-architecture path is required for this 64-bit vector.
/// NOTE(review): this is an exact all-bits test, not a per-byte MSB test; presumably callers only pass
/// comparison masks whose lanes are entirely set or entirely clear -- confirm against call sites.
ALWAYS_INLINE bool alltrue() const
{
  return (vget_lane_u64(vreinterpret_u64_s32(v2s), 0) == UINT64_C(0xFFFFFFFFFFFFFFFF));
}
|
||||
|
||||
/// Returns true only when all 64 bits of the vector are clear.
///
/// The two 32-bit lanes held in v2s are reinterpreted as a single 64-bit lane and compared against
/// zero with one scalar test, so no per-architecture path is required for this 64-bit vector.
/// NOTE(review): any set bit makes this return false, not only the per-byte MSBs; presumably callers
/// only pass comparison masks whose lanes are entirely set or entirely clear -- confirm against call sites.
ALWAYS_INLINE bool allfalse() const
{
  return (vget_lane_u64(vreinterpret_u64_s32(v2s), 0) == UINT64_C(0));
}
|
||||
|
||||
template<int i>
|
||||
ALWAYS_INLINE GSVector2i insert8(int a) const
|
||||
@ -910,9 +896,12 @@ public:
|
||||
return (vget_lane_u32(masks, 0) | (vget_lane_u32(masks, 1) << 1));
|
||||
}
|
||||
|
||||
/// Returns true only when all 64 bits of the vector are set.
///
/// The two float lanes held in v2s are reinterpreted (bit pattern unchanged) as a single 64-bit lane
/// and compared against all-ones with one scalar test.
ALWAYS_INLINE bool alltrue() const
{
  return (vget_lane_u64(vreinterpret_u64_f32(v2s), 0) == UINT64_C(0xFFFFFFFFFFFFFFFF));
}
|
||||
|
||||
/// Returns true only when all 64 bits of the vector are clear.
///
/// The two float lanes held in v2s are reinterpreted (bit pattern unchanged) as a single 64-bit lane
/// and compared against zero with one scalar test. Because this is a bitwise test, a lane holding
/// -0.0f (sign bit set) does not count as false.
ALWAYS_INLINE bool allfalse() const
{
  return (vget_lane_u64(vreinterpret_u64_f32(v2s), 0) == UINT64_C(0));
}
|
||||
|
||||
/// Builds a vector from *this and v, choosing per lane via the mask produced by comparing *this with itself.
/// NOTE(review): presumably operator== is a lane-wise float compare (NaN lanes yield a false mask) and
/// blend32 takes lanes of *this where the mask is set and lanes of v elsewhere -- confirm against
/// their definitions, which are outside this view.
ALWAYS_INLINE GSVector2 replace_nan(const GSVector2& v) const
{
  // Self-comparison result, used as the lane-selection mask.
  const auto self_equal_mask = (*this == *this);
  return v.blend32(*this, self_equal_mask);
}
|
||||
|
||||
@ -2110,23 +2099,20 @@ public:
|
||||
|
||||
/// Returns true only when all 128 bits of the vector are set.
///
/// NOTE(review): this is an exact all-bits test, not a per-byte MSB test; presumably callers only pass
/// comparison masks whose lanes are entirely set or entirely clear -- confirm against call sites.
ALWAYS_INLINE bool alltrue() const
{
#ifdef CPU_ARCH_ARM64
  // Horizontal minimum over the four 32-bit lanes: it is all-ones only if every lane is all-ones.
  return (vminvq_u32(vreinterpretq_u32_s32(v4s)) == UINT32_C(0xFFFFFFFF));
#else
  // No across-vector reduction here: AND the two 64-bit halves together, then test the result as
  // one 64-bit scalar. A cleared bit in either half clears it in the combined value.
  return (vget_lane_u64(vreinterpret_u64_s32(vand_s32(vget_low_s32(v4s), vget_high_s32(v4s))), 0) ==
          UINT64_C(0xFFFFFFFFFFFFFFFF));
#endif
}
|
||||
|
||||
/// Returns true only when all 128 bits of the vector are clear.
///
/// NOTE(review): any set bit makes this return false, not only the per-byte MSBs; presumably callers
/// only pass comparison masks whose lanes are entirely set or entirely clear -- confirm against call sites.
ALWAYS_INLINE bool allfalse() const
{
#ifdef CPU_ARCH_ARM64
  // Horizontal maximum over the four 32-bit lanes: it is zero only if every lane is zero.
  return (vmaxvq_u32(vreinterpretq_u32_s32(v4s)) == UINT32_C(0));
#else
  // No across-vector reduction here: OR the two 64-bit halves together, then test the result as
  // one 64-bit scalar. A set bit in either half survives into the combined value.
  return (vget_lane_u64(vreinterpret_u64_s32(vorr_s32(vget_low_s32(v4s), vget_high_s32(v4s))), 0) == UINT64_C(0));
#endif
}
|
||||
|
||||
@ -2727,13 +2713,25 @@ public:
|
||||
|
||||
/// Returns true only when all 128 bits of the vector are set.
///
/// The float lanes in v4s are reinterpreted as unsigned integers (bit pattern unchanged) before testing.
ALWAYS_INLINE bool alltrue() const
{
#ifdef CPU_ARCH_ARM64
  // Horizontal minimum over the four 32-bit lanes: it is all-ones only if every lane is all-ones.
  return (vminvq_u32(vreinterpretq_u32_f32(v4s)) == UINT32_C(0xFFFFFFFF));
#else
  // No across-vector reduction here: AND the two 64-bit halves together, then test the result as
  // one 64-bit scalar. A cleared bit in either half clears it in the combined value.
  return (vget_lane_u64(vreinterpret_u64_u32(vand_u32(vget_low_u32(vreinterpretq_u32_f32(v4s)),
                                                      vget_high_u32(vreinterpretq_u32_f32(v4s)))),
                        0) == UINT64_C(0xFFFFFFFFFFFFFFFF));
#endif
}
|
||||
|
||||
/// Returns true only when all 128 bits of the vector are clear.
///
/// The float lanes in v4s are reinterpreted as unsigned integers (bit pattern unchanged) before
/// testing. Because this is a bitwise test, a lane holding -0.0f (sign bit set) does not count as false.
ALWAYS_INLINE bool allfalse() const
{
#ifdef CPU_ARCH_ARM64
  // Horizontal maximum over the four 32-bit lanes: it is zero only if every lane is zero.
  return (vmaxvq_u32(vreinterpretq_u32_f32(v4s)) == UINT32_C(0));
#else
  // No across-vector reduction here: OR the two 64-bit halves together, then test the result as
  // one 64-bit scalar. A set bit in either half survives into the combined value.
  return (vget_lane_u64(vreinterpret_u64_u32(vorr_u32(vget_low_u32(vreinterpretq_u32_f32(v4s)),
                                                      vget_high_u32(vreinterpretq_u32_f32(v4s)))),
                        0) == UINT64_C(0));
#endif
}
|
||||
|
||||
/// Builds a vector from *this and v, choosing per lane via the mask produced by comparing *this with itself.
/// NOTE(review): presumably operator== is a lane-wise float compare (NaN lanes yield a false mask) and
/// blend32 takes lanes of *this where the mask is set and lanes of v elsewhere -- confirm against
/// their definitions, which are outside this view.
ALWAYS_INLINE GSVector4 replace_nan(const GSVector4& v) const
{
  // Self-comparison result, used as the lane-selection mask.
  const auto self_equal_mask = (*this == *this);
  return v.blend32(*this, self_equal_mask);
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user