duckstation/src/core/cpu_pgxp.cpp
Stenzek 25b0bb752a
GPU/HW: Try truncating culled vertices
What is this monstrosity? Final Fantasy VIII relies on X coordinates
being truncated during scanline drawing, with negative coordinates
becoming positive and vice versa. Fortunately the bits that we need
are consistent across the entire polygon, so we can get away with
truncating the vertices. However, we can't do this to all vertices,
because other games' vertices break in various ways. For example,
+1024 becomes -1024, which is a valid vertex position as the ending
coordinate is exclusive. Therefore, 1024 is never truncated, only
1023. Luckily, FF8's vertices get culled as they do not intersect
with the clip rectangle, so we can do this fixup only when culled,
and everything seems happy.
2024-12-28 20:24:21 +10:00

1461 lines
43 KiB
C++

// SPDX-FileCopyrightText: 2016 iCatButler, 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
//
// This file has been completely rewritten over the years compared to the original PCSXR-PGXP release.
// No original code remains. The original copyright notice is included above for historical purposes.
//
#include "cpu_pgxp.h"
#include "bus.h"
#include "cpu_core.h"
#include "cpu_disasm.h"
#include "gpu_types.h"
#include "settings.h"
#include "util/gpu_device.h"
#include "common/assert.h"
#include "common/log.h"
#include <climits>
#include <cmath>
LOG_CHANNEL(CPU);
// #define LOG_VALUES 1
// #define LOG_LOOKUPS 1
// TODO: Don't update flags on Validate(), instead return it.
namespace CPU::PGXP {
// Sizes of the PGXP lookup tables, expressed in PGXPValue entries.
enum : u32
{
// The vertex cache is a 2048x2048 grid indexed by (screen Y + 1024, screen X + 1024); see CacheVertex().
VERTEX_CACHE_WIDTH = 2048,
VERTEX_CACHE_HEIGHT = 2048,
VERTEX_CACHE_SIZE = VERTEX_CACHE_WIDTH * VERTEX_CACHE_HEIGHT,
// One entry per 32-bit word of RAM, followed by one entry per 32-bit word of scratchpad.
PGXP_MEM_SIZE = (static_cast<u32>(Bus::RAM_8MB_SIZE) + static_cast<u32>(CPU::SCRATCHPAD_SIZE)) / 4,
// Index of the first scratchpad entry within the shadow memory array.
PGXP_MEM_SCRATCH_OFFSET = Bus::RAM_8MB_SIZE / 4,
};
// Bits stored in PGXPValue::flags describing which components of a value can be trusted.
enum : u32
{
// Per-component validity bits for the x, y and z members.
VALID_X = (1u << 0),
VALID_Y = (1u << 1),
VALID_Z = (1u << 2),
VALID_LOWZ = (1u << 16), // Valid Z from the low part of a 32-bit value.
VALID_HIGHZ = (1u << 17), // Valid Z from the high part of a 32-bit value.
VALID_TAINTED_Z = (1u << 31), // X/Y has been changed, Z may not be accurate.
// Convenience combinations. VALID_XYZ and VALID_ALL currently have the same value.
VALID_XY = (VALID_X | VALID_Y),
VALID_XYZ = (VALID_X | VALID_Y | VALID_Z),
VALID_ALL = (VALID_X | VALID_Y | VALID_Z),
};
// Helpers for splitting and recombining the 16-bit halves of a 32-bit register value.
// Low/high half as an unsigned 16-bit value.
#define LOWORD_U16(val) (static_cast<u16>(val))
#define HIWORD_U16(val) (static_cast<u16>(static_cast<u32>(val) >> 16))
// Low/high half reinterpreted as a signed 16-bit value.
#define LOWORD_S16(val) (static_cast<s16>(static_cast<u16>(val)))
#define HIWORD_S16(val) (static_cast<s16>(static_cast<u16>(static_cast<u32>(val) >> 16)))
// Yield val with its low/high half replaced by the given word; the arguments themselves are not modified.
#define SET_LOWORD(val, loword) ((static_cast<u32>(val) & 0xFFFF0000u) | static_cast<u32>(static_cast<u16>(loword)))
#define SET_HIWORD(val, hiword) ((static_cast<u32>(val) & 0x0000FFFFu) | (static_cast<u32>(hiword) << 16))
// Forward declarations of the file-local helpers defined further down.

// 16.16-style arithmetic helpers operating on doubles.
static double f16Sign(double val);
static double f16Unsign(double val);
static double f16Overflow(double val);
// Vertex cache, keyed by the packed 16-bit screen X/Y of a vertex.
static void CacheVertex(u32 value, const PGXPValue& vertex);
static PGXPValue* GetCachedVertex(u32 value);
// Helpers used by GetPreciseVertex().
static float TruncateVertexPosition(float p);
static bool IsWithinTolerance(float precise_x, float precise_y, int int_x, int int_y);
// Accessors for the PGXP shadow of the CPU general purpose registers named by an instruction.
static PGXPValue& GetRdValue(Instruction instr);
static PGXPValue& GetRtValue(Instruction instr);
static PGXPValue& ValidateAndGetRtValue(Instruction instr, u32 rtVal);
static PGXPValue& ValidateAndGetRsValue(Instruction instr, u32 rsVal);
static void SetRtValue(Instruction instr, const PGXPValue& val);
static void SetRtValue(Instruction instr, const PGXPValue& val, u32 rtVal);
// Accessors for the three-entry screen XY FIFO held in GTE registers 12-14.
static PGXPValue& GetSXY0();
static PGXPValue& GetSXY1();
static PGXPValue& GetSXY2();
static PGXPValue& PushSXY();
// Shadow memory lookups and loads.
static PGXPValue* GetPtr(u32 addr);
static const PGXPValue& ValidateAndLoadMem(u32 addr, u32 value);
static void ValidateAndLoadMem16(PGXPValue& dest, u32 addr, u32 value, bool sign);
// Shared implementations behind the public instruction handlers.
static void CPU_MTC2(u32 reg, const PGXPValue& value, u32 val);
static void CPU_BITWISE(Instruction instr, u32 rdVal, u32 rsVal, u32 rtVal);
static void CPU_SLL(Instruction instr, u32 rtVal, u32 sh);
static void CPU_SRx(Instruction instr, u32 rtVal, u32 sh, bool sign, bool is_variable);
// Shadow memory stores.
static void WriteMem(u32 addr, const PGXPValue& value);
static void WriteMem16(u32 addr, const PGXPValue& value);
// Z propagation helpers.
static void CopyZIfMissing(PGXPValue& dst, const PGXPValue& src);
static void SelectZ(float& dst_z, u32& dst_flags, const PGXPValue& src1, const PGXPValue& src2);
// Instruction tracing. When LOG_VALUES is defined the macros below write the current instruction and the
// PGXP state of its operands to the trace log; otherwise they compile to nothing.
// All of the logging variants expect a variable named `instr` to be in scope at the point of use.
#ifdef LOG_VALUES
static void LogInstruction(u32 pc, Instruction instr);
static void LogValue(const char* name, u32 rval, const PGXPValue* val);
static void LogValueStr(SmallStringBase& str, const char* name, u32 rval, const PGXPValue* val);
// clang-format off
// NV: instruction only. 1: instruction plus one explicitly named value.
#define LOG_VALUES_NV() do { LogInstruction(CPU::g_state.current_instruction_pc, instr); } while (0)
#define LOG_VALUES_1(name, rval, val) do { LogInstruction(CPU::g_state.current_instruction_pc, instr); LogValue(name, rval, val); } while (0)
// C1/C2: instruction plus one/two CPU registers identified by register number.
#define LOG_VALUES_C1(rnum, rval) do { LogInstruction(CPU::g_state.current_instruction_pc,instr); LogValue(CPU::GetRegName(static_cast<CPU::Reg>(rnum)), rval, &g_state.pgxp_gpr[static_cast<u32>(rnum)]); } while(0)
#define LOG_VALUES_C2(r1num, r1val, r2num, r2val) do { LogInstruction(CPU::g_state.current_instruction_pc,instr); LogValue(CPU::GetRegName(static_cast<CPU::Reg>(r1num)), r1val, &g_state.pgxp_gpr[static_cast<u32>(r1num)]); LogValue(CPU::GetRegName(static_cast<CPU::Reg>(r2num)), r2val, &g_state.pgxp_gpr[static_cast<u32>(r2num)]); } while(0)
// LOAD: instruction plus the shadow memory entry at addr. STORE: as C1, followed by the target address.
#define LOG_VALUES_LOAD(addr, val) do { LogInstruction(CPU::g_state.current_instruction_pc,instr); LogValue(TinyString::from_format("MEM[{:08X}]", addr).c_str(), val, GetPtr(addr)); } while(0)
#define LOG_VALUES_STORE(rnum, rval, addr) do { LOG_VALUES_C1(rnum, rval); std::fprintf(s_log, " addr=%08X", addr); } while(0)
#else
#define LOG_VALUES_NV() (void)0
#define LOG_VALUES_1(name, rval, val) (void)0
#define LOG_VALUES_C1(rnum, rval) (void)0
#define LOG_VALUES_C2(r1num, r1val, r2num, r2val) (void)0
#define LOG_VALUES_LOAD(addr, val) (void)0
#define LOG_VALUES_STORE(rnum, rval, addr) (void)0
#endif
// clang-format on
// Value-initialized entry handed out when no PGXP data is available for a location.
static constexpr const PGXPValue INVALID_VALUE = {};
// Shadow of RAM and scratchpad with PGXP_MEM_SIZE entries; allocated in Initialize() and freed in Shutdown().
static PGXPValue* s_mem = nullptr;
// Vertex cache with VERTEX_CACHE_SIZE entries; only allocated when g_settings.gpu_pgxp_vertex_cache is set.
static PGXPValue* s_vertex_cache = nullptr;
#ifdef LOG_VALUES
// Trace log stream, opened on first use by LogInstruction().
static std::FILE* s_log;
#endif
} // namespace CPU::PGXP
// Prepares PGXP for use: zeroes the register shadows, allocates the memory shadow if it does not exist yet,
// and allocates/clears the vertex cache when that feature is enabled in the settings.
void CPU::PGXP::Initialize()
{
  std::memset(g_state.pgxp_gte, 0, sizeof(g_state.pgxp_gte));
  std::memset(g_state.pgxp_cop0, 0, sizeof(g_state.pgxp_cop0));
  std::memset(g_state.pgxp_gpr, 0, sizeof(g_state.pgxp_gpr));

  // The memory shadow is mandatory; an existing allocation is reused untouched.
  if (s_mem == nullptr)
  {
    s_mem = static_cast<PGXPValue*>(std::calloc(PGXP_MEM_SIZE, sizeof(PGXPValue)));
    if (s_mem == nullptr)
      Panic("Failed to allocate PGXP memory");
  }

  // The vertex cache is optional; if it cannot be allocated the setting is switched off instead of failing.
  const bool need_cache_alloc = (g_settings.gpu_pgxp_vertex_cache && s_vertex_cache == nullptr);
  if (need_cache_alloc)
  {
    s_vertex_cache = static_cast<PGXPValue*>(std::calloc(VERTEX_CACHE_SIZE, sizeof(PGXPValue)));
    if (s_vertex_cache == nullptr)
    {
      ERROR_LOG("Failed to allocate memory for vertex cache, disabling.");
      g_settings.gpu_pgxp_vertex_cache = false;
    }
  }

  // Any cache that exists (new or left over from an earlier run) starts out empty.
  if (s_vertex_cache != nullptr)
    std::memset(s_vertex_cache, 0, sizeof(PGXPValue) * VERTEX_CACHE_SIZE);
}
// Zeroes all PGXP state (register shadows, memory shadow, and the vertex cache if enabled) without freeing
// or reallocating any buffers.
void CPU::PGXP::Reset()
{
  std::memset(g_state.pgxp_gte, 0, sizeof(g_state.pgxp_gte));
  std::memset(g_state.pgxp_cop0, 0, sizeof(g_state.pgxp_cop0));
  std::memset(g_state.pgxp_gpr, 0, sizeof(g_state.pgxp_gpr));

  if (s_mem != nullptr)
    std::memset(s_mem, 0, sizeof(PGXPValue) * PGXP_MEM_SIZE);

  // The vertex cache is only cleared while the feature is enabled.
  const bool clear_cache = (g_settings.gpu_pgxp_vertex_cache && s_vertex_cache != nullptr);
  if (clear_cache)
    std::memset(s_vertex_cache, 0, sizeof(PGXPValue) * VERTEX_CACHE_SIZE);
}
// Releases the vertex cache and memory shadow and zeroes the register shadows.
void CPU::PGXP::Shutdown()
{
  // std::free() accepts a null pointer, so the buffers can be released unconditionally.
  std::free(s_vertex_cache);
  s_vertex_cache = nullptr;

  std::free(s_mem);
  s_mem = nullptr;

  std::memset(g_state.pgxp_gte, 0, sizeof(g_state.pgxp_gte));
  std::memset(g_state.pgxp_gpr, 0, sizeof(g_state.pgxp_gpr));
  std::memset(g_state.pgxp_cop0, 0, sizeof(g_state.pgxp_cop0));
}
// Scales val by 65536, truncates it to an integer, keeps the low 32 bits as a signed value,
// and scales the result back down by 65536.
ALWAYS_INLINE_RELEASE double CPU::PGXP::f16Sign(double val)
{
  constexpr double scale = static_cast<double>(USHRT_MAX + 1);
  const s64 scaled = static_cast<s64>(val * scale);
  const s32 wrapped = static_cast<s32>(scaled);
  return static_cast<double>(wrapped) / scale;
}
// Returns val unchanged when it is non-negative; otherwise returns val + 65536.
ALWAYS_INLINE_RELEASE double CPU::PGXP::f16Unsign(double val)
{
  if (val >= 0)
    return val;

  return val + (USHRT_MAX + 1);
}
// Truncates val to a 64-bit integer and returns that integer shifted right by 16 bits.
ALWAYS_INLINE_RELEASE double CPU::PGXP::f16Overflow(double val)
{
  const s64 whole = static_cast<s64>(val);
  const s64 carried = whole >> 16;
  return static_cast<double>(carried);
}
// Returns the PGXP shadow of the register named by the instruction's rd field.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetRdValue(Instruction instr)
{
  const u8 rd_index = static_cast<u8>(instr.r.rd.GetValue());
  return g_state.pgxp_gpr[rd_index];
}
// Returns the PGXP shadow of the register named by the instruction's rt field.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetRtValue(Instruction instr)
{
  const u8 rt_index = static_cast<u8>(instr.r.rt.GetValue());
  return g_state.pgxp_gpr[rt_index];
}
// Validates the PGXP shadow of register rt against the register's actual contents (rtVal) and returns it.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::ValidateAndGetRtValue(Instruction instr, u32 rtVal)
{
  const u8 rt_index = static_cast<u8>(instr.r.rt.GetValue());
  PGXPValue& shadow = g_state.pgxp_gpr[rt_index];
  shadow.Validate(rtVal);
  return shadow;
}
// Validates the PGXP shadow of register rs against the register's actual contents (rsVal) and returns it.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::ValidateAndGetRsValue(Instruction instr, u32 rsVal)
{
  const u8 rs_index = static_cast<u8>(instr.r.rs.GetValue());
  PGXPValue& shadow = g_state.pgxp_gpr[rs_index];
  shadow.Validate(rsVal);
  return shadow;
}
// Overwrites the PGXP shadow of register rt with a copy of val.
ALWAYS_INLINE void CPU::PGXP::SetRtValue(Instruction instr, const PGXPValue& val)
{
  const u8 rt_index = static_cast<u8>(instr.r.rt.GetValue());
  g_state.pgxp_gpr[rt_index] = val;
}
// Overwrites the PGXP shadow of register rt with a copy of val, then forces its raw value to rtVal.
ALWAYS_INLINE void CPU::PGXP::SetRtValue(Instruction instr, const PGXPValue& val, u32 rtVal)
{
  const u8 rt_index = static_cast<u8>(instr.r.rt.GetValue());
  PGXPValue& target = g_state.pgxp_gpr[rt_index];
  target = val;
  target.value = rtVal;
}
// Returns the PGXP shadow of GTE register 12, the oldest entry of the screen XY FIFO.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetSXY0()
{
  constexpr u32 sxy0_reg = 12;
  return g_state.pgxp_gte[sxy0_reg];
}
// Returns the PGXP shadow of GTE register 13, the middle entry of the screen XY FIFO.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetSXY1()
{
  constexpr u32 sxy1_reg = 13;
  return g_state.pgxp_gte[sxy1_reg];
}
// Returns the PGXP shadow of GTE register 14, the newest entry of the screen XY FIFO.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetSXY2()
{
  constexpr u32 sxy2_reg = 14;
  return g_state.pgxp_gte[sxy2_reg];
}
// Advances the screen XY FIFO by one slot (SXY0 <- SXY1 <- SXY2) and returns the SXY2 slot,
// which still holds its previous contents, for the caller to overwrite.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::PushSXY()
{
  PGXPValue* const fifo = &g_state.pgxp_gte[12];
  fifo[0] = fifo[1];
  fifo[1] = fifo[2];
  return fifo[2];
}
// Maps a CPU address to its entry in the shadow memory array.
// Scratchpad addresses map to the region starting at PGXP_MEM_SCRATCH_OFFSET, RAM addresses (including
// mirrors below Bus::RAM_MIRROR_END) map to the start of the array. Returns nullptr for anything else.
ALWAYS_INLINE_RELEASE CPU::PGXPValue* CPU::PGXP::GetPtr(u32 addr)
{
#if 0
  // Debugging aid: break when a specific physical address is touched.
  if ((addr & CPU::PHYSICAL_MEMORY_ADDRESS_MASK) >= 0x0017A2B4 &&
      (addr & CPU::PHYSICAL_MEMORY_ADDRESS_MASK) <= 0x0017A2B4)
    __debugbreak();
#endif

  const bool is_scratchpad = ((addr & SCRATCHPAD_ADDR_MASK) == SCRATCHPAD_ADDR);
  if (is_scratchpad)
  {
    const u32 word_index = (addr & SCRATCHPAD_OFFSET_MASK) >> 2;
    return &s_mem[PGXP_MEM_SCRATCH_OFFSET + word_index];
  }

  const u32 paddr = (addr & PHYSICAL_MEMORY_ADDRESS_MASK);
  if (paddr >= Bus::RAM_MIRROR_END)
    return nullptr;

  return &s_mem[(paddr & Bus::g_ram_mask) >> 2];
}
// Returns the shadow memory entry for addr after validating it against the word actually loaded (value).
// Addresses with no shadow entry yield INVALID_VALUE.
ALWAYS_INLINE_RELEASE const CPU::PGXPValue& CPU::PGXP::ValidateAndLoadMem(u32 addr, u32 value)
{
  PGXPValue* const entry = GetPtr(addr);
  if (entry != nullptr) [[likely]]
  {
    entry->Validate(value);
    return *entry;
  }

  return INVALID_VALUE;
}
// Loads the 16-bit half selected by bit 1 of addr from shadow memory into dest.
// The selected half ends up in dest.x; dest.y is synthesized as the extension of that half.
// dest becomes INVALID_VALUE when addr has no shadow entry.
ALWAYS_INLINE_RELEASE void CPU::PGXP::ValidateAndLoadMem16(PGXPValue& dest, u32 addr, u32 value, bool sign)
{
  PGXPValue* const entry = GetPtr(addr);
  if (entry == nullptr) [[unlikely]]
  {
    dest = INVALID_VALUE;
    return;
  }

  // Bit 1 of the address picks the high or low halfword of the 32-bit entry.
  const bool hiword = ((addr & 2) != 0);

  // Only the half being read is validated: drop its valid bit if the stored halfword differs from the load.
  const u32 stored_half = hiword ? (entry->value >> 16) : entry->value;
  const u32 half_valid_bit = hiword ? VALID_Y : VALID_X;
  if (Truncate16(stored_half) != Truncate16(value))
    entry->flags &= ~half_valid_bit;

  // Start from the whole entry, then move the high half down into x when that is the half being read.
  dest = *entry;
  if (hiword)
  {
    dest.x = dest.y;
    dest.flags = (dest.flags & ~VALID_X) | ((dest.flags & VALID_Y) >> 1);
  }

  // y is only marked valid when x is, so that no fake values are produced.
  const bool x_valid = ((dest.flags & VALID_X) != 0);
  if (!x_valid)
  {
    dest.y = 0.0f;
    dest.flags &= ~VALID_Y;
  }
  else
  {
    dest.y = (dest.x < 0) ? -1.0f * sign : 0.0f;
    dest.flags |= VALID_Y;
  }

  dest.value = value;
}
// Stores a full 32-bit PGXP value to the shadow memory entry for addr, marking both halves' Z as valid.
// Addresses with no shadow entry are ignored.
ALWAYS_INLINE_RELEASE void CPU::PGXP::WriteMem(u32 addr, const PGXPValue& value)
{
  PGXPValue* const entry = GetPtr(addr);
  if (entry != nullptr) [[likely]]
  {
    *entry = value;
    entry->flags |= VALID_LOWZ | VALID_HIGHZ;
  }
}
// Stores the x component of value into the 16-bit half of the shadow memory entry selected by bit 1 of addr,
// and updates the entry's Z bookkeeping. Addresses with no shadow entry are ignored.
ALWAYS_INLINE_RELEASE void CPU::PGXP::WriteMem16(u32 addr, const PGXPValue& value)
{
  PGXPValue* const entry = GetPtr(addr);
  if (entry == nullptr) [[unlikely]]
    return;

  // Bit 1 of the address picks the high or low halfword of the 32-bit entry.
  const bool hiword = ((addr & 2) != 0);
  const u32 src_x_valid = (value.flags & VALID_X);

  if (!hiword)
  {
    // Low half: x comes straight across, and the low 16 bits of the raw value are replaced.
    entry->x = value.x;
    entry->flags = (entry->flags & ~VALID_X) | src_x_valid;
    entry->value = (entry->value & UINT32_C(0xFFFF0000)) | (value.value & UINT32_C(0x0000FFFF));
  }
  else
  {
    // High half: the source x lands in y, and its X-valid bit is shifted into the Y-valid position.
    entry->y = value.x;
    entry->flags = (entry->flags & ~VALID_Y) | (src_x_valid << 1);
    entry->value = (entry->value & UINT32_C(0x0000FFFF)) | (value.value << 16);
  }

  // overwrite z/w if valid
  // TODO: Check modified
  const u32 half_z_bit = hiword ? VALID_HIGHZ : VALID_LOWZ;
  if (value.flags & VALID_Z)
  {
    entry->z = value.z;
    entry->flags |= VALID_Z | half_z_bit;
  }
  else
  {
    // This half no longer vouches for Z; once neither half does, Z as a whole is invalid.
    entry->flags &= ~half_z_bit;
    if ((entry->flags & (VALID_HIGHZ | VALID_LOWZ)) == 0)
      entry->flags &= ~VALID_Z;
  }
}
// Gives dst the Z of src when dst has no valid Z of its own, and merges src's Z-valid bit into dst's flags.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CopyZIfMissing(PGXPValue& dst, const PGXPValue& src)
{
  const bool dst_has_z = ((dst.flags & VALID_Z) != 0);
  if (!dst_has_z)
    dst.z = src.z;

  dst.flags |= (src.flags & VALID_Z);
}
// Picks the Z for a two-operand result and merges the operands' Z-valid bits into dst_flags.
// src1's Z is used unless it is missing, or it is tainted while src2 has a valid, untainted Z.
ALWAYS_INLINE_RELEASE void CPU::PGXP::SelectZ(float& dst_z, u32& dst_flags, const PGXPValue& src1,
                                              const PGXPValue& src2)
{
  const bool src1_has_z = ((src1.flags & VALID_Z) != 0);
  const bool src1_tainted = ((src1.flags & VALID_TAINTED_Z) != 0);
  const bool src2_precise = ((src2.flags & (VALID_Z | VALID_TAINTED_Z)) == VALID_Z);

  // Prefer src2 if src1 is missing Z, or is potentially an imprecise value, when src2 is precise.
  const bool use_src2 = (!src1_has_z || (src1_tainted && src2_precise));
  dst_z = use_src2 ? src2.z : src1.z;

  dst_flags |= ((src1.flags | src2.flags) & VALID_Z);
}
#ifdef LOG_VALUES
// Begins a trace log record for the instruction at pc: writes the PC, the raw instruction bits and the
// disassembly. The log file "pgxp.log" is opened on first use; every later call first flushes the stream
// and terminates the previous record with a newline.
// Tracing is best-effort: if the log file cannot be opened, nothing is written.
void CPU::PGXP::LogInstruction(u32 pc, Instruction instr)
{
  if (!s_log) [[unlikely]]
  {
    s_log = std::fopen("pgxp.log", "wb");

    // fopen() returns null on failure; bail out rather than handing a null stream to fprintf() below.
    // This matches LogValue(), which also tolerates a missing log stream.
    if (!s_log)
      return;
  }
  else
  {
    std::fflush(s_log);
    std::fputc('\n', s_log);
  }

  SmallString str;
  DisassembleInstruction(&str, pc, instr.bits);
  std::fprintf(s_log, "%08X %08X %-20s", pc, instr.bits, str.c_str());
}
void CPU::PGXP::LogValue(const char* name, u32 rval, const PGXPValue* val)
{
if (!s_log) [[unlikely]]
return;
SmallString str;
LogValueStr(str, name, rval, val);
std::fprintf(s_log, " %s", str.c_str());
}
// Appends "name=[raw, ...]" to str, where the bracketed part lists the raw register value, the PGXP raw value
// when it differs, the x/y/z components and the valid component letters. A null val is rendered as NULL.
void CPU::PGXP::LogValueStr(SmallStringBase& str, const char* name, u32 rval, const PGXPValue* val)
{
  str.append_format("{}=[{:08X}", name, rval);

  if (val == nullptr)
  {
    str.append(", NULL]");
    return;
  }

  // Only show the PGXP copy of the raw value when it has diverged from the real register value.
  if (val->value != rval)
    str.append_format(", PGXP{:08X}", val->value);

  str.append_format(", {{{},{},{}}}", val->x, val->y, val->z);

  const u32 valid_bits = (val->flags & VALID_ALL);
  if (valid_bits)
  {
    str.append(", valid=");
    if (valid_bits & VALID_X)
      str.append('X');
    if (valid_bits & VALID_Y)
      str.append('Y');
    if (valid_bits & VALID_Z)
      str.append('Z');
  }

  // if (val->flags & VALID_TAINTED_Z)
  //   str.append(", tainted");

  str.append(']');
}
#endif
void CPU::PGXP::GTE_RTPS(float x, float y, float z, u32 value)
{
PGXPValue& pvalue = PushSXY();
pvalue.x = x;
pvalue.y = y;
pvalue.z = z;
pvalue.value = value;
pvalue.flags = VALID_ALL;
if (g_settings.gpu_pgxp_vertex_cache)
CacheVertex(value, pvalue);
}
// Validates the three screen XY FIFO entries against the given raw register values and reports whether
// all three have valid X, Y and Z.
bool CPU::PGXP::GTE_HasPreciseVertices(u32 sxy0, u32 sxy1, u32 sxy2)
{
  PGXPValue& v0 = GetSXY0();
  PGXPValue& v1 = GetSXY1();
  PGXPValue& v2 = GetSXY2();
  v0.Validate(sxy0);
  v1.Validate(sxy1);
  v2.Validate(sxy2);

  // Don't use accurate clipping for game-constructed values, which don't have a valid Z.
  const u32 shared_flags = (v0.flags & v1.flags & v2.flags);
  return ((shared_flags & VALID_XYZ) == VALID_XYZ);
}
// Computes the NCLIP value from the precise X/Y of the three screen XY FIFO entries.
// Results whose magnitude lies strictly between 0.1 and 1.0 are pushed one unit away from zero.
float CPU::PGXP::GTE_NCLIP()
{
const PGXPValue& SXY0 = GetSXY0();
const PGXPValue& SXY1 = GetSXY1();
const PGXPValue& SXY2 = GetSXY2();
// Sum of the three "forward" cross products minus the three "backward" ones.
float nclip = ((SXY0.x * SXY1.y) + (SXY1.x * SXY2.y) + (SXY2.x * SXY0.y) - (SXY0.x * SXY2.y) - (SXY1.x * SXY0.y) -
(SXY2.x * SXY1.y));
// ensure fractional values are not incorrectly rounded to 0
const float nclip_abs = std::abs(nclip);
if (0.1f < nclip_abs && nclip_abs < 1.0f)
nclip += (nclip < 0.0f ? -1.0f : 1.0f);
return nclip;
}
// Writes a PGXP value to the shadow of GTE data register `reg`.
// Register 15 pushes onto the screen XY FIFO, registers 29 and 31 are read-only and ignored, and every
// other register takes a copy of `value` with its raw value forced to `val`.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_MTC2(u32 reg, const PGXPValue& value, u32 val)
{
  // read-only registers
  if (reg == 29 || reg == 31)
    return;

  if (reg == 15)
  {
    // push FIFO
    PGXPValue& newest = PushSXY();
    newest = value;
    return;
  }

  PGXPValue& target = g_state.pgxp_gte[reg];
  target = value;
  target.value = val;
}
// MFC2: copies the PGXP shadow of a GTE data register into the shadow of CPU register rt.
void CPU::PGXP::CPU_MFC2(Instruction instr, u32 rdVal)
{
  // CPU[Rt] = GTE_D[Rd]
  const u32 gte_index = instr.cop.Cop2Index();
  LOG_VALUES_1(CPU::GetGTERegisterName(gte_index), rdVal, &g_state.pgxp_gte[gte_index]);

  PGXPValue& source = g_state.pgxp_gte[gte_index];
  source.Validate(rdVal);
  SetRtValue(instr, source, rdVal);
}
// MTC2: copies the (validated) PGXP shadow of CPU register rt into the shadow of a GTE data register.
void CPU::PGXP::CPU_MTC2(Instruction instr, u32 rtVal)
{
  // GTE_D[Rd] = CPU[Rt]
  const u32 gte_index = instr.cop.Cop2Index();
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  const PGXPValue& source = ValidateAndGetRtValue(instr, rtVal);
  CPU_MTC2(gte_index, source, rtVal);
}
// LWC2: loads the shadow memory entry at addr into the shadow of GTE data register rt.
void CPU::PGXP::CPU_LWC2(Instruction instr, u32 addr, u32 rtVal)
{
  // GTE_D[Rt] = Mem[addr]
  LOG_VALUES_LOAD(addr, rtVal);

  const u32 gte_index = static_cast<u32>(instr.r.rt.GetValue());
  const PGXPValue& loaded = ValidateAndLoadMem(addr, rtVal);
  CPU_MTC2(gte_index, loaded, rtVal);
}
// SWC2: stores the (validated) shadow of GTE data register rt to the shadow memory entry at addr.
void CPU::PGXP::CPU_SWC2(Instruction instr, u32 addr, u32 rtVal)
{
  // Mem[addr] = GTE_D[Rt]
  const u32 gte_index = static_cast<u32>(instr.r.rt.GetValue());
  PGXPValue& source = g_state.pgxp_gte[gte_index];

#ifdef LOG_VALUES
  LOG_VALUES_1(CPU::GetGTERegisterName(gte_index), rtVal, &source);
  std::fprintf(s_log, " addr=%08X", addr);
#endif

  source.Validate(rtVal);
  WriteMem(addr, source);
}
// Stores vertex in the cache slot addressed by the packed screen coordinate `value`
// (X in the low 16 bits, Y in the high 16 bits). Both coordinates are asserted (debug builds only) to lie
// in [-1024, 1023]; they are biased by 1024 to form the column and row of the slot.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CacheVertex(u32 value, const PGXPValue& vertex)
{
  const s16 sx = static_cast<s16>(value & 0xFFFFu);
  const s16 sy = static_cast<s16>(value >> 16);
  DebugAssert(sx >= -1024 && sx <= 1023 && sy >= -1024 && sy <= 1023);

  const u32 column = static_cast<u32>(sx + 1024);
  const u32 row = static_cast<u32>(sy + 1024);
  s_vertex_cache[row * VERTEX_CACHE_WIDTH + column] = vertex;
}
// Returns the vertex cache slot addressed by the packed screen coordinate `value`
// (X in the low 16 bits, Y in the high 16 bits), or nullptr when either coordinate lies outside
// [-1024, 1023]. The caller is responsible for only calling this while the vertex cache is allocated.
ALWAYS_INLINE_RELEASE CPU::PGXPValue* CPU::PGXP::GetCachedVertex(u32 value)
{
  const s16 sx = static_cast<s16>(value & 0xFFFFu);
  const s16 sy = static_cast<s16>(value >> 16);

  // Both axes use the same [-1024, 1023] bounds that CacheVertex() asserts, so every row and column that
  // can be written can also be found again (the Y upper bound must be 1023, not 1013).
  const bool in_range = (sx >= -1024 && sx <= 1023 && sy >= -1024 && sy <= 1023);
  return in_range ? &s_vertex_cache[(sy + 1024) * VERTEX_CACHE_WIDTH + (sx + 1024)] : nullptr;
}
// Applies the GPU's vertex position truncation to the integer part of p while keeping its fractional part.
ALWAYS_INLINE_RELEASE float CPU::PGXP::TruncateVertexPosition(float p)
{
  // Truncates positions to 11 bits before drawing.
  // Matches GPU command parsing, where the upper 5 bits are dropped.
  // Necessary for Jet Moto and Racingroovy VS.
  const s32 whole = static_cast<s32>(p);
  const float fraction = p - static_cast<float>(whole);
  const float truncated_whole = static_cast<float>(TruncateGPUVertexPosition(whole));
  return truncated_whole + fraction;
}
// Reports whether a precise position is close enough to the native integer position on both axes.
// A negative g_settings.gpu_pgxp_tolerance disables the check, so everything is accepted.
ALWAYS_INLINE_RELEASE bool CPU::PGXP::IsWithinTolerance(float precise_x, float precise_y, int int_x, int int_y)
{
  const float tolerance = g_settings.gpu_pgxp_tolerance;
  if (tolerance < 0.0f)
    return true;

  const float x_error = std::abs(precise_x - static_cast<float>(int_x));
  if (!(x_error <= tolerance))
    return false;

  const float y_error = std::abs(precise_y - static_cast<float>(int_y));
  return (y_error <= tolerance);
}
// Looks up a precise position for a vertex the GPU is about to draw.
//   addr/value  - address the vertex word was read from, and its raw contents.
//   x, y        - native integer position, used for the tolerance check and as the fallback.
//   xOffs/yOffs - offsets added to the precise position.
//   out_x/out_y/out_w - receive the resulting position and W.
// Sources are tried in order: the shadow memory entry at addr, then (if enabled) the vertex cache, then the
// native position with W = 1. Returns true only when the shadow memory entry was used and has a valid Z.
bool CPU::PGXP::GetPreciseVertex(u32 addr, u32 value, int x, int y, int xOffs, int yOffs, float* out_x, float* out_y,
                                 float* out_w)
{
  // Writes the offset, truncated position and W of a PGXP value to the outputs.
  const auto store_precise = [&](const PGXPValue& src) {
    *out_x = TruncateVertexPosition(src.x) + static_cast<float>(xOffs);
    *out_y = TruncateVertexPosition(src.y) + static_cast<float>(yOffs);
    *out_w = src.z / 32768.0f;
  };

  // First choice: the shadow memory entry, provided it still describes the word that was read.
  const PGXPValue* const mem_vert = GetPtr(addr);
  const bool mem_usable = (mem_vert && (mem_vert->flags & VALID_XY) == VALID_XY && mem_vert->value == value);
  if (mem_usable)
  {
    store_precise(*mem_vert);

#ifdef LOG_LOOKUPS
    GL_INS_FMT("0x{:08X} {},{} => {},{} ({},{},{}) ({},{})", addr, x, y, *out_x, *out_y,
               TruncateVertexPosition(mem_vert->x), TruncateVertexPosition(mem_vert->y), mem_vert->z,
               std::abs(*out_x - x), std::abs(*out_y - y));
#endif

    if (IsWithinTolerance(*out_x, *out_y, x, y))
    {
      // check validity of z component
      return ((mem_vert->flags & VALID_Z) == VALID_Z);
    }
  }

  // Second choice: the vertex cache. A hit supplies the position but is never reported as precise.
  if (g_settings.gpu_pgxp_vertex_cache)
  {
    const PGXPValue* const cached_vert = GetCachedVertex(value);
    const bool cache_usable = (cached_vert && (cached_vert->flags & VALID_XY) == VALID_XY);
    if (cache_usable)
    {
      store_precise(*cached_vert);
      if (IsWithinTolerance(*out_x, *out_y, x, y))
        return false;
    }
  }

  // no valid value can be found anywhere, use the native PSX data
  *out_x = static_cast<float>(x);
  *out_y = static_cast<float>(y);
  *out_w = 1.0f;
  return false;
}
// LW: loads the validated shadow memory entry at addr into the shadow of register rt.
void CPU::PGXP::CPU_LW(Instruction instr, u32 addr, u32 rtVal)
{
  // Rt = Mem[Rs + Im]
  LOG_VALUES_LOAD(addr, rtVal);

  const PGXPValue& loaded = ValidateAndLoadMem(addr, rtVal);
  SetRtValue(instr, loaded);
}
// LB/LBU: byte loads carry no PGXP data, so the shadow of register rt is reset to INVALID_VALUE.
void CPU::PGXP::CPU_LBx(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_LOAD(addr, rtVal);

  const PGXPValue& no_data = INVALID_VALUE;
  SetRtValue(instr, no_data);
}
// LH: loads a halfword from shadow memory into the shadow of register rt, with sign extension.
void CPU::PGXP::CPU_LH(Instruction instr, u32 addr, u32 rtVal)
{
  // Rt = Mem[Rs + Im] (sign extended)
  LOG_VALUES_LOAD(addr, rtVal);

  constexpr bool sign_extend = true;
  PGXPValue& target = GetRtValue(instr);
  ValidateAndLoadMem16(target, addr, rtVal, sign_extend);
}
// LHU: loads a halfword from shadow memory into the shadow of register rt, with zero extension.
void CPU::PGXP::CPU_LHU(Instruction instr, u32 addr, u32 rtVal)
{
  // Rt = Mem[Rs + Im] (zero extended)
  LOG_VALUES_LOAD(addr, rtVal);

  constexpr bool sign_extend = false;
  PGXPValue& target = GetRtValue(instr);
  ValidateAndLoadMem16(target, addr, rtVal, sign_extend);
}
// SB: a byte store invalidates the PGXP data of the containing word, so INVALID_VALUE is written over it.
void CPU::PGXP::CPU_SB(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr);

  const PGXPValue& no_data = INVALID_VALUE;
  WriteMem(addr, no_data);
}
// SH: stores the (validated) shadow of register rt into one half of the shadow memory entry at addr.
void CPU::PGXP::CPU_SH(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr);

  const PGXPValue& source = ValidateAndGetRtValue(instr, rtVal);
  WriteMem16(addr, source);
}
// SW: stores the (validated) shadow of register rt to the shadow memory entry at addr.
void CPU::PGXP::CPU_SW(Instruction instr, u32 addr, u32 rtVal)
{
  // Mem[Rs + Im] = Rt
  LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr);

  const PGXPValue& source = ValidateAndGetRtValue(instr, rtVal);
  WriteMem(addr, source);
}
// Variant of CPU_MOVE() taking both register numbers in one word: Rs in bits 0-7, Rd in the bits above.
void CPU::PGXP::CPU_MOVE_Packed(u32 rd_and_rs, u32 rsVal)
{
  const u32 dest_reg = (rd_and_rs >> 8);
  const u32 source_reg = (rd_and_rs & 0xFFu);
  CPU_MOVE(dest_reg, source_reg, rsVal);
}
// Register-to-register move: validates the shadow of register Rs against rsVal and copies it to register Rd.
void CPU::PGXP::CPU_MOVE(u32 Rd, u32 Rs, u32 rsVal)
{
#ifdef LOG_VALUES
  // The logging macros expect an `instr` in scope; there is no real instruction here.
  const Instruction instr = {0};
  LOG_VALUES_C1(Rs, rsVal);
#endif

  PGXPValue& source = g_state.pgxp_gpr[Rs];
  source.Validate(rsVal);

  PGXPValue& target = g_state.pgxp_gpr[Rd];
  target = source;
}
// ADDI/ADDIU: adds a sign-extended immediate to the shadow of rs and stores the result in the shadow of rt.
// The low 16 bits are tracked in x and the high 16 bits in y, with carries from x propagated into y.
// NOTE: rt and rs may name the same register, so the statement order below matters.
void CPU::PGXP::CPU_ADDI(Instruction instr, u32 rsVal)
{
LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);
// Rt = Rs + Imm (signed)
PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
const u32 immVal = instr.i.imm_sext32();
PGXPValue& prtVal = GetRtValue(instr);
prtVal = prsVal;
// Adding zero is a plain move; the copy above is the result.
if (immVal == 0)
return;
// Source is zero: the result is just the immediate, split into its signed halves.
if (rsVal == 0)
{
// x is low precision value
prtVal.x = static_cast<float>(LOWORD_S16(immVal));
prtVal.y = static_cast<float>(HIWORD_S16(immVal));
prtVal.flags |= VALID_X | VALID_Y | VALID_TAINTED_Z;
prtVal.value = immVal;
return;
}
// General case: add the low halves as unsigned quantities so a carry can be detected.
prtVal.x = static_cast<float>(f16Unsign(prtVal.x));
prtVal.x += static_cast<float>(LOWORD_U16(immVal));
// carry on over/underflow
const float of = (prtVal.x > USHRT_MAX) ? 1.0f : (prtVal.x < 0.0f) ? -1.0f : 0.0f;
prtVal.x = static_cast<float>(f16Sign(prtVal.x));
prtVal.y += HIWORD_S16(immVal) + of;
// truncate on overflow/underflow
prtVal.y += (prtVal.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (prtVal.y < SHRT_MIN) ? (USHRT_MAX + 1) : 0.0f;
prtVal.value = rsVal + immVal;
prtVal.flags |= VALID_TAINTED_Z;
}
// ANDI: ANDs the shadow of rs with a zero-extended immediate and stores the result in the shadow of rt.
// The high half (y) of the result is always zero; x depends on the immediate (see the switch below).
// NOTE: rt and rs may name the same register.
void CPU::PGXP::CPU_ANDI(Instruction instr, u32 rsVal)
{
LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);
// Rt = Rs & Imm
const u32 imm = instr.i.imm_zext32();
const u32 rtVal = rsVal & imm;
PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
PGXPValue& prtVal = GetRtValue(instr);
// remove upper 16-bits
prtVal.y = 0.0f;
prtVal.z = prsVal.z;
prtVal.value = rtVal;
// Inherit the source flags; y is known (zero), and Z is no longer guaranteed to match the new x/y.
prtVal.flags = prsVal.flags | VALID_Y | VALID_TAINTED_Z;
switch (imm)
{
case 0:
{
// if 0 then x == 0
prtVal.x = 0.0f;
prtVal.flags |= VALID_X;
}
break;
case 0xFFFFu:
{
// if saturated then x == x
prtVal.x = prsVal.x;
}
break;
default:
{
// otherwise x is low precision value
prtVal.x = static_cast<float>(LOWORD_S16(rtVal));
prtVal.flags |= VALID_X;
}
break;
}
}
// ORI: ORs the shadow of rs with a zero-extended immediate and stores the result in the shadow of rt.
// A zero immediate is a plain move; otherwise x is replaced by the integer low half of the result.
void CPU::PGXP::CPU_ORI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  // Rt = Rs | Imm
  const u32 imm = instr.i.imm_zext32();
  const u32 result = rsVal | imm;

  PGXPValue& source = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& target = GetRtValue(instr);
  target = source;
  target.value = result;

  // if 0 then x == x, otherwise x is low precision value
  if (imm != 0) [[likely]]
  {
    target.x = static_cast<float>(LOWORD_S16(result));
    target.flags |= VALID_X | VALID_TAINTED_Z;
  }
}
// XORI: XORs the shadow of rs with a zero-extended immediate and stores the result in the shadow of rt.
// A zero immediate is a plain move; otherwise x is replaced by the integer low half of the result.
void CPU::PGXP::CPU_XORI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  // Rt = Rs ^ Imm
  const u32 imm = instr.i.imm_zext32();
  const u32 result = rsVal ^ imm;

  PGXPValue& source = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& target = GetRtValue(instr);
  target = source;
  target.value = result;

  // if 0 then x == x, otherwise x is low precision value
  if (imm != 0) [[likely]]
  {
    target.x = static_cast<float>(LOWORD_S16(result));
    target.flags |= VALID_X | VALID_TAINTED_Z;
  }
}
// SLTI: sets the shadow of rt to 1 or 0 depending on a signed comparison of rs with a 16-bit immediate.
// x holds the result of the comparison made on the precise components; value holds the integer result.
// NOTE: rt and rs may name the same register; x is computed from rs before anything in rt is overwritten.
void CPU::PGXP::CPU_SLTI(Instruction instr, u32 rsVal)
{
LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);
// Rt = Rs < Imm (signed)
const s32 imm = instr.i.imm_s16();
PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
// Split the immediate into a low part (x) and the high part its sign extension implies (y).
const float fimmx = static_cast<float>(imm);
const float fimmy = fimmx < 0.0f ? -1.0f : 0.0f;
PGXPValue& prtVal = GetRtValue(instr);
prtVal.x = (prsVal.GetValidY(rsVal) < fimmy || prsVal.GetValidX(rsVal) < fimmx) ? 1.0f : 0.0f;
prtVal.y = 0.0f;
prtVal.z = prsVal.z;
prtVal.flags = prsVal.flags | VALID_X | VALID_Y | VALID_TAINTED_Z;
prtVal.value = BoolToUInt32(static_cast<s32>(rsVal) < imm);
}
// SLTIU: sets the shadow of rt to 1 or 0 depending on an unsigned comparison of rs with a 16-bit immediate.
// x holds the result of the comparison made on the precise components; value holds the integer result.
// NOTE: rt and rs may name the same register; x is computed from rs before anything in rt is overwritten.
void CPU::PGXP::CPU_SLTIU(Instruction instr, u32 rsVal)
{
LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);
// Rt = Rs < Imm (Unsigned)
const u32 imm = instr.i.imm_u16();
PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
const float fimmx = static_cast<float>(static_cast<s16>(imm)); // deliberately signed
const float fimmy = fimmx < 0.0f ? -1.0f : 0.0f;
PGXPValue& prtVal = GetRtValue(instr);
prtVal.x =
(f16Unsign(prsVal.GetValidY(rsVal)) < f16Unsign(fimmy) || f16Unsign(prsVal.GetValidX(rsVal)) < fimmx) ? 1.0f : 0.0f;
prtVal.y = 0.0f;
prtVal.z = prsVal.z;
prtVal.flags = prsVal.flags | VALID_X | VALID_Y | VALID_TAINTED_Z;
prtVal.value = BoolToUInt32(rsVal < imm);
}
// LUI: sets the shadow of rt to an immediate placed in the upper 16 bits.
// x (the low half) is zero, y (the high half) is the signed immediate, and Z is cleared and not marked valid.
void CPU::PGXP::CPU_LUI(Instruction instr)
{
  LOG_VALUES_NV();

  // Rt = Imm << 16
  const u32 raw_value = instr.i.imm_zext32() << 16;
  const float high_half = static_cast<float>(instr.i.imm_s16());

  PGXPValue& target = GetRtValue(instr);
  target.value = raw_value;
  target.flags = VALID_XY;
  target.x = 0.0f;
  target.y = high_half;
  target.z = 0.0f;
}
// ADD/ADDU: adds the shadows of rs and rt and stores the result in the shadow of rd.
// The low 16 bits are tracked in x and the high 16 bits in y, with carries from x propagated into y.
// NOTE: rd may name the same register as rs or rt, so the statement order below matters.
void CPU::PGXP::CPU_ADD(Instruction instr, u32 rsVal, u32 rtVal)
{
LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);
// Rd = Rs + Rt (signed)
PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);
PGXPValue& prdVal = GetRdValue(instr);
// Adding zero is a move of the other operand, borrowing Z from the zero operand if the result lacks one.
if (rtVal == 0)
{
prdVal = prsVal;
CopyZIfMissing(prdVal, prtVal);
}
else if (rsVal == 0)
{
prdVal = prtVal;
CopyZIfMissing(prdVal, prsVal);
}
else
{
// Add the low halves as unsigned quantities so a carry can be detected.
const double x = f16Unsign(prsVal.GetValidX(rsVal)) + f16Unsign(prtVal.GetValidX(rtVal));
// carry on over/underflow
const float of = (x > USHRT_MAX) ? 1.0f : (x < 0.0f) ? -1.0f : 0.0f;
prdVal.x = static_cast<float>(f16Sign(x));
prdVal.y = prsVal.GetValidY(rsVal) + prtVal.GetValidY(rtVal) + of;
// truncate on overflow/underflow
prdVal.y += (prdVal.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (prdVal.y < SHRT_MIN) ? (USHRT_MAX + 1) : 0.0f;
prdVal.value = rsVal + rtVal;
// valid x/y only if one side had a valid x/y
prdVal.flags = prsVal.flags | (prtVal.flags & VALID_XY) | VALID_TAINTED_Z;
SelectZ(prdVal.z, prdVal.flags, prsVal, prtVal);
}
}
// SUB/SUBU: subtracts the shadow of rt from the shadow of rs and stores the result in the shadow of rd.
// The low 16 bits are tracked in x and the high 16 bits in y, with borrows from x propagated into y.
// NOTE: rd may name the same register as rs or rt, so the statement order below matters.
void CPU::PGXP::CPU_SUB(Instruction instr, u32 rsVal, u32 rtVal)
{
LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);
// Rd = Rs - Rt (signed)
PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);
PGXPValue& prdVal = GetRdValue(instr);
// Subtracting zero is a move of rs, borrowing Z from rt if the result lacks one.
if (rtVal == 0)
{
prdVal = prsVal;
CopyZIfMissing(prdVal, prtVal);
}
else
{
// Subtract the low halves as unsigned quantities so a borrow can be detected.
const double x = f16Unsign(prsVal.GetValidX(rsVal)) - f16Unsign(prtVal.GetValidX(rtVal));
// carry on over/underflow
const float of = (x > USHRT_MAX) ? 1.0f : (x < 0.0f) ? -1.0f : 0.0f;
prdVal.x = static_cast<float>(f16Sign(x));
prdVal.y = prsVal.GetValidY(rsVal) - (prtVal.GetValidY(rtVal) - of);
// truncate on overflow/underflow
prdVal.y += (prdVal.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (prdVal.y < SHRT_MIN) ? (USHRT_MAX + 1) : 0.0f;
prdVal.value = rsVal - rtVal;
// valid x/y only if one side had a valid x/y
prdVal.flags = prsVal.flags | (prtVal.flags & VALID_XY) | VALID_TAINTED_Z;
SelectZ(prdVal.z, prdVal.flags, prsVal, prtVal);
}
}
// Shared implementation of the register-register bitwise instructions (AND, OR, XOR, NOR).
// rdVal is the already-computed integer result. For each 16-bit half, the precise component is taken from
// whichever operand has an identical half, is zero when the half is zero, and otherwise falls back to the
// integer value of the half.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_BITWISE(Instruction instr, u32 rdVal, u32 rsVal, u32 rtVal)
{
// Rd = Rs & Rt
PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);
float x, y;
// Low half -> x.
if (LOWORD_U16(rdVal) == 0)
x = 0.0f;
else if (LOWORD_U16(rdVal) == LOWORD_U16(rsVal))
x = prsVal.GetValidX(rsVal);
else if (LOWORD_U16(rdVal) == LOWORD_U16(rtVal))
x = prtVal.GetValidX(rtVal);
else
x = static_cast<float>(LOWORD_S16(rdVal));
// High half -> y.
if (HIWORD_U16(rdVal) == 0)
y = 0.0f;
else if (HIWORD_U16(rdVal) == HIWORD_U16(rsVal))
y = prsVal.GetValidY(rsVal);
else if (HIWORD_U16(rdVal) == HIWORD_U16(rtVal))
y = prtVal.GetValidY(rtVal);
else
y = static_cast<float>(HIWORD_S16(rdVal));
// Why not write directly to prdVal? Because it might be the same as the source.
// The result is marked XY-valid (with tainted Z) if either operand had any valid X/Y component.
u32 flags = ((prsVal.flags | prtVal.flags) & VALID_XY) ? (VALID_XY | VALID_TAINTED_Z) : 0;
PGXPValue& prdVal = GetRdValue(instr);
SelectZ(prdVal.z, flags, prsVal, prtVal);
prdVal.x = x;
prdVal.y = y;
prdVal.flags = flags;
prdVal.value = rdVal;
}
// AND: computes the integer result and defers to CPU_BITWISE() for the PGXP bookkeeping.
void CPU::PGXP::CPU_AND_(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs & Rt
  CPU_BITWISE(instr, rsVal & rtVal, rsVal, rtVal);
}
// OR: computes the integer result and defers to CPU_BITWISE() for the PGXP bookkeeping.
void CPU::PGXP::CPU_OR_(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs | Rt
  CPU_BITWISE(instr, rsVal | rtVal, rsVal, rtVal);
}
// XOR: computes the integer result and defers to CPU_BITWISE() for the PGXP bookkeeping.
void CPU::PGXP::CPU_XOR_(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs ^ Rt
  CPU_BITWISE(instr, rsVal ^ rtVal, rsVal, rtVal);
}
// NOR: computes the integer result and defers to CPU_BITWISE() for the PGXP bookkeeping.
void CPU::PGXP::CPU_NOR(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs NOR Rt
  CPU_BITWISE(instr, ~(rsVal | rtVal), rsVal, rtVal);
}
// SLT: sets the shadow of rd to 1 or 0 depending on a signed comparison of rs with rt.
// x holds the result of the comparison made on the precise components; value holds the integer result.
// NOTE: rd may name the same register as rs or rt; x is computed before rd's other fields are overwritten.
void CPU::PGXP::CPU_SLT(Instruction instr, u32 rsVal, u32 rtVal)
{
LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);
// Rd = Rs < Rt (signed)
PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);
PGXPValue& prdVal = GetRdValue(instr);
// High halves are compared as signed values, low halves as unsigned ones.
prdVal.x = (prsVal.GetValidY(rsVal) < prtVal.GetValidY(rtVal) ||
f16Unsign(prsVal.GetValidX(rsVal)) < f16Unsign(prtVal.GetValidX(rtVal))) ?
1.0f :
0.0f;
prdVal.y = 0.0f;
prdVal.z = prsVal.z;
prdVal.flags = prsVal.flags | VALID_TAINTED_Z | VALID_X | VALID_Y;
prdVal.value = BoolToUInt32(static_cast<s32>(rsVal) < static_cast<s32>(rtVal));
}
// SLTU: sets the shadow of rd to 1 or 0 depending on an unsigned comparison of rs with rt.
// x holds the result of the comparison made on the precise components; value holds the integer result.
// NOTE: rd may name the same register as rs or rt; x is computed before rd's other fields are overwritten.
void CPU::PGXP::CPU_SLTU(Instruction instr, u32 rsVal, u32 rtVal)
{
LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);
// Rd = Rs < Rt (unsigned)
PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);
PGXPValue& prdVal = GetRdValue(instr);
// Both halves are compared as unsigned values.
prdVal.x = (f16Unsign(prsVal.GetValidY(rsVal)) < f16Unsign(prtVal.GetValidY(rtVal)) ||
f16Unsign(prsVal.GetValidX(rsVal)) < f16Unsign(prtVal.GetValidX(rtVal))) ?
1.0f :
0.0f;
prdVal.y = 0.0f;
prdVal.z = prsVal.z;
prdVal.flags = prsVal.flags | VALID_TAINTED_Z | VALID_X | VALID_Y;
prdVal.value = BoolToUInt32(rsVal < rtVal);
}
// MULT: signed multiply of rs by rt, writing the 64-bit product to the shadows of the lo and hi registers.
// Each operand is treated as a pair of 16-bit halves (x low, y high); the four partial products are summed
// with carries to form the low and high halves of lo and hi.
void CPU::PGXP::CPU_MULT(Instruction instr, u32 rsVal, u32 rtVal)
{
LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);
// Hi/Lo = Rs * Rt (signed)
PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);
PGXPValue& ploVal = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
PGXPValue& phiVal = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];
ploVal = prsVal;
// NOTE(review): ploVal was just copied from prsVal, so this call changes nothing. It may have been meant
// to take Z from prtVal (compare CPU_ADD) -- confirm before changing.
CopyZIfMissing(ploVal, prsVal);
// Z/valid is the same
phiVal = ploVal;
const float rsx = prsVal.GetValidX(rsVal);
const float rsy = prsVal.GetValidY(rsVal);
const float rtx = prtVal.GetValidX(rtVal);
const float rty = prtVal.GetValidY(rtVal);
// Multiply out components
// Low halves are used as unsigned quantities, high halves keep their sign.
const double xx = f16Unsign(rsx) * f16Unsign(rtx);
const double xy = f16Unsign(rsx) * (rty);
const double yx = rsy * f16Unsign(rtx);
const double yy = rsy * rty;
// Split values into outputs
// Each step carries the overflow of the previous 16-bit column into the next one.
const double lx = xx;
const double ly = f16Overflow(xx) + (xy + yx);
const double hx = f16Overflow(ly) + yy;
const double hy = f16Overflow(hx);
ploVal.x = static_cast<float>(f16Sign(lx));
ploVal.y = static_cast<float>(f16Sign(ly));
ploVal.flags |= VALID_TAINTED_Z | (prtVal.flags & VALID_XY);
phiVal.x = static_cast<float>(f16Sign(hx));
phiVal.y = static_cast<float>(f16Sign(hy));
phiVal.flags |= VALID_TAINTED_Z | (prtVal.flags & VALID_XY);
// compute PSX value
const u64 result = static_cast<u64>(static_cast<s64>(SignExtend64(rsVal)) * static_cast<s64>(SignExtend64(rtVal)));
phiVal.value = Truncate32(result >> 32);
ploVal.value = Truncate32(result);
}
// MULTU: Hi/Lo = Rs * Rt (unsigned).
// Same scheme as the signed multiply, except that every component is passed through
// f16Unsign before the partial products are formed.
void CPU::PGXP::CPU_MULTU(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  PGXPValue& src_s = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& src_t = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& lo_reg = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
  PGXPValue& hi_reg = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];

  // Both outputs start out as a copy of Rs, so Z and the valid bits are identical for Hi and Lo.
  lo_reg = src_s;
  CopyZIfMissing(lo_reg, src_s);
  hi_reg = lo_reg;

  const float s_x = src_s.GetValidX(rsVal);
  const float s_y = src_s.GetValidY(rsVal);
  const float t_x = src_t.GetValidX(rtVal);
  const float t_y = src_t.GetValidY(rtVal);

  // Partial products of the unsigned components.
  const double prod_xx = f16Unsign(s_x) * f16Unsign(t_x);
  const double prod_xy = f16Unsign(s_x) * f16Unsign(t_y);
  const double prod_yx = f16Unsign(s_y) * f16Unsign(t_x);
  const double prod_yy = f16Unsign(s_y) * f16Unsign(t_y);

  // Distribute the partial products, carrying the overflow of each component into the next one up.
  const double lo_y = f16Overflow(prod_xx) + (prod_xy + prod_yx);
  const double hi_x = f16Overflow(lo_y) + prod_yy;
  const double hi_y = f16Overflow(hi_x);

  // Z is tainted by the operation; X/Y validity is additionally picked up from Rt.
  const auto extra_flags = VALID_TAINTED_Z | (src_t.flags & VALID_XY);

  lo_reg.x = static_cast<float>(f16Sign(prod_xx));
  lo_reg.y = static_cast<float>(f16Sign(lo_y));
  lo_reg.flags |= extra_flags;

  hi_reg.x = static_cast<float>(f16Sign(hi_x));
  hi_reg.y = static_cast<float>(f16Sign(hi_y));
  hi_reg.flags |= extra_flags;

  // Exact 64-bit integer product, split across Hi (upper half) and Lo (lower half).
  const u64 product = ZeroExtend64(rsVal) * ZeroExtend64(rtVal);
  hi_reg.value = Truncate32(product >> 32);
  lo_reg.value = Truncate32(product);
}
// DIV: Lo = Rs / Rt, Hi = Rs % Rt (signed).
// The high-precision quotient and remainder are computed in double precision from the tracked
// components, while the integer values reproduce the special cases handled explicitly below
// (division by zero and 0x80000000 / -1).
void CPU::PGXP::CPU_DIV(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  PGXPValue& src_s = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& src_t = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& lo_reg = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
  PGXPValue& hi_reg = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];

  // Both outputs start out as a copy of Rs, so Z and the valid bits are identical for Hi and Lo.
  lo_reg = src_s;
  CopyZIfMissing(lo_reg, src_s);
  hi_reg = lo_reg;

  // Recombine X (low part) and Y (high part, scaled by 2^16) into a single value per operand.
  const double dividend = f16Unsign(src_s.GetValidX(rsVal)) + src_s.GetValidY(rsVal) * static_cast<double>(1 << 16);
  const double divisor = f16Unsign(src_t.GetValidX(rtVal)) + src_t.GetValidY(rtVal) * static_cast<double>(1 << 16);

  // Z is tainted by the operation; X/Y validity is additionally picked up from Rt.
  const auto extra_flags = VALID_TAINTED_Z | (src_t.flags & VALID_XY);

  const double quotient = dividend / divisor;
  lo_reg.y = static_cast<float>(f16Sign(f16Overflow(quotient)));
  lo_reg.x = static_cast<float>(f16Sign(quotient));
  lo_reg.flags |= extra_flags;

  const double remainder = std::fmod(dividend, divisor);
  hi_reg.y = static_cast<float>(f16Sign(f16Overflow(remainder)));
  hi_reg.x = static_cast<float>(f16Sign(remainder));
  hi_reg.flags |= extra_flags;

  // Integer result.
  const s32 numerator = static_cast<s32>(rsVal);
  const s32 denominator = static_cast<s32>(rtVal);
  if (denominator == 0)
  {
    // Division by zero: Lo depends on the sign of the numerator, Hi receives the numerator.
    lo_reg.value = (numerator >= 0) ? UINT32_C(0xFFFFFFFF) : UINT32_C(1);
    hi_reg.value = static_cast<u32>(numerator);
  }
  else if (rsVal == UINT32_C(0x80000000) && denominator == -1)
  {
    // The quotient is not representable in 32 bits.
    lo_reg.value = UINT32_C(0x80000000);
    hi_reg.value = 0;
  }
  else
  {
    lo_reg.value = static_cast<u32>(numerator / denominator);
    hi_reg.value = static_cast<u32>(numerator % denominator);
  }
}
// DIVU: Lo = Rs / Rt, Hi = Rs % Rt (unsigned).
// The high-precision quotient and remainder are computed in double precision from the tracked
// components; the integer values handle division by zero explicitly.
void CPU::PGXP::CPU_DIVU(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  PGXPValue& src_s = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& src_t = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& lo_reg = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
  PGXPValue& hi_reg = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];

  // Both outputs start out as a copy of Rs, so Z and the valid bits are identical for Hi and Lo.
  lo_reg = src_s;
  CopyZIfMissing(lo_reg, src_s);
  hi_reg = lo_reg;

  // Recombine X (low part) and Y (high part, scaled by 2^16) into a single value per operand.
  const double dividend =
    f16Unsign(src_s.GetValidX(rsVal)) + f16Unsign(src_s.GetValidY(rsVal)) * static_cast<double>(1 << 16);
  const double divisor =
    f16Unsign(src_t.GetValidX(rtVal)) + f16Unsign(src_t.GetValidY(rtVal)) * static_cast<double>(1 << 16);

  // Z is tainted by the operation; X/Y validity is additionally picked up from Rt.
  const auto extra_flags = VALID_TAINTED_Z | (src_t.flags & VALID_XY);

  const double quotient = dividend / divisor;
  lo_reg.y = static_cast<float>(f16Sign(f16Overflow(quotient)));
  lo_reg.x = static_cast<float>(f16Sign(quotient));
  lo_reg.flags |= extra_flags;

  const double remainder = std::fmod(dividend, divisor);
  hi_reg.y = static_cast<float>(f16Sign(f16Overflow(remainder)));
  hi_reg.x = static_cast<float>(f16Sign(remainder));
  hi_reg.flags |= extra_flags;

  // Integer result.
  if (rtVal != 0)
  {
    lo_reg.value = rsVal / rtVal;
    hi_reg.value = rsVal % rtVal;
  }
  else
  {
    // Division by zero: Lo is all ones, Hi receives the numerator.
    lo_reg.value = UINT32_C(0xFFFFFFFF);
    hi_reg.value = rsVal;
  }
}
// Shared implementation of the left-shift instructions: Rd = Rt << sh.
//   instr - instruction being executed; supplies the Rt source and Rd destination registers.
//   rtVal - integer value of Rt.
//   sh    - shift amount; the callers in this file pass instr.r.shamt or (rsVal & 0x1F).
// Rd may be the same register as Rt, so within each case everything that is read from the source
// is consumed before the matching destination component is overwritten.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_SLL(Instruction instr, u32 rtVal, u32 sh)
{
  const u32 shifted = rtVal << sh;

  PGXPValue& src = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& dest = GetRdValue(instr);
  dest.z = src.z;
  dest.value = shifted;

  if (sh >= 32) [[unlikely]]
  {
    // Everything is shifted out.
    dest.x = 0.0f;
    dest.y = 0.0f;
    dest.flags = src.flags | VALID_XY | VALID_TAINTED_Z;
    return;
  }

  if (sh == 16)
  {
    // X moves into Y unchanged, and the low component becomes zero.
    dest.y = src.x;
    dest.x = 0.0f;

    // Only set valid X if there's also a valid Y. We could use GetValidX() to pull it from the low precision value
    // instead, need to investigate further. Spyro breaks if only X is set even if Y is not valid.
    // dest.flags = (src.flags & ~VALID_Y) | ((src.flags & VALID_X) << 1) | VALID_X | VALID_TAINTED_Z;
    dest.flags = (src.flags | VALID_TAINTED_Z) | ((src.flags & VALID_Y) >> 1);
    return;
  }

  if (sh > 16)
  {
    // X moves into Y, scaled by the part of the shift that exceeds 16 bits.
    dest.y = static_cast<float>(f16Sign(f16Unsign(src.x * static_cast<double>(1 << (sh - 16)))));
    dest.x = 0.0f;

    // Same flag handling as the sh == 16 case.
    // dest.flags = (src.flags & ~VALID_Y) | ((src.flags & VALID_X) << 1) | VALID_X | VALID_TAINTED_Z;
    dest.flags = (src.flags | VALID_TAINTED_Z) | ((src.flags & VALID_Y) >> 1);
    return;
  }

  // sh < 16: scale both components, and carry the overflow of X into Y.
  const double scaled_x = f16Unsign(src.x) * static_cast<double>(1 << sh);
  const double scaled_y = (f16Unsign(src.y) * static_cast<double>(1 << sh)) + f16Overflow(scaled_x);
  dest.x = static_cast<float>(f16Sign(scaled_x));
  dest.y = static_cast<float>(f16Sign(scaled_y));
  dest.flags = (src.flags | VALID_TAINTED_Z);
}
// SLL: Rd = Rt << shamt, with the shift amount taken from the instruction's shamt field.
void CPU::PGXP::CPU_SLL(Instruction instr, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  const u32 shift_amount = instr.r.shamt;
  CPU_SLL(instr, rtVal, shift_amount);
}
// SLLV: Rd = Rt << Rs, using only the low five bits of Rs as the shift amount.
void CPU::PGXP::CPU_SLLV(Instruction instr, u32 rtVal, u32 rsVal)
{
  LOG_VALUES_C2(instr.r.rt.GetValue(), rtVal, instr.r.rs.GetValue(), rsVal);

  const u32 shift_amount = rsVal & 0x1F;
  CPU_SLL(instr, rtVal, shift_amount);
}
// Shared implementation of the right-shift instructions (called by CPU_SRL, CPU_SRLV, CPU_SRA and CPU_SRAV).
//   instr       - instruction being executed; supplies the Rt source and Rd destination registers.
//   rtVal       - integer value of Rt.
//   sh          - shift amount; the callers in this file pass instr.r.shamt or (rsVal & 0x1F).
//   sign        - true for an arithmetic shift (SRA/SRAV), false for a logical shift (SRL/SRLV).
//   is_variable - true when the shift amount came from a register (SRLV/SRAV), false for SRL/SRA.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_SRx(Instruction instr, u32 rtVal, u32 sh, bool sign, bool is_variable)
{
// Exact integer result; this is what ends up in the destination's value field.
const u32 rdVal = sign ? static_cast<u32>(static_cast<s32>(rtVal) >> sh) : (rtVal >> sh);
PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);
// Working copies of the tracked components. They are taken before Rd is written, since Rd may be
// the same register as Rt. For logical shifts, Y is passed through f16Unsign first.
double x = prtVal.x;
double y = sign ? prtVal.y : f16Unsign(prtVal.y);
// Integer test values: they are shifted like the real operand, and are then only inspected to
// decide how each high-precision component has to be treated.
const u32 iX = SignExtend32(LOWORD_S16(rtVal)); // remove Y
const u32 iY = SET_LOWORD(rtVal, HIWORD_U16(iX)); // overwrite x with sign(x)
// Shift test values
const u32 dX = static_cast<u32>(static_cast<s32>(iX) >> sh);
const u32 dY = sign ? static_cast<u32>(static_cast<s32>(iY) >> sh) : (iY >> sh);
// X component: scale it by 2^-sh while the shifted low word still differs from the sign fill of x,
// otherwise take the integer result directly.
if (LOWORD_S16(dX) != HIWORD_S16(iX))
x = x / static_cast<double>(1 << sh);
else
x = LOWORD_S16(dX); // only sign bits left
// When the low word of the shifted Y test value differs from the sign fill of x, part of Y has
// moved down into the low word and must be folded into X.
if (LOWORD_S16(dY) != HIWORD_S16(iX))
{
if (sh == 16)
{
// Y replaces X entirely.
x = y;
}
else if (sh < 16)
{
// Add Y scaled by 2^(16 - sh); one extra unit of that scale is added when the source X is negative.
x += y * static_cast<double>(1 << (16 - sh));
if (prtVal.x < 0)
x += static_cast<double>(1 << (16 - sh));
}
else
{
// sh > 16: add Y scaled down by 2^(sh - 16).
x += y / static_cast<double>(1 << (sh - 16));
}
}
// Y component: if the shifted high word is 0 or -1, use that integer directly, otherwise scale Y by 2^-sh.
if ((HIWORD_S16(dY) == 0) || (HIWORD_S16(dY) == -1))
y = HIWORD_S16(dY);
else
y = y / static_cast<double>(1 << sh);
PGXPValue& prdVal = GetRdValue(instr);
// Use low precision/rounded values when we're not shifting an entire component,
// and it's not originally from a 3D value. Too many false positives in P2/etc.
// What we probably should do is not set the valid flag on non-3D values to begin
// with, only letting them become valid when used in another expression.
if (sign && !is_variable && !(prtVal.flags & VALID_Z) && sh < 16)
{
// Low-precision path: X/Y are the two 16-bit halves of the integer result, and Z is cleared.
prdVal.x = static_cast<float>(LOWORD_S16(rdVal));
prdVal.y = static_cast<float>(HIWORD_S16(rdVal));
prdVal.z = 0.0f;
prdVal.value = rdVal;
prdVal.flags = VALID_XY | VALID_TAINTED_Z;
}
else
{
// High-precision path: keep the shifted components, and inherit Z and the flags from Rt.
prdVal.x = static_cast<float>(f16Sign(x));
prdVal.y = static_cast<float>(f16Sign(y));
prdVal.z = prtVal.z;
prdVal.value = rdVal;
prdVal.flags = prtVal.flags | VALID_TAINTED_Z;
}
}
// SRL: Rd = Rt >> shamt (logical), with the shift amount taken from the instruction's shamt field.
void CPU::PGXP::CPU_SRL(Instruction instr, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  const u32 shift_amount = instr.r.shamt;
  CPU_SRx(instr, rtVal, shift_amount, false, false);
}
// SRLV: Rd = Rt >> Rs (logical), using only the low five bits of Rs as the shift amount.
void CPU::PGXP::CPU_SRLV(Instruction instr, u32 rtVal, u32 rsVal)
{
  LOG_VALUES_C2(instr.r.rt.GetValue(), rtVal, instr.r.rs.GetValue(), rsVal);

  const u32 shift_amount = rsVal & 0x1F;
  CPU_SRx(instr, rtVal, shift_amount, false, true);
}
// SRA: Rd = Rt >> shamt (arithmetic), with the shift amount taken from the instruction's shamt field.
void CPU::PGXP::CPU_SRA(Instruction instr, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  const u32 shift_amount = instr.r.shamt;
  CPU_SRx(instr, rtVal, shift_amount, true, false);
}
// SRAV: Rd = Rt >> Rs (arithmetic), using only the low five bits of Rs as the shift amount.
void CPU::PGXP::CPU_SRAV(Instruction instr, u32 rtVal, u32 rsVal)
{
  LOG_VALUES_C2(instr.r.rt.GetValue(), rtVal, instr.r.rs.GetValue(), rsVal);

  const u32 shift_amount = rsVal & 0x1F;
  CPU_SRx(instr, rtVal, shift_amount, true, true);
}
// MFC0: CPU[Rt] = CP0[Rd].
// The tracked COP0 entry is validated against rdVal, and is then copied into the destination GPR,
// whose integer value is set to rdVal.
void CPU::PGXP::CPU_MFC0(Instruction instr, u32 rdVal)
{
  const u32 cop0_index = static_cast<u8>(instr.r.rd.GetValue());
  LOG_VALUES_1(TinyString::from_format("cop0_{}", cop0_index).c_str(), rdVal, &g_state.pgxp_cop0[cop0_index]);

  PGXPValue& cop0_entry = g_state.pgxp_cop0[cop0_index];
  cop0_entry.Validate(rdVal);

  PGXPValue& dest = GetRtValue(instr);
  dest = cop0_entry;
  dest.value = rdVal;
}
// MTC0: CP0[Rd] = CPU[Rt].
//   instr - instruction being executed; supplies the Rt source GPR and the Rd COP0 register index.
//   rdVal - integer value recorded for the destination COP0 register (presumably the value the
//           register holds after the write; confirm against the caller).
//   rtVal - integer value of the source GPR, used to validate its tracked state.
void CPU::PGXP::CPU_MTC0(Instruction instr, u32 rdVal, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  // CP0[Rd] = CPU[Rt]
  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& prdVal = g_state.pgxp_cop0[static_cast<u8>(instr.r.rd.GetValue())];
  prdVal = prtVal;

  // Stamp the destination with its own integer value, mirroring what CPU_MFC0 does for its
  // destination. The source GPR is not modified by this instruction, so its tracked value must be
  // left alone; overwriting it with rdVal would desynchronise it from the real register contents.
  prdVal.value = rdVal;
}