duckstation/src/core/cpu_pgxp.cpp
Stenzek 6551358212
Log: Replace channel string search with bitset
Knocks off around ~20KB of code.
2024-10-31 14:41:33 +10:00

1458 lines
43 KiB
C++

// SPDX-FileCopyrightText: 2016 iCatButler, 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
//
// This file has been completely rewritten over the years compared to the original PCSXR-PGXP release.
// No original code remains. The original copyright notice is included above for historical purposes.
//
#include "cpu_pgxp.h"
#include "bus.h"
#include "cpu_core.h"
#include "cpu_disasm.h"
#include "settings.h"
#include "util/gpu_device.h"
#include "common/assert.h"
#include "common/log.h"
#include <climits>
#include <cmath>
LOG_CHANNEL(CPU);
// #define LOG_VALUES 1
// #define LOG_LOOKUPS 1
// TODO: Don't update flags on Validate(), instead return it.
namespace CPU::PGXP {
// Sizing constants for the PGXP shadow storage. All counts are in PGXPValue entries.
enum : u32
{
// The vertex cache is a 2D table indexed by (y + 1024) * width + (x + 1024).
VERTEX_CACHE_WIDTH = 2048,
VERTEX_CACHE_HEIGHT = 2048,
VERTEX_CACHE_SIZE = VERTEX_CACHE_WIDTH * VERTEX_CACHE_HEIGHT,
// One shadow entry per 32-bit word of RAM, followed by one per word of scratchpad.
PGXP_MEM_SIZE = (static_cast<u32>(Bus::RAM_8MB_SIZE) + static_cast<u32>(CPU::SCRATCHPAD_SIZE)) / 4,
// Index of the first scratchpad entry inside s_mem.
PGXP_MEM_SCRATCH_OFFSET = Bus::RAM_8MB_SIZE / 4,
};
// Bits stored in PGXPValue::flags describing which components can be trusted.
enum : u32
{
VALID_X = (1u << 0),
VALID_Y = (1u << 1),
VALID_Z = (1u << 2),
VALID_LOWZ = (1u << 16), // Valid Z from the low part of a 32-bit value.
VALID_HIGHZ = (1u << 17), // Valid Z from the high part of a 32-bit value.
VALID_TAINTED_Z = (1u << 31), // X/Y has been changed, Z may not be accurate.
VALID_XY = (VALID_X | VALID_Y),
VALID_XYZ = (VALID_X | VALID_Y | VALID_Z),
VALID_ALL = (VALID_X | VALID_Y | VALID_Z),
};
// Helpers for extracting (unsigned/signed) and replacing the 16-bit halves of a 32-bit value.
#define LOWORD_U16(val) (static_cast<u16>(val))
#define HIWORD_U16(val) (static_cast<u16>(static_cast<u32>(val) >> 16))
#define LOWORD_S16(val) (static_cast<s16>(static_cast<u16>(val)))
#define HIWORD_S16(val) (static_cast<s16>(static_cast<u16>(static_cast<u32>(val) >> 16)))
#define SET_LOWORD(val, loword) ((static_cast<u32>(val) & 0xFFFF0000u) | static_cast<u32>(static_cast<u16>(loword)))
#define SET_HIWORD(val, hiword) ((static_cast<u32>(val) & 0x0000FFFFu) | (static_cast<u32>(hiword) << 16))
// 16-bit fixed-point helpers operating on doubles (see the definitions below).
static double f16Sign(double val);
static double f16Unsign(double val);
static double f16Overflow(double val);
// Vertex cache access and vertex lookup helpers.
static void CacheVertex(u32 value, const PGXPValue& vertex);
static PGXPValue* GetCachedVertex(u32 value);
static float TruncateVertexPosition(float p);
static bool IsWithinTolerance(float precise_x, float precise_y, int int_x, int int_y);
// Accessors for the shadow general-purpose registers named by an instruction.
static PGXPValue& GetRdValue(Instruction instr);
static PGXPValue& GetRtValue(Instruction instr);
static PGXPValue& ValidateAndGetRtValue(Instruction instr, u32 rtVal);
static PGXPValue& ValidateAndGetRsValue(Instruction instr, u32 rsVal);
static void SetRtValue(Instruction instr, const PGXPValue& val);
static void SetRtValue(Instruction instr, const PGXPValue& val, u32 rtVal);
// Accessors for the shadow GTE screen-XY entries (pgxp_gte[12..14]).
static PGXPValue& GetSXY0();
static PGXPValue& GetSXY1();
static PGXPValue& GetSXY2();
static PGXPValue& PushSXY();
// Shadow memory access.
static PGXPValue* GetPtr(u32 addr);
static const PGXPValue& ValidateAndLoadMem(u32 addr, u32 value);
static void ValidateAndLoadMem16(PGXPValue& dest, u32 addr, u32 value, bool sign);
// Shared implementations used by the public per-instruction handlers.
static void CPU_MTC2(u32 reg, const PGXPValue& value, u32 val);
static void CPU_BITWISE(Instruction instr, u32 rdVal, u32 rsVal, u32 rtVal);
static void CPU_SLL(Instruction instr, u32 rtVal, u32 sh);
static void CPU_SRx(Instruction instr, u32 rtVal, u32 sh, bool sign, bool is_variable);
static void WriteMem(u32 addr, const PGXPValue& value);
static void WriteMem16(u32 addr, const PGXPValue& value);
// Z propagation helpers.
static void CopyZIfMissing(PGXPValue& dst, const PGXPValue& src);
static void SelectZ(float& dst_z, u32& dst_flags, const PGXPValue& src1, const PGXPValue& src2);
#ifdef LOG_VALUES
// Debug tracing: when LOG_VALUES is defined, each handler logs the instruction and its operands to s_log.
// The macros below expect a variable named `instr` to be in scope at the point of use.
static void LogInstruction(u32 pc, Instruction instr);
static void LogValue(const char* name, u32 rval, const PGXPValue* val);
static void LogValueStr(SmallStringBase& str, const char* name, u32 rval, const PGXPValue* val);
// clang-format off
#define LOG_VALUES_NV() do { LogInstruction(CPU::g_state.current_instruction_pc, instr); } while (0)
#define LOG_VALUES_1(name, rval, val) do { LogInstruction(CPU::g_state.current_instruction_pc, instr); LogValue(name, rval, val); } while (0)
#define LOG_VALUES_C1(rnum, rval) do { LogInstruction(CPU::g_state.current_instruction_pc,instr); LogValue(CPU::GetRegName(static_cast<CPU::Reg>(rnum)), rval, &g_state.pgxp_gpr[static_cast<u32>(rnum)]); } while(0)
#define LOG_VALUES_C2(r1num, r1val, r2num, r2val) do { LogInstruction(CPU::g_state.current_instruction_pc,instr); LogValue(CPU::GetRegName(static_cast<CPU::Reg>(r1num)), r1val, &g_state.pgxp_gpr[static_cast<u32>(r1num)]); LogValue(CPU::GetRegName(static_cast<CPU::Reg>(r2num)), r2val, &g_state.pgxp_gpr[static_cast<u32>(r2num)]); } while(0)
#define LOG_VALUES_LOAD(addr, val) do { LogInstruction(CPU::g_state.current_instruction_pc,instr); LogValue(TinyString::from_format("MEM[{:08X}]", addr).c_str(), val, GetPtr(addr)); } while(0)
#define LOG_VALUES_STORE(rnum, rval, addr) do { LOG_VALUES_C1(rnum, rval); std::fprintf(s_log, " addr=%08X", addr); } while(0)
#else
// Tracing disabled: every logging macro compiles to a no-op.
#define LOG_VALUES_NV() (void)0
#define LOG_VALUES_1(name, rval, val) (void)0
#define LOG_VALUES_C1(rnum, rval) (void)0
#define LOG_VALUES_C2(r1num, r1val, r2num, r2val) (void)0
#define LOG_VALUES_LOAD(addr, val) (void)0
#define LOG_VALUES_STORE(rnum, rval, addr) (void)0
#endif
// clang-format on
// Value-initialized entry handed out whenever no shadow value exists for a location.
static constexpr const PGXPValue INVALID_VALUE = {};
// Shadow copy of RAM + scratchpad (PGXP_MEM_SIZE entries); allocated in Initialize(), freed in Shutdown().
static PGXPValue* s_mem = nullptr;
// Optional vertex cache (VERTEX_CACHE_SIZE entries); only allocated when the vertex cache setting is enabled.
static PGXPValue* s_vertex_cache = nullptr;
#ifdef LOG_VALUES
// Trace output file, opened on first use by LogInstruction().
static std::FILE* s_log;
#endif
} // namespace CPU::PGXP
// Clears the shadow register files and makes sure the shadow memory (and, if enabled, the vertex cache) exists.
void CPU::PGXP::Initialize()
{
  std::memset(g_state.pgxp_gpr, 0, sizeof(g_state.pgxp_gpr));
  std::memset(g_state.pgxp_cop0, 0, sizeof(g_state.pgxp_cop0));
  std::memset(g_state.pgxp_gte, 0, sizeof(g_state.pgxp_gte));

  // The shadow memory is mandatory: allocate it once (zero-filled) and treat failure as fatal.
  if (s_mem == nullptr)
  {
    s_mem = static_cast<PGXPValue*>(std::calloc(PGXP_MEM_SIZE, sizeof(PGXPValue)));
    if (s_mem == nullptr)
      Panic("Failed to allocate PGXP memory");
  }

  // The vertex cache is optional: if it cannot be allocated, the setting is switched off instead.
  const bool cache_wanted_but_missing = (g_settings.gpu_pgxp_vertex_cache && s_vertex_cache == nullptr);
  if (cache_wanted_but_missing)
  {
    s_vertex_cache = static_cast<PGXPValue*>(std::calloc(VERTEX_CACHE_SIZE, sizeof(PGXPValue)));
    if (s_vertex_cache == nullptr)
    {
      ERROR_LOG("Failed to allocate memory for vertex cache, disabling.");
      g_settings.gpu_pgxp_vertex_cache = false;
    }
  }

  // Any cache that exists (new or left over from an earlier call) starts out empty.
  if (s_vertex_cache != nullptr)
    std::memset(s_vertex_cache, 0, sizeof(PGXPValue) * VERTEX_CACHE_SIZE);
}
// Zeroes all shadow state without releasing or reallocating any buffers.
void CPU::PGXP::Reset()
{
  // Shadow register files.
  std::memset(g_state.pgxp_gte, 0, sizeof(g_state.pgxp_gte));
  std::memset(g_state.pgxp_cop0, 0, sizeof(g_state.pgxp_cop0));
  std::memset(g_state.pgxp_gpr, 0, sizeof(g_state.pgxp_gpr));

  // Shadow memory, if it has been allocated.
  if (s_mem != nullptr)
    std::memset(s_mem, 0, sizeof(PGXPValue) * PGXP_MEM_SIZE);

  // Vertex cache, only while the feature is enabled and the buffer exists.
  if (s_vertex_cache != nullptr && g_settings.gpu_pgxp_vertex_cache)
    std::memset(s_vertex_cache, 0, sizeof(PGXPValue) * VERTEX_CACHE_SIZE);
}
// Releases the shadow buffers and clears the shadow register files.
void CPU::PGXP::Shutdown()
{
  // std::free() accepts a null pointer, so no explicit checks are needed before releasing.
  std::free(s_vertex_cache);
  s_vertex_cache = nullptr;

  std::free(s_mem);
  s_mem = nullptr;

  std::memset(g_state.pgxp_gte, 0, sizeof(g_state.pgxp_gte));
  std::memset(g_state.pgxp_gpr, 0, sizeof(g_state.pgxp_gpr));
  std::memset(g_state.pgxp_cop0, 0, sizeof(g_state.pgxp_cop0));
}
// Scales val by 65536, truncates to a 64-bit integer, narrows to signed 32 bits, then scales back down.
ALWAYS_INLINE_RELEASE double CPU::PGXP::f16Sign(double val)
{
  const double scale = static_cast<double>(USHRT_MAX + 1);
  const s64 scaled = static_cast<s64>(val * scale);
  const s32 narrowed = static_cast<s32>(scaled);
  return static_cast<double>(narrowed) / scale;
}
// Maps a negative value into the unsigned 16-bit range by adding 65536; non-negative values pass through.
ALWAYS_INLINE_RELEASE double CPU::PGXP::f16Unsign(double val)
{
  if (val >= 0)
    return val;

  return val + (USHRT_MAX + 1);
}
// Returns the part of val above the low 16 bits: truncate to an integer, then shift right by 16.
ALWAYS_INLINE_RELEASE double CPU::PGXP::f16Overflow(double val)
{
  const s64 whole = static_cast<s64>(val);
  const s64 upper = whole >> 16;
  return static_cast<double>(upper);
}
// Returns the shadow register selected by the instruction's rd field.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetRdValue(Instruction instr)
{
  const u8 rd_index = static_cast<u8>(instr.r.rd.GetValue());
  return g_state.pgxp_gpr[rd_index];
}
// Returns the shadow register selected by the instruction's rt field.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetRtValue(Instruction instr)
{
  const u8 rt_index = static_cast<u8>(instr.r.rt.GetValue());
  return g_state.pgxp_gpr[rt_index];
}
// Validates the rt shadow register against the real register value, then returns it.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::ValidateAndGetRtValue(Instruction instr, u32 rtVal)
{
  const u8 rt_index = static_cast<u8>(instr.r.rt.GetValue());
  PGXPValue& shadow = g_state.pgxp_gpr[rt_index];
  shadow.Validate(rtVal);
  return shadow;
}
// Validates the rs shadow register against the real register value, then returns it.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::ValidateAndGetRsValue(Instruction instr, u32 rsVal)
{
  const u8 rs_index = static_cast<u8>(instr.r.rs.GetValue());
  PGXPValue& shadow = g_state.pgxp_gpr[rs_index];
  shadow.Validate(rsVal);
  return shadow;
}
// Overwrites the rt shadow register with a copy of val.
ALWAYS_INLINE void CPU::PGXP::SetRtValue(Instruction instr, const PGXPValue& val)
{
  const u8 rt_index = static_cast<u8>(instr.r.rt.GetValue());
  PGXPValue& target = g_state.pgxp_gpr[rt_index];
  target = val;
}
// Overwrites the rt shadow register with a copy of val, then replaces its raw value with rtVal.
ALWAYS_INLINE void CPU::PGXP::SetRtValue(Instruction instr, const PGXPValue& val, u32 rtVal)
{
  const u8 rt_index = static_cast<u8>(instr.r.rt.GetValue());
  PGXPValue& target = g_state.pgxp_gpr[rt_index];
  target = val;
  target.value = rtVal;
}
// Returns shadow GTE entry 12, the oldest of the three screen-XY entries shifted by PushSXY().
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetSXY0()
{
  constexpr u32 sxy0_index = 12;
  return g_state.pgxp_gte[sxy0_index];
}
// Returns shadow GTE entry 13, the middle of the three screen-XY entries shifted by PushSXY().
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetSXY1()
{
  constexpr u32 sxy1_index = 13;
  return g_state.pgxp_gte[sxy1_index];
}
// Returns shadow GTE entry 14, the newest of the three screen-XY entries shifted by PushSXY().
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetSXY2()
{
  constexpr u32 sxy2_index = 14;
  return g_state.pgxp_gte[sxy2_index];
}
// Shifts the screen-XY entries down by one (13 -> 12, 14 -> 13) and returns entry 14 for the caller to fill.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::PushSXY()
{
  PGXPValue* const fifo = &g_state.pgxp_gte[12];
  fifo[0] = fifo[1];
  fifo[1] = fifo[2];
  return fifo[2];
}
// Maps a CPU address to its shadow memory entry (one entry per 32-bit word).
// Returns nullptr when the address is neither scratchpad nor within the RAM mirror range.
ALWAYS_INLINE_RELEASE CPU::PGXPValue* CPU::PGXP::GetPtr(u32 addr)
{
#if 0
  if ((addr & CPU::PHYSICAL_MEMORY_ADDRESS_MASK) >= 0x0017A2B4 &&
      (addr & CPU::PHYSICAL_MEMORY_ADDRESS_MASK) <= 0x0017A2B4)
    __debugbreak();
#endif

  // Scratchpad entries are stored after the RAM entries.
  const bool is_scratchpad = ((addr & SCRATCHPAD_ADDR_MASK) == SCRATCHPAD_ADDR);
  if (is_scratchpad)
  {
    const u32 word_index = (addr & SCRATCHPAD_OFFSET_MASK) >> 2;
    return &s_mem[PGXP_MEM_SCRATCH_OFFSET + word_index];
  }

  const u32 paddr = (addr & PHYSICAL_MEMORY_ADDRESS_MASK);
  if (paddr >= Bus::RAM_MIRROR_END)
    return nullptr;

  // Fold RAM mirrors onto the real RAM range before indexing.
  return &s_mem[(paddr & Bus::g_ram_mask) >> 2];
}
// Looks up the shadow entry for addr, validates it against the loaded word, and returns it.
// Addresses without a shadow entry yield INVALID_VALUE.
ALWAYS_INLINE_RELEASE const CPU::PGXPValue& CPU::PGXP::ValidateAndLoadMem(u32 addr, u32 value)
{
  if (PGXPValue* const entry = GetPtr(addr)) [[likely]]
  {
    entry->Validate(value);
    return *entry;
  }

  return INVALID_VALUE;
}
// Loads a 16-bit half of a shadow memory entry into dest.
// The half is chosen by bit 1 of addr; only that half is validated against the loaded value.
// `sign` selects sign-extension (true) or zero-extension (false) when synthesising the upper component.
ALWAYS_INLINE_RELEASE void CPU::PGXP::ValidateAndLoadMem16(PGXPValue& dest, u32 addr, u32 value, bool sign)
{
  PGXPValue* const entry = GetPtr(addr);
  if (!entry) [[unlikely]]
  {
    dest = INVALID_VALUE;
    return;
  }

  // Bit 1 of the address selects the upper halfword.
  const bool upper_half = ((addr & 2) != 0);

  // Invalidate only the component being read, and only when its raw bits no longer match.
  if (upper_half)
  {
    if (Truncate16(entry->value >> 16) != Truncate16(value))
      entry->flags = (entry->flags & ~VALID_Y);
  }
  else
  {
    if (Truncate16(entry->value) != Truncate16(value))
      entry->flags = (entry->flags & ~VALID_X);
  }

  // Start from a full copy of the entry.
  dest = *entry;

  // For the upper halfword, move y (and its validity bit) down into x.
  if (upper_half)
  {
    dest.x = dest.y;
    dest.flags = (dest.flags & ~VALID_X) | ((dest.flags & VALID_Y) >> 1);
  }

  // y is only marked valid when x is, so no fake precise values are produced.
  const bool x_valid = ((dest.flags & VALID_X) != 0);
  if (x_valid)
  {
    dest.y = (dest.x < 0) ? -1.0f * sign : 0.0f;
    dest.flags |= VALID_Y;
  }
  else
  {
    dest.y = 0.0f;
    dest.flags &= ~VALID_Y;
  }

  dest.value = value;
}
// Stores a full 32-bit shadow value at addr; addresses without a shadow entry are ignored.
ALWAYS_INLINE_RELEASE void CPU::PGXP::WriteMem(u32 addr, const PGXPValue& value)
{
  if (PGXPValue* const entry = GetPtr(addr)) [[likely]]
  {
    *entry = value;

    // A full-word store covers both halves, so both per-half Z markers are set.
    entry->flags |= VALID_LOWZ | VALID_HIGHZ;
  }
}
// Stores the x component of value into one 16-bit half of the shadow entry at addr.
// Bit 1 of addr selects the half: upper writes y, lower writes x. Z is tracked per half.
ALWAYS_INLINE_RELEASE void CPU::PGXP::WriteMem16(u32 addr, const PGXPValue& value)
{
  PGXPValue* const dest = GetPtr(addr);
  if (!dest) [[unlikely]]
    return;

  const bool hiword = ((addr & 2) != 0);
  if (!hiword)
  {
    // Lower halfword: x and the low 16 raw bits are replaced.
    dest->x = value.x;
    dest->flags = (dest->flags & ~VALID_X) | (value.flags & VALID_X);
    dest->value = (dest->value & UINT32_C(0xFFFF0000)) | (value.value & UINT32_C(0x0000FFFF));
  }
  else
  {
    // Upper halfword: the source's x (and its validity bit) becomes the destination's y.
    dest->y = value.x;
    dest->flags = (dest->flags & ~VALID_Y) | ((value.flags & VALID_X) << 1);
    dest->value = (dest->value & UINT32_C(0x0000FFFF)) | (value.value << 16);
  }

  // overwrite z/w if valid
  // TODO: Check modified
  const u32 half_z_flag = hiword ? VALID_HIGHZ : VALID_LOWZ;
  if (value.flags & VALID_Z)
  {
    dest->z = value.z;
    dest->flags |= VALID_Z | half_z_flag;
  }
  else
  {
    // This half no longer carries Z; drop VALID_Z once neither half does.
    dest->flags &= ~half_z_flag;

    const bool any_half_has_z = ((dest->flags & (VALID_HIGHZ | VALID_LOWZ)) != 0);
    if ((dest->flags & VALID_Z) && !any_half_has_z)
      dest->flags &= ~VALID_Z;
  }
}
// Takes Z from src when dst has no valid Z of its own, and merges src's VALID_Z bit into dst.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CopyZIfMissing(PGXPValue& dst, const PGXPValue& src)
{
  const bool dst_has_z = ((dst.flags & VALID_Z) != 0);
  if (!dst_has_z)
    dst.z = src.z;

  dst.flags |= (src.flags & VALID_Z);
}
// Picks the Z for a two-operand result and ORs the operands' VALID_Z bits into dst_flags.
// src2's Z is chosen when src1 has no valid Z, or when src1's Z is tainted while src2's is valid and untainted.
ALWAYS_INLINE_RELEASE void CPU::PGXP::SelectZ(float& dst_z, u32& dst_flags, const PGXPValue& src1,
                                              const PGXPValue& src2)
{
  const bool src1_missing_z = !(src1.flags & VALID_Z);
  const bool src1_tainted = ((src1.flags & VALID_TAINTED_Z) != 0);
  const bool src2_precise = ((src2.flags & (VALID_Z | VALID_TAINTED_Z)) == VALID_Z);
  const bool prefer_src2 = src1_missing_z || (src1_tainted && src2_precise);

  dst_z = prefer_src2 ? src2.z : src1.z;
  dst_flags |= ((src1.flags | src2.flags) & VALID_Z);
}
#ifdef LOG_VALUES
// Starts a new trace line for the instruction at pc: "<pc> <raw bits> <disassembly>".
// The log file is opened lazily on first use; on later calls the previous line is flushed and terminated.
void CPU::PGXP::LogInstruction(u32 pc, Instruction instr)
{
  if (!s_log) [[unlikely]]
  {
    s_log = std::fopen("pgxp.log", "wb");

    // fopen() can fail (e.g. unwritable working directory). Skip tracing instead of handing a null
    // stream to fprintf(); the open is retried on the next call.
    if (!s_log)
      return;
  }
  else
  {
    std::fflush(s_log);
    std::fputc('\n', s_log);
  }

  SmallString str;
  DisassembleInstruction(&str, pc, instr.bits);
  std::fprintf(s_log, "%08X %08X %-20s", pc, instr.bits, str.c_str());
}
void CPU::PGXP::LogValue(const char* name, u32 rval, const PGXPValue* val)
{
if (!s_log) [[unlikely]]
return;
SmallString str;
LogValueStr(str, name, rval, val);
std::fprintf(s_log, " %s", str.c_str());
}
// Appends "name=[raw, ...]" to str. `val` may be null, in which case only "NULL" follows the raw value.
void CPU::PGXP::LogValueStr(SmallStringBase& str, const char* name, u32 rval, const PGXPValue* val)
{
  str.append_format("{}=[{:08X}", name, rval);

  if (!val)
  {
    str.append(", NULL]");
    return;
  }

  // Only show the shadow's raw value when it disagrees with the real one.
  if (val->value != rval)
    str.append_format(", PGXP{:08X}", val->value);

  str.append_format(", {{{},{},{}}}", val->x, val->y, val->z);

  const bool any_component_valid = ((val->flags & VALID_ALL) != 0);
  if (any_component_valid)
  {
    str.append(", valid=");
    if (val->flags & VALID_X)
      str.append('X');
    if (val->flags & VALID_Y)
      str.append('Y');
    if (val->flags & VALID_Z)
      str.append('Z');
  }

  // if (val->flags & VALID_TAINTED_Z)
  //   str.append(", tainted");

  str.append(']');
}
#endif
void CPU::PGXP::GTE_RTPS(float x, float y, float z, u32 value)
{
PGXPValue& pvalue = PushSXY();
pvalue.x = x;
pvalue.y = y;
pvalue.z = z;
pvalue.value = value;
pvalue.flags = VALID_ALL;
if (g_settings.gpu_pgxp_vertex_cache)
CacheVertex(value, pvalue);
}
// Validates the three screen-XY shadow entries against the real values and reports whether
// all of them have valid X, Y and Z.
bool CPU::PGXP::GTE_HasPreciseVertices(u32 sxy0, u32 sxy1, u32 sxy2)
{
  PGXPValue& v0 = GetSXY0();
  v0.Validate(sxy0);

  PGXPValue& v1 = GetSXY1();
  v1.Validate(sxy1);

  PGXPValue& v2 = GetSXY2();
  v2.Validate(sxy2);

  // Don't use accurate clipping for game-constructed values, which don't have a valid Z.
  const u32 flags_common_to_all = (v0.flags & v1.flags & v2.flags);
  return ((flags_common_to_all & VALID_XYZ) == VALID_XYZ);
}
float CPU::PGXP::GTE_NCLIP()
{
const PGXPValue& SXY0 = GetSXY0();
const PGXPValue& SXY1 = GetSXY1();
const PGXPValue& SXY2 = GetSXY2();
float nclip = ((SXY0.x * SXY1.y) + (SXY1.x * SXY2.y) + (SXY2.x * SXY0.y) - (SXY0.x * SXY2.y) - (SXY1.x * SXY0.y) -
(SXY2.x * SXY1.y));
// ensure fractional values are not incorrectly rounded to 0
const float nclip_abs = std::abs(nclip);
if (0.1f < nclip_abs && nclip_abs < 1.0f)
nclip += (nclip < 0.0f ? -1.0f : 1.0f);
return nclip;
}
// Writes a shadow value into GTE data register `reg`.
// Register 15 pushes onto the screen-XY FIFO; registers 29 and 31 are read-only and ignored.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_MTC2(u32 reg, const PGXPValue& value, u32 val)
{
  // read-only registers
  if (reg == 29 || reg == 31)
    return;

  if (reg == 15)
  {
    // push FIFO
    PGXPValue& newest = PushSXY();
    newest = value;
    return;
  }

  // Every other register takes the shadow value with its raw value replaced by val.
  PGXPValue& target = g_state.pgxp_gte[reg];
  target = value;
  target.value = val;
}
// MFC2: copies a shadow GTE data register into the rt shadow register.
void CPU::PGXP::CPU_MFC2(Instruction instr, u32 rdVal)
{
  // CPU[Rt] = GTE_D[Rd]
  const u32 gte_reg = instr.cop.Cop2Index();
  PGXPValue& source = g_state.pgxp_gte[gte_reg];
  LOG_VALUES_1(CPU::GetGTERegisterName(gte_reg), rdVal, &source);

  source.Validate(rdVal);
  SetRtValue(instr, source, rdVal);
}
// MTC2: copies the (validated) rt shadow register into a shadow GTE data register.
void CPU::PGXP::CPU_MTC2(Instruction instr, u32 rtVal)
{
  // GTE_D[Rd] = CPU[Rt]
  const u32 gte_reg = instr.cop.Cop2Index();
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  PGXPValue& source = ValidateAndGetRtValue(instr, rtVal);
  CPU_MTC2(gte_reg, source, rtVal);
}
// LWC2: loads a shadow memory entry into the shadow GTE data register named by rt.
void CPU::PGXP::CPU_LWC2(Instruction instr, u32 addr, u32 rtVal)
{
  // GTE_D[Rt] = Mem[addr]
  LOG_VALUES_LOAD(addr, rtVal);

  const PGXPValue& loaded = ValidateAndLoadMem(addr, rtVal);
  const u32 gte_reg = static_cast<u32>(instr.r.rt.GetValue());
  CPU_MTC2(gte_reg, loaded, rtVal);
}
// SWC2: stores the shadow GTE data register named by rt into shadow memory.
void CPU::PGXP::CPU_SWC2(Instruction instr, u32 addr, u32 rtVal)
{
  // Mem[addr] = GTE_D[Rt]
  const u32 gte_reg = static_cast<u32>(instr.r.rt.GetValue());
  PGXPValue& source = g_state.pgxp_gte[gte_reg];

#ifdef LOG_VALUES
  LOG_VALUES_1(CPU::GetGTERegisterName(gte_reg), rtVal, &source);
  std::fprintf(s_log, " addr=%08X", addr);
#endif

  source.Validate(rtVal);
  WriteMem(addr, source);
}
// Stores vertex in the cache slot addressed by the packed screen coordinates in value (X low, Y high).
// The coordinates are expected to lie in [-1024, 1023]; this is only checked by a debug assertion.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CacheVertex(u32 value, const PGXPValue& vertex)
{
  const s16 sx = static_cast<s16>(value & 0xFFFFu);
  const s16 sy = static_cast<s16>(value >> 16);
  DebugAssert(sx >= -1024 && sx <= 1023 && sy >= -1024 && sy <= 1023);

  // Bias both coordinates so that -1024 maps to row/column 0.
  const u32 column = static_cast<u32>(sx + 1024);
  const u32 row = static_cast<u32>(sy + 1024);
  s_vertex_cache[row * VERTEX_CACHE_WIDTH + column] = vertex;
}
// Returns the vertex cache slot addressed by the packed screen coordinates in value (X low, Y high),
// or nullptr when either coordinate falls outside the cached range.
ALWAYS_INLINE_RELEASE CPU::PGXPValue* CPU::PGXP::GetCachedVertex(u32 value)
{
  const s16 sx = static_cast<s16>(value & 0xFFFFu);
  const s16 sy = static_cast<s16>(value >> 16);

  // The cache covers [-1024, 1023] on both axes, matching the range CacheVertex() asserts when storing.
  // The upper Y bound was previously 1013, which made the last ten rows unreachable on lookup.
  const bool in_range = (sx >= -1024 && sx <= 1023 && sy >= -1024 && sy <= 1023);
  if (!in_range)
    return nullptr;

  return &s_vertex_cache[(sy + 1024) * VERTEX_CACHE_WIDTH + (sx + 1024)];
}
// Wraps the integer part of p to 11 signed bits (shift left 5 within 16 bits, then arithmetic shift right 5)
// while keeping the fractional part unchanged.
ALWAYS_INLINE_RELEASE float CPU::PGXP::TruncateVertexPosition(float p)
{
  const s32 whole = static_cast<s32>(p);
  const float whole_f = static_cast<float>(whole);
  const s16 shifted = static_cast<s16>(whole << 5);
  return static_cast<float>(shifted >> 5) + (p - whole_f);
}
// Checks that a precise position is within the configured tolerance of the integer position on both axes.
// A negative tolerance setting disables the check (always true).
ALWAYS_INLINE_RELEASE bool CPU::PGXP::IsWithinTolerance(float precise_x, float precise_y, int int_x, int int_y)
{
  const float tolerance = g_settings.gpu_pgxp_tolerance;
  if (tolerance < 0.0f)
    return true;

  const float error_x = std::abs(precise_x - static_cast<float>(int_x));
  const float error_y = std::abs(precise_y - static_cast<float>(int_y));
  const bool x_ok = (error_x <= tolerance);
  const bool y_ok = (error_y <= tolerance);
  return (x_ok && y_ok);
}
// Looks up a precise position for a vertex whose native data was read from addr with raw value `value`.
// (x, y) is the native integer position and (xOffs, yOffs) is added to any precise position found.
// Writes the chosen position and w through the out-pointers. Returns true only when the shadow memory
// entry matched, was within tolerance and has a valid Z; every other path returns false.
bool CPU::PGXP::GetPreciseVertex(u32 addr, u32 value, int x, int y, int xOffs, int yOffs, float* out_x, float* out_y,
                                 float* out_w)
{
  // Publishes the precise position and depth of a candidate through the out-pointers.
  const auto emit_precise = [&](const PGXPValue& candidate) {
    *out_x = TruncateVertexPosition(candidate.x) + static_cast<float>(xOffs);
    *out_y = TruncateVertexPosition(candidate.y) + static_cast<float>(yOffs);
    *out_w = candidate.z / 32768.0f;
  };

  // First choice: the shadow memory entry, provided its X/Y are valid and its raw value still matches.
  const PGXPValue* vert = GetPtr(addr);
  const bool memory_hit = (vert && ((vert->flags & VALID_XY) == VALID_XY) && (vert->value == value));
  if (memory_hit)
  {
    emit_precise(*vert);

#ifdef LOG_LOOKUPS
    GL_INS_FMT("0x{:08X} {},{} => {},{} ({},{},{}) ({},{})", addr, x, y, *out_x, *out_y,
               TruncateVertexPosition(vert->x), TruncateVertexPosition(vert->y), vert->z, std::abs(*out_x - x),
               std::abs(*out_y - y));
#endif

    if (IsWithinTolerance(*out_x, *out_y, x, y))
    {
      // check validity of z component
      return ((vert->flags & VALID_Z) == VALID_Z);
    }
  }

  // Second choice: the vertex cache. A cache hit supplies the position but is still reported as false.
  if (g_settings.gpu_pgxp_vertex_cache)
  {
    vert = GetCachedVertex(value);

    const bool cache_hit = (vert && (vert->flags & VALID_XY) == VALID_XY);
    if (cache_hit)
    {
      emit_precise(*vert);
      if (IsWithinTolerance(*out_x, *out_y, x, y))
        return false;
    }
  }

  // no valid value can be found anywhere, use the native PSX data
  *out_x = static_cast<float>(x);
  *out_y = static_cast<float>(y);
  *out_w = 1.0f;
  return false;
}
// LW: loads a validated 32-bit shadow memory entry into the rt shadow register.
void CPU::PGXP::CPU_LW(Instruction instr, u32 addr, u32 rtVal)
{
  // Rt = Mem[Rs + Im]
  LOG_VALUES_LOAD(addr, rtVal);

  const PGXPValue& loaded = ValidateAndLoadMem(addr, rtVal);
  SetRtValue(instr, loaded);
}
// Byte loads carry no precise data: the rt shadow register is reset to INVALID_VALUE.
void CPU::PGXP::CPU_LBx(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_LOAD(addr, rtVal);

  const PGXPValue& no_value = INVALID_VALUE;
  SetRtValue(instr, no_value);
}
// LH: loads one 16-bit half of a shadow memory entry into rt, sign-extended.
void CPU::PGXP::CPU_LH(Instruction instr, u32 addr, u32 rtVal)
{
  // Rt = Mem[Rs + Im] (sign extended)
  LOG_VALUES_LOAD(addr, rtVal);

  PGXPValue& target = GetRtValue(instr);
  ValidateAndLoadMem16(target, addr, rtVal, true);
}
// LHU: loads one 16-bit half of a shadow memory entry into rt, zero-extended.
void CPU::PGXP::CPU_LHU(Instruction instr, u32 addr, u32 rtVal)
{
  // Rt = Mem[Rs + Im] (zero extended)
  LOG_VALUES_LOAD(addr, rtVal);

  PGXPValue& target = GetRtValue(instr);
  ValidateAndLoadMem16(target, addr, rtVal, false);
}
// Byte stores invalidate the shadow entry for the containing word.
void CPU::PGXP::CPU_SB(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr);

  const PGXPValue& no_value = INVALID_VALUE;
  WriteMem(addr, no_value);
}
// SH: stores the validated rt shadow register into one 16-bit half of a shadow memory entry.
void CPU::PGXP::CPU_SH(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr);

  PGXPValue& source = ValidateAndGetRtValue(instr, rtVal);
  WriteMem16(addr, source);
}
// SW: stores the validated rt shadow register into a full shadow memory entry.
void CPU::PGXP::CPU_SW(Instruction instr, u32 addr, u32 rtVal)
{
  // Mem[Rs + Im] = Rt
  LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr);

  PGXPValue& source = ValidateAndGetRtValue(instr, rtVal);
  WriteMem(addr, source);
}
// Unpacks a combined register selector (Rs in the low 8 bits, Rd in the bits above) and forwards to CPU_MOVE.
void CPU::PGXP::CPU_MOVE_Packed(u32 rd_and_rs, u32 rsVal)
{
  const u32 dest_reg = (rd_and_rs >> 8);
  const u32 source_reg = (rd_and_rs & 0xFFu);
  CPU_MOVE(dest_reg, source_reg, rsVal);
}
// Copies shadow register Rs (after validating it against rsVal) into shadow register Rd.
void CPU::PGXP::CPU_MOVE(u32 Rd, u32 Rs, u32 rsVal)
{
#ifdef LOG_VALUES
  // The logging macro needs an `instr` in scope; moves have no instruction, so a zeroed one is used.
  const Instruction instr = {0};
  LOG_VALUES_C1(Rs, rsVal);
#endif

  PGXPValue& source = g_state.pgxp_gpr[Rs];
  source.Validate(rsVal);

  PGXPValue& target = g_state.pgxp_gpr[Rd];
  target = source;
}
// ADDI: adds a sign-extended 16-bit immediate to rs and stores the shadow result in rt.
// The shadow keeps the low halfword in x and the high halfword in y, so the addition is done per half
// with an explicit carry from x into y.
void CPU::PGXP::CPU_ADDI(Instruction instr, u32 rsVal)
{
LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);
// Rt = Rs + Imm (signed)
PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
const u32 immVal = instr.i.imm_sext32();
PGXPValue& prtVal = GetRtValue(instr);
// Start from a copy of rs; adding zero leaves it as a plain move.
prtVal = prsVal;
if (immVal == 0)
return;
if (rsVal == 0)
{
// Result is just the immediate, so x/y come straight from its halves.
// x is low precision value
prtVal.x = static_cast<float>(LOWORD_S16(immVal));
prtVal.y = static_cast<float>(HIWORD_S16(immVal));
prtVal.flags |= VALID_X | VALID_Y | VALID_TAINTED_Z;
prtVal.value = immVal;
return;
}
// Add the low halves in the unsigned 16-bit domain so a carry can be detected.
prtVal.x = static_cast<float>(f16Unsign(prtVal.x));
prtVal.x += static_cast<float>(LOWORD_U16(immVal));
// carry on over/underflow
const float of = (prtVal.x > USHRT_MAX) ? 1.0f : (prtVal.x < 0.0f) ? -1.0f : 0.0f;
prtVal.x = static_cast<float>(f16Sign(prtVal.x));
prtVal.y += HIWORD_S16(immVal) + of;
// truncate on overflow/underflow
prtVal.y += (prtVal.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (prtVal.y < SHRT_MIN) ? (USHRT_MAX + 1) : 0.0f;
prtVal.value = rsVal + immVal;
// X/Y changed, so any Z carried over from rs may no longer correspond to them.
prtVal.flags |= VALID_TAINTED_Z;
}
// ANDI: ANDs rs with a zero-extended 16-bit immediate and stores the shadow result in rt.
// The upper halfword of the result is always zero, so y is forced to 0 and marked valid.
void CPU::PGXP::CPU_ANDI(Instruction instr, u32 rsVal)
{
LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);
// Rt = Rs & Imm
const u32 imm = instr.i.imm_zext32();
const u32 rtVal = rsVal & imm;
PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
PGXPValue& prtVal = GetRtValue(instr);
// NOTE: prtVal and prsVal refer to the same entry when rt == rs; fields are written individually below.
// remove upper 16-bits
prtVal.y = 0.0f;
prtVal.z = prsVal.z;
prtVal.value = rtVal;
prtVal.flags = prsVal.flags | VALID_Y | VALID_TAINTED_Z;
switch (imm)
{
case 0:
{
// if 0 then x == 0
prtVal.x = 0.0f;
prtVal.flags |= VALID_X;
}
break;
case 0xFFFFu:
{
// Mask keeps the whole low halfword, so x (and its validity from rs) carries over unchanged.
// if saturated then x == x
prtVal.x = prsVal.x;
}
break;
default:
{
// otherwise x is low precision value
prtVal.x = static_cast<float>(LOWORD_S16(rtVal));
prtVal.flags |= VALID_X;
}
break;
}
}
// ORI: ORs rs with a zero-extended 16-bit immediate and stores the shadow result in rt.
void CPU::PGXP::CPU_ORI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  // Rt = Rs | Imm
  const u32 imm = instr.i.imm_zext32();
  const u32 result = rsVal | imm;

  PGXPValue& source = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& target = GetRtValue(instr);
  target = source;
  target.value = result;

  // A zero immediate leaves x untouched; anything else replaces x with the low-precision integer result.
  if (imm != 0) [[likely]]
  {
    target.x = static_cast<float>(LOWORD_S16(result));
    target.flags |= VALID_X | VALID_TAINTED_Z;
  }
}
// XORI: XORs rs with a zero-extended 16-bit immediate and stores the shadow result in rt.
void CPU::PGXP::CPU_XORI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  // Rt = Rs ^ Imm
  const u32 imm = instr.i.imm_zext32();
  const u32 result = rsVal ^ imm;

  PGXPValue& source = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& target = GetRtValue(instr);
  target = source;
  target.value = result;

  // A zero immediate leaves x untouched; anything else replaces x with the low-precision integer result.
  if (imm != 0) [[likely]]
  {
    target.x = static_cast<float>(LOWORD_S16(result));
    target.flags |= VALID_X | VALID_TAINTED_Z;
  }
}
// SLTI: sets rt to 1 when rs is less than the signed immediate, else 0.
// The shadow x holds the comparison done on the precise components; value holds the integer result.
void CPU::PGXP::CPU_SLTI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  // Rt = Rs < Imm (signed)
  const s32 imm = instr.i.imm_s16();
  PGXPValue& source = ValidateAndGetRsValue(instr, rsVal);

  // Split the immediate into the same low/high representation the shadow uses.
  const float imm_low = static_cast<float>(imm);
  const float imm_high = imm_low < 0.0f ? -1.0f : 0.0f;

  PGXPValue& target = GetRtValue(instr);
  const bool precise_less = (source.GetValidY(rsVal) < imm_high || source.GetValidX(rsVal) < imm_low);
  target.x = precise_less ? 1.0f : 0.0f;
  target.y = 0.0f;
  target.z = source.z;
  target.flags = source.flags | VALID_X | VALID_Y | VALID_TAINTED_Z;
  target.value = BoolToUInt32(static_cast<s32>(rsVal) < imm);
}
// SLTIU: sets rt to 1 when rs is less than the immediate (unsigned compare), else 0.
// The shadow x holds the comparison done on the precise components; value holds the integer result.
void CPU::PGXP::CPU_SLTIU(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  // Rt = Rs < Imm (Unsigned)
  const u32 imm = instr.i.imm_u16();
  PGXPValue& source = ValidateAndGetRsValue(instr, rsVal);

  const float imm_low = static_cast<float>(static_cast<s16>(imm)); // deliberately signed
  const float imm_high = imm_low < 0.0f ? -1.0f : 0.0f;

  PGXPValue& target = GetRtValue(instr);
  const bool precise_less = (f16Unsign(source.GetValidY(rsVal)) < f16Unsign(imm_high) ||
                             f16Unsign(source.GetValidX(rsVal)) < imm_low);
  target.x = precise_less ? 1.0f : 0.0f;
  target.y = 0.0f;
  target.z = source.z;
  target.flags = source.flags | VALID_X | VALID_Y | VALID_TAINTED_Z;
  target.value = BoolToUInt32(rsVal < imm);
}
// LUI: loads the immediate into the upper halfword of rt. The shadow gets x = 0, y = imm and no Z.
void CPU::PGXP::CPU_LUI(Instruction instr)
{
  LOG_VALUES_NV();

  // Rt = Imm << 16
  PGXPValue& target = GetRtValue(instr);
  target.value = instr.i.imm_zext32() << 16;
  target.flags = VALID_XY;
  target.x = 0.0f;
  target.y = static_cast<float>(instr.i.imm_s16());
  target.z = 0.0f;
}
// ADD: adds rs and rt and stores the shadow result in rd.
// When either operand is zero the other is copied through (borrowing Z from the zero operand if it has
// none of its own). Otherwise the low (x) and high (y) halves are added separately with a carry.
void CPU::PGXP::CPU_ADD(Instruction instr, u32 rsVal, u32 rtVal)
{
LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);
// Rd = Rs + Rt (signed)
PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);
// NOTE: prdVal may refer to the same entry as prsVal or prtVal, so the statement order below matters.
PGXPValue& prdVal = GetRdValue(instr);
if (rtVal == 0)
{
prdVal = prsVal;
CopyZIfMissing(prdVal, prtVal);
}
else if (rsVal == 0)
{
prdVal = prtVal;
CopyZIfMissing(prdVal, prsVal);
}
else
{
// Sum of the low halves in the unsigned 16-bit domain, computed before rd is written.
const double x = f16Unsign(prsVal.GetValidX(rsVal)) + f16Unsign(prtVal.GetValidX(rtVal));
// carry on over/underflow
const float of = (x > USHRT_MAX) ? 1.0f : (x < 0.0f) ? -1.0f : 0.0f;
prdVal.x = static_cast<float>(f16Sign(x));
prdVal.y = prsVal.GetValidY(rsVal) + prtVal.GetValidY(rtVal) + of;
// truncate on overflow/underflow
prdVal.y += (prdVal.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (prdVal.y < SHRT_MIN) ? (USHRT_MAX + 1) : 0.0f;
prdVal.value = rsVal + rtVal;
// valid x/y only if one side had a valid x/y
prdVal.flags = prsVal.flags | (prtVal.flags & VALID_XY) | VALID_TAINTED_Z;
SelectZ(prdVal.z, prdVal.flags, prsVal, prtVal);
}
}
// SUB: subtracts rt from rs and stores the shadow result in rd.
// Subtracting zero copies rs through; otherwise the low (x) and high (y) halves are subtracted
// separately with a borrow/carry term.
void CPU::PGXP::CPU_SUB(Instruction instr, u32 rsVal, u32 rtVal)
{
LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);
// Rd = Rs - Rt (signed)
PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);
// NOTE: prdVal may refer to the same entry as prsVal or prtVal, so the statement order below matters.
PGXPValue& prdVal = GetRdValue(instr);
if (rtVal == 0)
{
prdVal = prsVal;
CopyZIfMissing(prdVal, prtVal);
}
else
{
// Difference of the low halves in the unsigned 16-bit domain, computed before rd is written.
const double x = f16Unsign(prsVal.GetValidX(rsVal)) - f16Unsign(prtVal.GetValidX(rtVal));
// carry on over/underflow
const float of = (x > USHRT_MAX) ? 1.0f : (x < 0.0f) ? -1.0f : 0.0f;
prdVal.x = static_cast<float>(f16Sign(x));
prdVal.y = prsVal.GetValidY(rsVal) - (prtVal.GetValidY(rtVal) - of);
// truncate on overflow/underflow
prdVal.y += (prdVal.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (prdVal.y < SHRT_MIN) ? (USHRT_MAX + 1) : 0.0f;
prdVal.value = rsVal - rtVal;
// valid x/y only if one side had a valid x/y
prdVal.flags = prsVal.flags | (prtVal.flags & VALID_XY) | VALID_TAINTED_Z;
SelectZ(prdVal.z, prdVal.flags, prsVal, prtVal);
}
}
// Shared implementation for the register-register bitwise ops (AND/OR/XOR/NOR).
// rdVal is the already-computed integer result. For each 16-bit half, the precise component is taken
// from whichever operand has identical raw bits in that half; otherwise the integer bits are used.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_BITWISE(Instruction instr, u32 rdVal, u32 rsVal, u32 rtVal)
{
// Rd = Rs <op> Rt (the caller supplies the result in rdVal)
PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);
float x, y;
// Low halfword: zero, passthrough from rs, passthrough from rt, or low-precision integer.
if (LOWORD_U16(rdVal) == 0)
x = 0.0f;
else if (LOWORD_U16(rdVal) == LOWORD_U16(rsVal))
x = prsVal.GetValidX(rsVal);
else if (LOWORD_U16(rdVal) == LOWORD_U16(rtVal))
x = prtVal.GetValidX(rtVal);
else
x = static_cast<float>(LOWORD_S16(rdVal));
// High halfword: same selection as above, applied to y.
if (HIWORD_U16(rdVal) == 0)
y = 0.0f;
else if (HIWORD_U16(rdVal) == HIWORD_U16(rsVal))
y = prsVal.GetValidY(rsVal);
else if (HIWORD_U16(rdVal) == HIWORD_U16(rtVal))
y = prtVal.GetValidY(rtVal);
else
y = static_cast<float>(HIWORD_S16(rdVal));
// Why not write directly to prdVal? Because it might be the same as the source.
u32 flags = ((prsVal.flags | prtVal.flags) & VALID_XY) ? (VALID_XY | VALID_TAINTED_Z) : 0;
PGXPValue& prdVal = GetRdValue(instr);
SelectZ(prdVal.z, flags, prsVal, prtVal);
prdVal.x = x;
prdVal.y = y;
prdVal.flags = flags;
prdVal.value = rdVal;
}
// AND: computes the integer result and lets CPU_BITWISE derive the shadow value for rd.
void CPU::PGXP::CPU_AND_(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs & Rt
  CPU_BITWISE(instr, (rsVal & rtVal), rsVal, rtVal);
}
// OR: computes the integer result and lets CPU_BITWISE derive the shadow value for rd.
void CPU::PGXP::CPU_OR_(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs | Rt
  CPU_BITWISE(instr, (rsVal | rtVal), rsVal, rtVal);
}
// XOR: computes the integer result and lets CPU_BITWISE derive the shadow value for rd.
void CPU::PGXP::CPU_XOR_(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs ^ Rt
  CPU_BITWISE(instr, (rsVal ^ rtVal), rsVal, rtVal);
}
// NOR: computes the integer result and lets CPU_BITWISE derive the shadow value for rd.
void CPU::PGXP::CPU_NOR(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs NOR Rt
  CPU_BITWISE(instr, ~(rsVal | rtVal), rsVal, rtVal);
}
// SLT: sets rd to 1 when rs < rt (signed), else 0.
// The shadow x holds the comparison done on the precise components; value holds the integer result.
void CPU::PGXP::CPU_SLT(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs < Rt (signed)
  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& target = GetRdValue(instr);

  // High halves are compared as-is; low halves are compared in the unsigned 16-bit domain.
  const bool precise_less = (lhs.GetValidY(rsVal) < rhs.GetValidY(rtVal) ||
                             f16Unsign(lhs.GetValidX(rsVal)) < f16Unsign(rhs.GetValidX(rtVal)));
  target.x = precise_less ? 1.0f : 0.0f;
  target.y = 0.0f;
  target.z = lhs.z;
  target.flags = lhs.flags | VALID_TAINTED_Z | VALID_X | VALID_Y;
  target.value = BoolToUInt32(static_cast<s32>(rsVal) < static_cast<s32>(rtVal));
}
// SLTU: sets rd to 1 when rs < rt (unsigned), else 0.
// The shadow x holds the comparison done on the precise components; value holds the integer result.
void CPU::PGXP::CPU_SLTU(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs < Rt (unsigned)
  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& target = GetRdValue(instr);

  // Both halves are compared in the unsigned 16-bit domain.
  const bool precise_less = (f16Unsign(lhs.GetValidY(rsVal)) < f16Unsign(rhs.GetValidY(rtVal)) ||
                             f16Unsign(lhs.GetValidX(rsVal)) < f16Unsign(rhs.GetValidX(rtVal)));
  target.x = precise_less ? 1.0f : 0.0f;
  target.y = 0.0f;
  target.z = lhs.z;
  target.flags = lhs.flags | VALID_TAINTED_Z | VALID_X | VALID_Y;
  target.value = BoolToUInt32(rsVal < rtVal);
}
// MULT: signed 32x32 multiply of rs and rt; the 64-bit result goes to the hi/lo shadow registers.
// The precise product is built from the four partial products of the 16-bit halves (x = low, y = high).
void CPU::PGXP::CPU_MULT(Instruction instr, u32 rsVal, u32 rtVal)
{
LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);
// Hi/Lo = Rs * Rt (signed)
PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);
PGXPValue& ploVal = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
PGXPValue& phiVal = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];
ploVal = prsVal;
// NOTE(review): ploVal was just copied from prsVal, so this call cannot change anything.
// Possibly prtVal was intended as the Z fallback (as in CPU_ADD) -- confirm before changing.
CopyZIfMissing(ploVal, prsVal);
// Z/valid is the same
phiVal = ploVal;
const float rsx = prsVal.GetValidX(rsVal);
const float rsy = prsVal.GetValidY(rsVal);
const float rtx = prtVal.GetValidX(rtVal);
const float rty = prtVal.GetValidY(rtVal);
// Low halves are treated as unsigned, high halves keep their sign.
// Multiply out components
const double xx = f16Unsign(rsx) * f16Unsign(rtx);
const double xy = f16Unsign(rsx) * (rty);
const double yx = rsy * f16Unsign(rtx);
const double yy = rsy * rty;
// Each 16-bit column receives the overflow of the column below it.
// Split values into outputs
const double lx = xx;
const double ly = f16Overflow(xx) + (xy + yx);
const double hx = f16Overflow(ly) + yy;
const double hy = f16Overflow(hx);
ploVal.x = static_cast<float>(f16Sign(lx));
ploVal.y = static_cast<float>(f16Sign(ly));
ploVal.flags |= VALID_TAINTED_Z | (prtVal.flags & VALID_XY);
phiVal.x = static_cast<float>(f16Sign(hx));
phiVal.y = static_cast<float>(f16Sign(hy));
phiVal.flags |= VALID_TAINTED_Z | (prtVal.flags & VALID_XY);
// compute PSX value
const u64 result = static_cast<u64>(static_cast<s64>(SignExtend64(rsVal)) * static_cast<s64>(SignExtend64(rtVal)));
phiVal.value = Truncate32(result >> 32);
ploVal.value = Truncate32(result);
}
// MULTU: unsigned 32x32 multiply of rs and rt; the 64-bit result goes to the hi/lo shadow registers.
// The precise product is built from the four partial products of the 16-bit halves (x = low, y = high).
void CPU::PGXP::CPU_MULTU(Instruction instr, u32 rsVal, u32 rtVal)
{
LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);
// Hi/Lo = Rs * Rt (unsigned)
PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);
PGXPValue& ploVal = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
PGXPValue& phiVal = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];
ploVal = prsVal;
// NOTE(review): ploVal was just copied from prsVal, so this call cannot change anything.
// Possibly prtVal was intended as the Z fallback (as in CPU_ADD) -- confirm before changing.
CopyZIfMissing(ploVal, prsVal);
// Z/valid is the same
phiVal = ploVal;
const float rsx = prsVal.GetValidX(rsVal);
const float rsy = prsVal.GetValidY(rsVal);
const float rtx = prtVal.GetValidX(rtVal);
const float rty = prtVal.GetValidY(rtVal);
// All four halves are treated as unsigned here.
// Multiply out components
const double xx = f16Unsign(rsx) * f16Unsign(rtx);
const double xy = f16Unsign(rsx) * f16Unsign(rty);
const double yx = f16Unsign(rsy) * f16Unsign(rtx);
const double yy = f16Unsign(rsy) * f16Unsign(rty);
// Each 16-bit column receives the overflow of the column below it.
// Split values into outputs
const double lx = xx;
const double ly = f16Overflow(xx) + (xy + yx);
const double hx = f16Overflow(ly) + yy;
const double hy = f16Overflow(hx);
ploVal.x = static_cast<float>(f16Sign(lx));
ploVal.y = static_cast<float>(f16Sign(ly));
ploVal.flags |= VALID_TAINTED_Z | (prtVal.flags & VALID_XY);
phiVal.x = static_cast<float>(f16Sign(hx));
phiVal.y = static_cast<float>(f16Sign(hy));
phiVal.flags |= VALID_TAINTED_Z | (prtVal.flags & VALID_XY);
// compute PSX value
const u64 result = ZeroExtend64(rsVal) * ZeroExtend64(rtVal);
phiVal.value = Truncate32(result >> 32);
ploVal.value = Truncate32(result);
}
void CPU::PGXP::CPU_DIV(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Signed divide: Lo = Rs / Rt, Hi = Rs % Rt.
  PGXPValue& src_rs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& src_rt = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& dst_lo = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
  PGXPValue& dst_hi = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];

  // Both outputs start as a copy of Rs, so Hi and Lo share the same Z and flags.
  // NOTE(review): dst_lo has just been assigned from Rs, so passing Rs again here looks redundant - confirm whether
  // Rt was the intended Z source.
  dst_lo = src_rs;
  CopyZIfMissing(dst_lo, src_rs);
  dst_hi = dst_lo;

  // Recombine each operand's components into one double: f16Unsign(x) + y * 2^16 (y keeps its sign).
  const double word_scale = static_cast<double>(1 << 16);
  const double dividend = f16Unsign(src_rs.GetValidX(rsVal)) + src_rs.GetValidY(rsVal) * word_scale;
  const double divisor = f16Unsign(src_rt.GetValidX(rtVal)) + src_rt.GetValidY(rtVal) * word_scale;

  // NOTE(review): no guard for divisor == 0.0 here; the quotient is then inf/NaN and fmod() yields NaN. Confirm that
  // f16Sign()/f16Overflow() tolerate such inputs.
  const double quotient = dividend / divisor;
  const double remainder = std::fmod(dividend, divisor);

  // Z can no longer be trusted, and any X/Y validity that Rt had is merged into the results.
  const u32 merged_flags = VALID_TAINTED_Z | (src_rt.flags & VALID_XY);

  dst_lo.y = static_cast<float>(f16Sign(f16Overflow(quotient)));
  dst_lo.x = static_cast<float>(f16Sign(quotient));
  dst_lo.flags |= merged_flags;

  dst_hi.y = static_cast<float>(f16Sign(f16Overflow(remainder)));
  dst_hi.x = static_cast<float>(f16Sign(remainder));
  dst_hi.flags |= merged_flags;

  // Integer result, with the two cases that cannot be handed to the host's division handled explicitly.
  const s32 rs_signed = static_cast<s32>(rsVal);
  const s32 rt_signed = static_cast<s32>(rtVal);
  if (rt_signed == 0)
  {
    // Division by zero: Lo is -1 for a non-negative dividend and +1 for a negative one, Hi is the dividend.
    dst_lo.value = (rs_signed >= 0) ? UINT32_C(0xFFFFFFFF) : UINT32_C(1);
    dst_hi.value = static_cast<u32>(rs_signed);
  }
  else if (rsVal == UINT32_C(0x80000000) && rt_signed == -1)
  {
    // INT_MIN / -1 does not fit in 32 bits.
    dst_lo.value = UINT32_C(0x80000000);
    dst_hi.value = 0;
  }
  else
  {
    dst_lo.value = static_cast<u32>(rs_signed / rt_signed);
    dst_hi.value = static_cast<u32>(rs_signed % rt_signed);
  }
}
void CPU::PGXP::CPU_DIVU(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Unsigned divide: Lo = Rs / Rt, Hi = Rs % Rt.
  PGXPValue& src_rs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& src_rt = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& dst_lo = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
  PGXPValue& dst_hi = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];

  // Both outputs start as a copy of Rs, so Hi and Lo share the same Z and flags.
  // NOTE(review): dst_lo has just been assigned from Rs, so passing Rs again here looks redundant - confirm whether
  // Rt was the intended Z source.
  dst_lo = src_rs;
  CopyZIfMissing(dst_lo, src_rs);
  dst_hi = dst_lo;

  // Recombine each operand's components into one double: f16Unsign(x) + f16Unsign(y) * 2^16.
  const double word_scale = static_cast<double>(1 << 16);
  const double dividend = f16Unsign(src_rs.GetValidX(rsVal)) + f16Unsign(src_rs.GetValidY(rsVal)) * word_scale;
  const double divisor = f16Unsign(src_rt.GetValidX(rtVal)) + f16Unsign(src_rt.GetValidY(rtVal)) * word_scale;

  // NOTE(review): no guard for divisor == 0.0 here; the quotient is then inf/NaN and fmod() yields NaN. Confirm that
  // f16Sign()/f16Overflow() tolerate such inputs.
  const double quotient = dividend / divisor;
  const double remainder = std::fmod(dividend, divisor);

  // Z can no longer be trusted, and any X/Y validity that Rt had is merged into the results.
  const u32 merged_flags = VALID_TAINTED_Z | (src_rt.flags & VALID_XY);

  dst_lo.y = static_cast<float>(f16Sign(f16Overflow(quotient)));
  dst_lo.x = static_cast<float>(f16Sign(quotient));
  dst_lo.flags |= merged_flags;

  dst_hi.y = static_cast<float>(f16Sign(f16Overflow(remainder)));
  dst_hi.x = static_cast<float>(f16Sign(remainder));
  dst_hi.flags |= merged_flags;

  // Integer result.
  if (rtVal != 0)
  {
    dst_lo.value = rsVal / rtVal;
    dst_hi.value = rsVal % rtVal;
  }
  else
  {
    // Division by zero: Lo is all ones, Hi is the dividend.
    dst_lo.value = UINT32_C(0xFFFFFFFF);
    dst_hi.value = rsVal;
  }
}
// Common implementation of the left-shift instructions: Rd = Rt << sh.
// The integer result is always stored in prdVal.value; the x/y components of Rd are derived from those of Rt
// depending on how far the value is shifted. Every path marks Z as tainted.
//   instr - decoded instruction, passed to ValidateAndGetRtValue()/GetRdValue() to select the operands.
//   rtVal - integer value of Rt.
//   sh    - shift amount; the two public wrappers pass either the shamt field or (Rs & 0x1F).
// NOTE(review): prdVal and prtVal presumably refer to the same entry when rd == rt, in which case the order of the
// reads and writes below matters - confirm before reordering anything.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_SLL(Instruction instr, u32 rtVal, u32 sh)
{
const u32 rdVal = rtVal << sh;
PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);
PGXPValue& prdVal = GetRdValue(instr);
// Z is carried over as-is; the flags set below record that it may no longer be accurate.
prdVal.z = prtVal.z;
prdVal.value = rdVal;
if (sh >= 32) [[unlikely]]
{
// Both components are zeroed and flagged as valid.
prdVal.x = 0.0f;
prdVal.y = 0.0f;
prdVal.flags = prtVal.flags | VALID_XY | VALID_TAINTED_Z;
}
else if (sh == 16)
{
// Shift by exactly one component: the old x becomes the new y unchanged, and x becomes zero.
prdVal.y = prtVal.x;
prdVal.x = 0.0f;
// Only set valid X if there's also a valid Y. We could use GetValidX() to pull it from the low precision value
// instead, need to investigate further. Spyro breaks if only X is set even if Y is not valid.
// prdVal.flags = (prtVal.flags & ~VALID_Y) | ((prtVal.flags & VALID_X) << 1) | VALID_X | VALID_TAINTED_Z;
// (VALID_Y >> 1) == VALID_X, so this keeps Rt's flags and additionally sets VALID_X whenever Rt had VALID_Y.
prdVal.flags = (prtVal.flags | VALID_TAINTED_Z) | ((prtVal.flags & VALID_Y) >> 1);
}
else if (sh >= 16)
{
// sh == 16 was handled above, so this is sh > 16: the old x scaled by 2^(sh - 16) becomes the new y.
prdVal.y = static_cast<float>(f16Sign(f16Unsign(prtVal.x * static_cast<double>(1 << (sh - 16)))));
prdVal.x = 0.0f;
// See above.
// prdVal.flags = (prtVal.flags & ~VALID_Y) | ((prtVal.flags & VALID_X) << 1) | VALID_X | VALID_TAINTED_Z;
prdVal.flags = (prtVal.flags | VALID_TAINTED_Z) | ((prtVal.flags & VALID_Y) >> 1);
}
else
{
// sh < 16: scale both components by 2^sh and add f16Overflow() of the scaled x into y.
const double x = f16Unsign(prtVal.x) * static_cast<double>(1 << sh);
const double y = (f16Unsign(prtVal.y) * static_cast<double>(1 << sh)) + f16Overflow(x);
prdVal.x = static_cast<float>(f16Sign(x));
prdVal.y = static_cast<float>(f16Sign(y));
prdVal.flags = (prtVal.flags | VALID_TAINTED_Z);
}
}
void CPU::PGXP::CPU_SLL(Instruction instr, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  // Rd = Rt << Sa, where Sa is the shift amount encoded in the instruction (shamt).
  CPU_SLL(instr, rtVal, instr.r.shamt);
}
void CPU::PGXP::CPU_SLLV(Instruction instr, u32 rtVal, u32 rsVal)
{
  LOG_VALUES_C2(instr.r.rt.GetValue(), rtVal, instr.r.rs.GetValue(), rsVal);

  // Rd = Rt << Rs, using only the low five bits of Rs as the shift amount.
  CPU_SLL(instr, rtVal, rsVal & 0x1F);
}
// Common implementation of the right-shift instructions (SRL/SRLV/SRA/SRAV): Rd = Rt >> sh.
//   instr       - decoded instruction, passed to ValidateAndGetRtValue()/GetRdValue() to select the operands.
//   rtVal       - integer value of Rt.
//   sh          - shift amount.
//   sign        - true for an arithmetic (sign-propagating) shift, false for a logical shift.
//   is_variable - true when called from the register-specified forms (SRLV/SRAV); only consulted by the
//                 low-precision fallback at the end of the function.
// NOTE(review): prdVal and prtVal presumably refer to the same entry when rd == rt; every read of prtVal below
// happens before the corresponding write to prdVal - confirm before reordering anything.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_SRx(Instruction instr, u32 rtVal, u32 sh, bool sign, bool is_variable)
{
// Integer result: arithmetic shift when sign is set, logical shift otherwise.
const u32 rdVal = sign ? static_cast<u32>(static_cast<s32>(rtVal) >> sh) : (rtVal >> sh);
PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);
// Working copies of the components; for a logical shift, y is passed through f16Unsign().
double x = prtVal.x;
double y = sign ? prtVal.y : f16Unsign(prtVal.y);
// Integer probes used to decide where the bits of each half end up after the shift:
//   iX is the low 16 bits of rtVal sign-extended to 32 bits (so its upper half is X's sign fill),
//   iY is rtVal with its low 16 bits replaced by that sign fill.
const u32 iX = SignExtend32(LOWORD_S16(rtVal)); // remove Y
const u32 iY = SET_LOWORD(rtVal, HIWORD_U16(iX)); // overwrite x with sign(x)
// Shift test values
const u32 dX = static_cast<u32>(static_cast<s32>(iX) >> sh);
const u32 dY = sign ? static_cast<u32>(static_cast<s32>(iY) >> sh) : (iY >> sh);
// If the shifted low word still differs from X's sign fill, scale x by 1/2^sh; otherwise x is just the fill.
if (LOWORD_S16(dX) != HIWORD_S16(iX))
x = x / static_cast<double>(1 << sh);
else
x = LOWORD_S16(dX); // only sign bits left
// The low word of the shifted iY differs from X's sign fill only if bits of the upper half were shifted into it,
// in which case y contributes to the new x.
if (LOWORD_S16(dY) != HIWORD_S16(iX))
{
if (sh == 16)
{
// Shift by exactly one component: the new x is the old y.
x = y;
}
else if (sh < 16)
{
x += y * static_cast<double>(1 << (16 - sh));
// 2^(16 - sh) == 65536 / 2^sh - presumably the offset between the signed and unsigned reading of a negative
// 16-bit x, scaled by the shift. TODO confirm.
if (prtVal.x < 0)
x += static_cast<double>(1 << (16 - sh));
}
else
{
// sh > 16: y is shifted past the component boundary, so only y / 2^(sh - 16) is added to x.
x += y / static_cast<double>(1 << (sh - 16));
}
}
// If the upper half of the shifted value is all zeros or all ones, use that directly; otherwise scale y by 1/2^sh.
if ((HIWORD_S16(dY) == 0) || (HIWORD_S16(dY) == -1))
y = HIWORD_S16(dY);
else
y = y / static_cast<double>(1 << sh);
PGXPValue& prdVal = GetRdValue(instr);
// Use low precision/rounded values when we're not shifting an entire component,
// and it's not originally from a 3D value. Too many false positives in P2/etc.
// What we probably should do is not set the valid flag on non-3D values to begin
// with, only letting them become valid when used in another expression.
if (sign && !is_variable && !(prtVal.flags & VALID_Z) && sh < 16)
{
// Fallback: take x/y straight from the two halves of the integer result and drop Rt's Z and flags.
prdVal.x = static_cast<float>(LOWORD_S16(rdVal));
prdVal.y = static_cast<float>(HIWORD_S16(rdVal));
prdVal.z = 0.0f;
prdVal.value = rdVal;
prdVal.flags = VALID_XY | VALID_TAINTED_Z;
}
else
{
// Use the x/y computed above, keeping Rt's Z and flags and marking Z as tainted.
prdVal.x = static_cast<float>(f16Sign(x));
prdVal.y = static_cast<float>(f16Sign(y));
prdVal.z = prtVal.z;
prdVal.value = rdVal;
prdVal.flags = prtVal.flags | VALID_TAINTED_Z;
}
}
void CPU::PGXP::CPU_SRL(Instruction instr, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  // Rd = Rt >> Sa (logical), where Sa is the shift amount encoded in the instruction (shamt).
  CPU_SRx(instr, rtVal, instr.r.shamt, /*sign=*/false, /*is_variable=*/false);
}
void CPU::PGXP::CPU_SRLV(Instruction instr, u32 rtVal, u32 rsVal)
{
  LOG_VALUES_C2(instr.r.rt.GetValue(), rtVal, instr.r.rs.GetValue(), rsVal);

  // Rd = Rt >> Rs (logical), using only the low five bits of Rs as the shift amount.
  CPU_SRx(instr, rtVal, rsVal & 0x1F, /*sign=*/false, /*is_variable=*/true);
}
void CPU::PGXP::CPU_SRA(Instruction instr, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  // Rd = Rt >> Sa (arithmetic), where Sa is the shift amount encoded in the instruction (shamt).
  CPU_SRx(instr, rtVal, instr.r.shamt, /*sign=*/true, /*is_variable=*/false);
}
void CPU::PGXP::CPU_SRAV(Instruction instr, u32 rtVal, u32 rsVal)
{
  LOG_VALUES_C2(instr.r.rt.GetValue(), rtVal, instr.r.rs.GetValue(), rsVal);

  // Rd = Rt >> Rs (arithmetic), using only the low five bits of Rs as the shift amount.
  CPU_SRx(instr, rtVal, rsVal & 0x1F, /*sign=*/true, /*is_variable=*/true);
}
void CPU::PGXP::CPU_MFC0(Instruction instr, u32 rdVal)
{
  const u32 cop0_index = static_cast<u8>(instr.r.rd.GetValue());
  LOG_VALUES_1(TinyString::from_format("cop0_{}", cop0_index).c_str(), rdVal, &g_state.pgxp_cop0[cop0_index]);

  // CPU[Rt] = CP0[Rd]
  // Check the COP0 entry against the value actually being read before copying it.
  PGXPValue& cop0_value = g_state.pgxp_cop0[cop0_index];
  cop0_value.Validate(rdVal);

  // The destination GPR takes over the whole entry and is then stamped with the integer value it received.
  PGXPValue& gpr_value = GetRtValue(instr);
  gpr_value = cop0_value;
  gpr_value.value = rdVal;
}
// Tracks a move from a CPU register to a COP0 register: CP0[Rd] = CPU[Rt].
//   instr - decoded instruction; selects the source GPR (rt) and the destination COP0 register (rd).
//   rdVal - integer value associated with the destination COP0 register.
//   rtVal - integer value of the source GPR, used to validate its tracked entry.
void CPU::PGXP::CPU_MTC0(Instruction instr, u32 rdVal, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  // CP0[Rd] = CPU[Rt]
  const PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& prdVal = g_state.pgxp_cop0[static_cast<u8>(instr.r.rd.GetValue())];
  prdVal = prtVal;

  // Stamp the destination with its integer value, as CPU_MFC0 does for its destination. Writing rdVal into the
  // source entry instead would leave the GPR's tracked value out of sync with the register whenever rdVal differs
  // from rtVal, and leave the COP0 entry holding rtVal.
  prdVal.value = rdVal;
}