// SPDX-FileCopyrightText: 2016 iCatButler, 2019-2024 Connor McLaughlin // SPDX-License-Identifier: CC-BY-NC-ND-4.0 // // This file has been completely rewritten over the years compared to the original PCSXR-PGXP release. // No original code remains. The original copyright notice is included above for historical purposes. // #include "cpu_pgxp.h" #include "bus.h" #include "cpu_core.h" #include "cpu_disasm.h" #include "gpu_types.h" #include "settings.h" #include "util/gpu_device.h" #include "common/assert.h" #include "common/log.h" #include #include LOG_CHANNEL(CPU); // #define LOG_VALUES 1 // #define LOG_LOOKUPS 1 // TODO: Don't update flags on Validate(), instead return it. namespace CPU::PGXP { enum : u32 { VERTEX_CACHE_WIDTH = 2048, VERTEX_CACHE_HEIGHT = 2048, VERTEX_CACHE_SIZE = VERTEX_CACHE_WIDTH * VERTEX_CACHE_HEIGHT, PGXP_MEM_SIZE = (static_cast(Bus::RAM_8MB_SIZE) + static_cast(CPU::SCRATCHPAD_SIZE)) / 4, PGXP_MEM_SCRATCH_OFFSET = Bus::RAM_8MB_SIZE / 4, }; enum : u32 { VALID_X = (1u << 0), VALID_Y = (1u << 1), VALID_Z = (1u << 2), VALID_LOWZ = (1u << 16), // Valid Z from the low part of a 32-bit value. VALID_HIGHZ = (1u << 17), // Valid Z from the high part of a 32-bit value. VALID_TAINTED_Z = (1u << 31), // X/Y has been changed, Z may not be accurate. 
VALID_XY = (VALID_X | VALID_Y), VALID_XYZ = (VALID_X | VALID_Y | VALID_Z), VALID_ALL = (VALID_X | VALID_Y | VALID_Z), }; #define LOWORD_U16(val) (static_cast(val)) #define HIWORD_U16(val) (static_cast(static_cast(val) >> 16)) #define LOWORD_S16(val) (static_cast(static_cast(val))) #define HIWORD_S16(val) (static_cast(static_cast(static_cast(val) >> 16))) #define SET_LOWORD(val, loword) ((static_cast(val) & 0xFFFF0000u) | static_cast(static_cast(loword))) #define SET_HIWORD(val, hiword) ((static_cast(val) & 0x0000FFFFu) | (static_cast(hiword) << 16)) static double f16Sign(double val); static double f16Unsign(double val); static double f16Overflow(double val); static void CacheVertex(u32 value, const PGXPValue& vertex); static PGXPValue* GetCachedVertex(u32 value); static float TruncateVertexPosition(float p); static bool IsWithinTolerance(float precise_x, float precise_y, int int_x, int int_y); static PGXPValue& GetRdValue(Instruction instr); static PGXPValue& GetRtValue(Instruction instr); static PGXPValue& ValidateAndGetRtValue(Instruction instr, u32 rtVal); static PGXPValue& ValidateAndGetRsValue(Instruction instr, u32 rsVal); static void SetRtValue(Instruction instr, const PGXPValue& val); static void SetRtValue(Instruction instr, const PGXPValue& val, u32 rtVal); static PGXPValue& GetSXY0(); static PGXPValue& GetSXY1(); static PGXPValue& GetSXY2(); static PGXPValue& PushSXY(); static PGXPValue* GetPtr(u32 addr); static const PGXPValue& ValidateAndLoadMem(u32 addr, u32 value); static void ValidateAndLoadMem16(PGXPValue& dest, u32 addr, u32 value, bool sign); static void CPU_MTC2(u32 reg, const PGXPValue& value, u32 val); static void CPU_BITWISE(Instruction instr, u32 rdVal, u32 rsVal, u32 rtVal); static void CPU_SLL(Instruction instr, u32 rtVal, u32 sh); static void CPU_SRx(Instruction instr, u32 rtVal, u32 sh, bool sign, bool is_variable); static void WriteMem(u32 addr, const PGXPValue& value); static void WriteMem16(u32 addr, const PGXPValue& value); static 
void CopyZIfMissing(PGXPValue& dst, const PGXPValue& src); static void SelectZ(float& dst_z, u32& dst_flags, const PGXPValue& src1, const PGXPValue& src2); #ifdef LOG_VALUES static void LogInstruction(u32 pc, Instruction instr); static void LogValue(const char* name, u32 rval, const PGXPValue* val); static void LogValueStr(SmallStringBase& str, const char* name, u32 rval, const PGXPValue* val); // clang-format off #define LOG_VALUES_NV() do { LogInstruction(CPU::g_state.current_instruction_pc, instr); } while (0) #define LOG_VALUES_1(name, rval, val) do { LogInstruction(CPU::g_state.current_instruction_pc, instr); LogValue(name, rval, val); } while (0) #define LOG_VALUES_C1(rnum, rval) do { LogInstruction(CPU::g_state.current_instruction_pc,instr); LogValue(CPU::GetRegName(static_cast(rnum)), rval, &g_state.pgxp_gpr[static_cast(rnum)]); } while(0) #define LOG_VALUES_C2(r1num, r1val, r2num, r2val) do { LogInstruction(CPU::g_state.current_instruction_pc,instr); LogValue(CPU::GetRegName(static_cast(r1num)), r1val, &g_state.pgxp_gpr[static_cast(r1num)]); LogValue(CPU::GetRegName(static_cast(r2num)), r2val, &g_state.pgxp_gpr[static_cast(r2num)]); } while(0) #define LOG_VALUES_LOAD(addr, val) do { LogInstruction(CPU::g_state.current_instruction_pc,instr); LogValue(TinyString::from_format("MEM[{:08X}]", addr).c_str(), val, GetPtr(addr)); } while(0) #define LOG_VALUES_STORE(rnum, rval, addr) do { LOG_VALUES_C1(rnum, rval); std::fprintf(s_log, " addr=%08X", addr); } while(0) #else #define LOG_VALUES_NV() (void)0 #define LOG_VALUES_1(name, rval, val) (void)0 #define LOG_VALUES_C1(rnum, rval) (void)0 #define LOG_VALUES_C2(r1num, r1val, r2num, r2val) (void)0 #define LOG_VALUES_LOAD(addr, val) (void)0 #define LOG_VALUES_STORE(rnum, rval, addr) (void)0 #endif // clang-format on static constexpr const PGXPValue INVALID_VALUE = {}; static PGXPValue* s_mem = nullptr; static PGXPValue* s_vertex_cache = nullptr; #ifdef LOG_VALUES static std::FILE* s_log; #endif } // namespace 
CPU::PGXP void CPU::PGXP::Initialize() { std::memset(g_state.pgxp_gpr, 0, sizeof(g_state.pgxp_gpr)); std::memset(g_state.pgxp_cop0, 0, sizeof(g_state.pgxp_cop0)); std::memset(g_state.pgxp_gte, 0, sizeof(g_state.pgxp_gte)); if (!s_mem) { s_mem = static_cast(std::calloc(PGXP_MEM_SIZE, sizeof(PGXPValue))); if (!s_mem) Panic("Failed to allocate PGXP memory"); } if (g_settings.gpu_pgxp_vertex_cache && !s_vertex_cache) { s_vertex_cache = static_cast(std::calloc(VERTEX_CACHE_SIZE, sizeof(PGXPValue))); if (!s_vertex_cache) { ERROR_LOG("Failed to allocate memory for vertex cache, disabling."); g_settings.gpu_pgxp_vertex_cache = false; } } if (s_vertex_cache) std::memset(s_vertex_cache, 0, sizeof(PGXPValue) * VERTEX_CACHE_SIZE); } void CPU::PGXP::Reset() { std::memset(g_state.pgxp_gpr, 0, sizeof(g_state.pgxp_gpr)); std::memset(g_state.pgxp_cop0, 0, sizeof(g_state.pgxp_cop0)); std::memset(g_state.pgxp_gte, 0, sizeof(g_state.pgxp_gte)); if (s_mem) std::memset(s_mem, 0, sizeof(PGXPValue) * PGXP_MEM_SIZE); if (g_settings.gpu_pgxp_vertex_cache && s_vertex_cache) std::memset(s_vertex_cache, 0, sizeof(PGXPValue) * VERTEX_CACHE_SIZE); } void CPU::PGXP::Shutdown() { if (s_vertex_cache) { std::free(s_vertex_cache); s_vertex_cache = nullptr; } if (s_mem) { std::free(s_mem); s_mem = nullptr; } std::memset(g_state.pgxp_gte, 0, sizeof(g_state.pgxp_gte)); std::memset(g_state.pgxp_gpr, 0, sizeof(g_state.pgxp_gpr)); std::memset(g_state.pgxp_cop0, 0, sizeof(g_state.pgxp_cop0)); } ALWAYS_INLINE_RELEASE double CPU::PGXP::f16Sign(double val) { const s32 s = static_cast(static_cast(val * (USHRT_MAX + 1))); return static_cast(s) / static_cast(USHRT_MAX + 1); } ALWAYS_INLINE_RELEASE double CPU::PGXP::f16Unsign(double val) { return (val >= 0) ? 
val : (val + (USHRT_MAX + 1)); } ALWAYS_INLINE_RELEASE double CPU::PGXP::f16Overflow(double val) { return static_cast(static_cast(val) >> 16); } ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetRdValue(Instruction instr) { return g_state.pgxp_gpr[static_cast(instr.r.rd.GetValue())]; } ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetRtValue(Instruction instr) { return g_state.pgxp_gpr[static_cast(instr.r.rt.GetValue())]; } ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::ValidateAndGetRtValue(Instruction instr, u32 rtVal) { PGXPValue& ret = g_state.pgxp_gpr[static_cast(instr.r.rt.GetValue())]; ret.Validate(rtVal); return ret; } ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::ValidateAndGetRsValue(Instruction instr, u32 rsVal) { PGXPValue& ret = g_state.pgxp_gpr[static_cast(instr.r.rs.GetValue())]; ret.Validate(rsVal); return ret; } ALWAYS_INLINE void CPU::PGXP::SetRtValue(Instruction instr, const PGXPValue& val) { g_state.pgxp_gpr[static_cast(instr.r.rt.GetValue())] = val; } ALWAYS_INLINE void CPU::PGXP::SetRtValue(Instruction instr, const PGXPValue& val, u32 rtVal) { PGXPValue& prtVal = g_state.pgxp_gpr[static_cast(instr.r.rt.GetValue())]; prtVal = val; prtVal.value = rtVal; } ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetSXY0() { return g_state.pgxp_gte[12]; } ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetSXY1() { return g_state.pgxp_gte[13]; } ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetSXY2() { return g_state.pgxp_gte[14]; } ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::PushSXY() { g_state.pgxp_gte[12] = g_state.pgxp_gte[13]; g_state.pgxp_gte[13] = g_state.pgxp_gte[14]; return g_state.pgxp_gte[14]; } ALWAYS_INLINE_RELEASE CPU::PGXPValue* CPU::PGXP::GetPtr(u32 addr) { #if 0 if ((addr & CPU::PHYSICAL_MEMORY_ADDRESS_MASK) >= 0x0017A2B4 && (addr & CPU::PHYSICAL_MEMORY_ADDRESS_MASK) <= 0x0017A2B4) __debugbreak(); #endif if ((addr & SCRATCHPAD_ADDR_MASK) == SCRATCHPAD_ADDR) return &s_mem[PGXP_MEM_SCRATCH_OFFSET + ((addr & SCRATCHPAD_OFFSET_MASK) >> 2)]; const u32 paddr = (addr & 
PHYSICAL_MEMORY_ADDRESS_MASK);
  if (paddr < Bus::RAM_MIRROR_END)
    return &s_mem[(paddr & Bus::g_ram_mask) >> 2];
  else
    return nullptr;
}

/// Looks up the shadow entry for `addr`, calls Validate() on it with the word actually read
/// from memory, and returns it. Returns INVALID_VALUE when GetPtr() has no entry for `addr`.
ALWAYS_INLINE_RELEASE const CPU::PGXPValue& CPU::PGXP::ValidateAndLoadMem(u32 addr, u32 value)
{
  PGXPValue* pMem = GetPtr(addr);
  if (!pMem) [[unlikely]]
    return INVALID_VALUE;

  pMem->Validate(value);
  return *pMem;
}

ALWAYS_INLINE_RELEASE void CPU::PGXP::ValidateAndLoadMem16(PGXPValue& dest, u32 addr, u32 value, bool sign)
{
  PGXPValue* pMem = GetPtr(addr);
  if (!pMem) [[unlikely]]
  {
    dest = INVALID_VALUE;
    return;
  }

  // determine if high or low word
  const bool hiword = ((addr & 2) != 0);

  // only validate the component we're interested in
  pMem->flags = hiword ?
                  ((Truncate16(pMem->value >> 16) == Truncate16(value)) ? pMem->flags : (pMem->flags & ~VALID_Y)) :
                  ((Truncate16(pMem->value) == Truncate16(value)) ? pMem->flags : (pMem->flags & ~VALID_X));

  // copy whole value
  dest = *pMem;

  // if high word then shift
  if (hiword)
  {
    dest.x = dest.y;
    dest.flags = (dest.flags & ~VALID_X) | ((dest.flags & VALID_Y) >> 1);
  }

  // only set y as valid if x is also valid.. don't want to make fake values
  if (dest.flags & VALID_X)
  {
    dest.y = (dest.x < 0) ?
-1.0f * sign : 0.0f; dest.flags |= VALID_Y; } else { dest.y = 0.0f; dest.flags &= ~VALID_Y; } dest.value = value; } ALWAYS_INLINE_RELEASE void CPU::PGXP::WriteMem(u32 addr, const PGXPValue& value) { PGXPValue* pMem = GetPtr(addr); if (!pMem) [[unlikely]] return; *pMem = value; pMem->flags |= VALID_LOWZ | VALID_HIGHZ; } ALWAYS_INLINE_RELEASE void CPU::PGXP::WriteMem16(u32 addr, const PGXPValue& value) { PGXPValue* dest = GetPtr(addr); if (!dest) [[unlikely]] return; // determine if high or low word const bool hiword = ((addr & 2) != 0); if (hiword) { dest->y = value.x; dest->flags = (dest->flags & ~VALID_Y) | ((value.flags & VALID_X) << 1); dest->value = (dest->value & UINT32_C(0x0000FFFF)) | (value.value << 16); } else { dest->x = value.x; dest->flags = (dest->flags & ~VALID_X) | (value.flags & VALID_X); dest->value = (dest->value & UINT32_C(0xFFFF0000)) | (value.value & UINT32_C(0x0000FFFF)); } // overwrite z/w if valid // TODO: Check modified if (value.flags & VALID_Z) { dest->z = value.z; dest->flags |= VALID_Z | (hiword ? VALID_HIGHZ : VALID_LOWZ); } else { dest->flags &= hiword ? ~VALID_HIGHZ : ~VALID_LOWZ; if (dest->flags & VALID_Z && !(dest->flags & (VALID_HIGHZ | VALID_LOWZ))) dest->flags &= ~VALID_Z; } } ALWAYS_INLINE_RELEASE void CPU::PGXP::CopyZIfMissing(PGXPValue& dst, const PGXPValue& src) { dst.z = (dst.flags & VALID_Z) ? dst.z : src.z; dst.flags |= (src.flags & VALID_Z); } ALWAYS_INLINE_RELEASE void CPU::PGXP::SelectZ(float& dst_z, u32& dst_flags, const PGXPValue& src1, const PGXPValue& src2) { // Prefer src2 if src1 is missing Z, or is potentially an imprecise value, when src2 is precise. dst_z = (!(src1.flags & VALID_Z) || (src1.flags & VALID_TAINTED_Z && (src2.flags & (VALID_Z | VALID_TAINTED_Z)) == VALID_Z)) ? 
src2.z : src1.z; dst_flags |= ((src1.flags | src2.flags) & VALID_Z); } #ifdef LOG_VALUES void CPU::PGXP::LogInstruction(u32 pc, Instruction instr) { if (!s_log) [[unlikely]] { s_log = std::fopen("pgxp.log", "wb"); } else { std::fflush(s_log); std::fputc('\n', s_log); } SmallString str; DisassembleInstruction(&str, pc, instr.bits); std::fprintf(s_log, "%08X %08X %-20s", pc, instr.bits, str.c_str()); } void CPU::PGXP::LogValue(const char* name, u32 rval, const PGXPValue* val) { if (!s_log) [[unlikely]] return; SmallString str; LogValueStr(str, name, rval, val); std::fprintf(s_log, " %s", str.c_str()); } void CPU::PGXP::LogValueStr(SmallStringBase& str, const char* name, u32 rval, const PGXPValue* val) { str.append_format("{}=[{:08X}", name, rval); if (!val) { str.append(", NULL]"); } else { if (val->value != rval) str.append_format(", PGXP{:08X}", val->value); str.append_format(", {{{},{},{}}}", val->x, val->y, val->z); if (val->flags & VALID_ALL) { str.append(", valid="); if (val->flags & VALID_X) str.append('X'); if (val->flags & VALID_Y) str.append('Y'); if (val->flags & VALID_Z) str.append('Z'); } // if (val->flags & VALID_TAINTED_Z) // str.append(", tainted"); str.append(']'); } } #endif void CPU::PGXP::GTE_RTPS(float x, float y, float z, u32 value) { PGXPValue& pvalue = PushSXY(); pvalue.x = x; pvalue.y = y; pvalue.z = z; pvalue.value = value; pvalue.flags = VALID_ALL; if (g_settings.gpu_pgxp_vertex_cache) CacheVertex(value, pvalue); } bool CPU::PGXP::GTE_HasPreciseVertices(u32 sxy0, u32 sxy1, u32 sxy2) { PGXPValue& SXY0 = GetSXY0(); SXY0.Validate(sxy0); PGXPValue& SXY1 = GetSXY1(); SXY1.Validate(sxy1); PGXPValue& SXY2 = GetSXY2(); SXY2.Validate(sxy2); // Don't use accurate clipping for game-constructed values, which don't have a valid Z. 
return (((SXY0.flags & SXY1.flags & SXY2.flags & VALID_XYZ) == VALID_XYZ)); } float CPU::PGXP::GTE_NCLIP() { const PGXPValue& SXY0 = GetSXY0(); const PGXPValue& SXY1 = GetSXY1(); const PGXPValue& SXY2 = GetSXY2(); float nclip = ((SXY0.x * SXY1.y) + (SXY1.x * SXY2.y) + (SXY2.x * SXY0.y) - (SXY0.x * SXY2.y) - (SXY1.x * SXY0.y) - (SXY2.x * SXY1.y)); // ensure fractional values are not incorrectly rounded to 0 const float nclip_abs = std::abs(nclip); if (0.1f < nclip_abs && nclip_abs < 1.0f) nclip += (nclip < 0.0f ? -1.0f : 1.0f); return nclip; } ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_MTC2(u32 reg, const PGXPValue& value, u32 val) { switch (reg) { case 15: { // push FIFO PGXPValue& SXY2 = PushSXY(); SXY2 = value; return; } // read-only registers case 29: case 31: { return; } default: { PGXPValue& gteVal = g_state.pgxp_gte[reg]; gteVal = value; gteVal.value = val; return; } } } void CPU::PGXP::CPU_MFC2(Instruction instr, u32 rdVal) { // CPU[Rt] = GTE_D[Rd] const u32 idx = instr.cop.Cop2Index(); LOG_VALUES_1(CPU::GetGTERegisterName(idx), rdVal, &g_state.pgxp_gte[idx]); PGXPValue& prdVal = g_state.pgxp_gte[idx]; prdVal.Validate(rdVal); SetRtValue(instr, prdVal, rdVal); } void CPU::PGXP::CPU_MTC2(Instruction instr, u32 rtVal) { // GTE_D[Rd] = CPU[Rt] const u32 idx = instr.cop.Cop2Index(); LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal); PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal); CPU_MTC2(idx, prtVal, rtVal); } void CPU::PGXP::CPU_LWC2(Instruction instr, u32 addr, u32 rtVal) { // GTE_D[Rt] = Mem[addr] LOG_VALUES_LOAD(addr, rtVal); const PGXPValue& pMem = ValidateAndLoadMem(addr, rtVal); CPU_MTC2(static_cast(instr.r.rt.GetValue()), pMem, rtVal); } void CPU::PGXP::CPU_SWC2(Instruction instr, u32 addr, u32 rtVal) { // Mem[addr] = GTE_D[Rt] const u32 idx = static_cast(instr.r.rt.GetValue()); PGXPValue& prtVal = g_state.pgxp_gte[idx]; #ifdef LOG_VALUES LOG_VALUES_1(CPU::GetGTERegisterName(idx), rtVal, &prtVal); std::fprintf(s_log, " addr=%08X", addr); #endif 
prtVal.Validate(rtVal); WriteMem(addr, prtVal); } ALWAYS_INLINE_RELEASE void CPU::PGXP::CacheVertex(u32 value, const PGXPValue& vertex) { const s16 sx = static_cast(value & 0xFFFFu); const s16 sy = static_cast(value >> 16); DebugAssert(sx >= -1024 && sx <= 1023 && sy >= -1024 && sy <= 1023); s_vertex_cache[(sy + 1024) * VERTEX_CACHE_WIDTH + (sx + 1024)] = vertex; } ALWAYS_INLINE_RELEASE CPU::PGXPValue* CPU::PGXP::GetCachedVertex(u32 value) { const s16 sx = static_cast(value & 0xFFFFu); const s16 sy = static_cast(value >> 16); return (sx >= -1024 && sx <= 1023 && sy >= -1024 && sy <= 1013) ? &s_vertex_cache[(sy + 1024) * VERTEX_CACHE_WIDTH + (sx + 1024)] : nullptr; } ALWAYS_INLINE_RELEASE float CPU::PGXP::TruncateVertexPosition(float p) { // Truncates positions to 11 bits before drawing. // Matches GPU command parsing, where the upper 5 bits are dropped. // Necessary for Jet Moto and Racingroovy VS. const s32 int_part = static_cast(p); const float int_part_f = static_cast(int_part); return static_cast(TruncateGPUVertexPosition(int_part)) + (p - int_part_f); } ALWAYS_INLINE_RELEASE bool CPU::PGXP::IsWithinTolerance(float precise_x, float precise_y, int int_x, int int_y) { const float tolerance = g_settings.gpu_pgxp_tolerance; if (tolerance < 0.0f) return true; return (std::abs(precise_x - static_cast(int_x)) <= tolerance && std::abs(precise_y - static_cast(int_y)) <= tolerance); } bool CPU::PGXP::GetPreciseVertex(u32 addr, u32 value, int x, int y, int xOffs, int yOffs, float* out_x, float* out_y, float* out_w) { const PGXPValue* vert = GetPtr(addr); if (vert && (vert->flags & VALID_XY) == VALID_XY && vert->value == value) { *out_x = TruncateVertexPosition(vert->x) + static_cast(xOffs); *out_y = TruncateVertexPosition(vert->y) + static_cast(yOffs); *out_w = vert->z / 32768.0f; #ifdef LOG_LOOKUPS GL_INS_FMT("0x{:08X} {},{} => {},{} ({},{},{}) ({},{})", addr, x, y, *out_x, *out_y, TruncateVertexPosition(vert->x), TruncateVertexPosition(vert->y), vert->z, std::abs(*out_x 
- x), std::abs(*out_y - y)); #endif if (IsWithinTolerance(*out_x, *out_y, x, y)) { // check validity of z component return ((vert->flags & VALID_Z) == VALID_Z); } } if (g_settings.gpu_pgxp_vertex_cache) { vert = GetCachedVertex(value); if (vert && (vert->flags & VALID_XY) == VALID_XY) { *out_x = TruncateVertexPosition(vert->x) + static_cast(xOffs); *out_y = TruncateVertexPosition(vert->y) + static_cast(yOffs); *out_w = vert->z / 32768.0f; if (IsWithinTolerance(*out_x, *out_y, x, y)) return false; } } // no valid value can be found anywhere, use the native PSX data *out_x = static_cast(x); *out_y = static_cast(y); *out_w = 1.0f; return false; } void CPU::PGXP::CPU_LW(Instruction instr, u32 addr, u32 rtVal) { // Rt = Mem[Rs + Im] LOG_VALUES_LOAD(addr, rtVal); SetRtValue(instr, ValidateAndLoadMem(addr, rtVal)); } void CPU::PGXP::CPU_LBx(Instruction instr, u32 addr, u32 rtVal) { LOG_VALUES_LOAD(addr, rtVal); SetRtValue(instr, INVALID_VALUE); } void CPU::PGXP::CPU_LH(Instruction instr, u32 addr, u32 rtVal) { // Rt = Mem[Rs + Im] (sign extended) LOG_VALUES_LOAD(addr, rtVal); ValidateAndLoadMem16(GetRtValue(instr), addr, rtVal, true); } void CPU::PGXP::CPU_LHU(Instruction instr, u32 addr, u32 rtVal) { // Rt = Mem[Rs + Im] (zero extended) LOG_VALUES_LOAD(addr, rtVal); ValidateAndLoadMem16(GetRtValue(instr), addr, rtVal, false); } void CPU::PGXP::CPU_SB(Instruction instr, u32 addr, u32 rtVal) { LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr); WriteMem(addr, INVALID_VALUE); } void CPU::PGXP::CPU_SH(Instruction instr, u32 addr, u32 rtVal) { LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr); PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal); WriteMem16(addr, prtVal); } void CPU::PGXP::CPU_SW(Instruction instr, u32 addr, u32 rtVal) { // Mem[Rs + Im] = Rt LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr); PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal); WriteMem(addr, prtVal); } void CPU::PGXP::CPU_MOVE_Packed(u32 rd_and_rs, u32 rsVal) { const u32 Rs = 
(rd_and_rs & 0xFFu); const u32 Rd = (rd_and_rs >> 8); CPU_MOVE(Rd, Rs, rsVal); } void CPU::PGXP::CPU_MOVE(u32 Rd, u32 Rs, u32 rsVal) { #ifdef LOG_VALUES const Instruction instr = {0}; LOG_VALUES_C1(Rs, rsVal); #endif PGXPValue& prsVal = g_state.pgxp_gpr[Rs]; prsVal.Validate(rsVal); g_state.pgxp_gpr[Rd] = prsVal; } void CPU::PGXP::CPU_ADDI(Instruction instr, u32 rsVal) { LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal); // Rt = Rs + Imm (signed) PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal); const u32 immVal = instr.i.imm_sext32(); PGXPValue& prtVal = GetRtValue(instr); prtVal = prsVal; if (immVal == 0) return; if (rsVal == 0) { // x is low precision value prtVal.x = static_cast(LOWORD_S16(immVal)); prtVal.y = static_cast(HIWORD_S16(immVal)); prtVal.flags |= VALID_X | VALID_Y | VALID_TAINTED_Z; prtVal.value = immVal; return; } prtVal.x = static_cast(f16Unsign(prtVal.x)); prtVal.x += static_cast(LOWORD_U16(immVal)); // carry on over/underflow const float of = (prtVal.x > USHRT_MAX) ? 1.0f : (prtVal.x < 0.0f) ? -1.0f : 0.0f; prtVal.x = static_cast(f16Sign(prtVal.x)); prtVal.y += HIWORD_S16(immVal) + of; // truncate on overflow/underflow prtVal.y += (prtVal.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (prtVal.y < SHRT_MIN) ? 
(USHRT_MAX + 1) : 0.0f; prtVal.value = rsVal + immVal; prtVal.flags |= VALID_TAINTED_Z; } void CPU::PGXP::CPU_ANDI(Instruction instr, u32 rsVal) { LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal); // Rt = Rs & Imm const u32 imm = instr.i.imm_zext32(); const u32 rtVal = rsVal & imm; PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal); PGXPValue& prtVal = GetRtValue(instr); // remove upper 16-bits prtVal.y = 0.0f; prtVal.z = prsVal.z; prtVal.value = rtVal; prtVal.flags = prsVal.flags | VALID_Y | VALID_TAINTED_Z; switch (imm) { case 0: { // if 0 then x == 0 prtVal.x = 0.0f; prtVal.flags |= VALID_X; } break; case 0xFFFFu: { // if saturated then x == x prtVal.x = prsVal.x; } break; default: { // otherwise x is low precision value prtVal.x = static_cast(LOWORD_S16(rtVal)); prtVal.flags |= VALID_X; } break; } } void CPU::PGXP::CPU_ORI(Instruction instr, u32 rsVal) { LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal); // Rt = Rs | Imm const u32 imm = instr.i.imm_zext32(); const u32 rtVal = rsVal | imm; PGXPValue& pRsVal = ValidateAndGetRsValue(instr, rsVal); PGXPValue& pRtVal = GetRtValue(instr); pRtVal = pRsVal; pRtVal.value = rtVal; if (imm == 0) [[unlikely]] { // if 0 then x == x } else { // otherwise x is low precision value pRtVal.x = static_cast(LOWORD_S16(rtVal)); pRtVal.flags |= VALID_X | VALID_TAINTED_Z; } } void CPU::PGXP::CPU_XORI(Instruction instr, u32 rsVal) { LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal); // Rt = Rs ^ Imm const u32 imm = instr.i.imm_zext32(); const u32 rtVal = rsVal ^ imm; PGXPValue& pRsVal = ValidateAndGetRsValue(instr, rsVal); PGXPValue& pRtVal = GetRtValue(instr); pRtVal = pRsVal; pRtVal.value = rtVal; if (imm == 0) [[unlikely]] { // if 0 then x == x } else { // otherwise x is low precision value pRtVal.x = static_cast(LOWORD_S16(rtVal)); pRtVal.flags |= VALID_X | VALID_TAINTED_Z; } } void CPU::PGXP::CPU_SLTI(Instruction instr, u32 rsVal) { LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal); // Rt = Rs < Imm (signed) const s32 imm = instr.i.imm_s16(); 
PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal); const float fimmx = static_cast(imm); const float fimmy = fimmx < 0.0f ? -1.0f : 0.0f; PGXPValue& prtVal = GetRtValue(instr); prtVal.x = (prsVal.GetValidY(rsVal) < fimmy || prsVal.GetValidX(rsVal) < fimmx) ? 1.0f : 0.0f; prtVal.y = 0.0f; prtVal.z = prsVal.z; prtVal.flags = prsVal.flags | VALID_X | VALID_Y | VALID_TAINTED_Z; prtVal.value = BoolToUInt32(static_cast(rsVal) < imm); } void CPU::PGXP::CPU_SLTIU(Instruction instr, u32 rsVal) { LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal); // Rt = Rs < Imm (Unsigned) const u32 imm = instr.i.imm_u16(); PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal); const float fimmx = static_cast(static_cast(imm)); // deliberately signed const float fimmy = fimmx < 0.0f ? -1.0f : 0.0f; PGXPValue& prtVal = GetRtValue(instr); prtVal.x = (f16Unsign(prsVal.GetValidY(rsVal)) < f16Unsign(fimmy) || f16Unsign(prsVal.GetValidX(rsVal)) < fimmx) ? 1.0f : 0.0f; prtVal.y = 0.0f; prtVal.z = prsVal.z; prtVal.flags = prsVal.flags | VALID_X | VALID_Y | VALID_TAINTED_Z; prtVal.value = BoolToUInt32(rsVal < imm); } void CPU::PGXP::CPU_LUI(Instruction instr) { LOG_VALUES_NV(); // Rt = Imm << 16 PGXPValue& pRtVal = GetRtValue(instr); pRtVal.x = 0.0f; pRtVal.y = static_cast(instr.i.imm_s16()); pRtVal.z = 0.0f; pRtVal.value = instr.i.imm_zext32() << 16; pRtVal.flags = VALID_XY; } void CPU::PGXP::CPU_ADD(Instruction instr, u32 rsVal, u32 rtVal) { LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal); // Rd = Rs + Rt (signed) PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal); PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal); PGXPValue& prdVal = GetRdValue(instr); if (rtVal == 0) { prdVal = prsVal; CopyZIfMissing(prdVal, prtVal); } else if (rsVal == 0) { prdVal = prtVal; CopyZIfMissing(prdVal, prsVal); } else { const double x = f16Unsign(prsVal.GetValidX(rsVal)) + f16Unsign(prtVal.GetValidX(rtVal)); // carry on over/underflow const float of = (x > USHRT_MAX) ? 
1.0f : (x < 0.0f) ? -1.0f : 0.0f; prdVal.x = static_cast(f16Sign(x)); prdVal.y = prsVal.GetValidY(rsVal) + prtVal.GetValidY(rtVal) + of; // truncate on overflow/underflow prdVal.y += (prdVal.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (prdVal.y < SHRT_MIN) ? (USHRT_MAX + 1) : 0.0f; prdVal.value = rsVal + rtVal; // valid x/y only if one side had a valid x/y prdVal.flags = prsVal.flags | (prtVal.flags & VALID_XY) | VALID_TAINTED_Z; SelectZ(prdVal.z, prdVal.flags, prsVal, prtVal); } } void CPU::PGXP::CPU_SUB(Instruction instr, u32 rsVal, u32 rtVal) { LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal); // Rd = Rs - Rt (signed) PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal); PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal); PGXPValue& prdVal = GetRdValue(instr); if (rtVal == 0) { prdVal = prsVal; CopyZIfMissing(prdVal, prtVal); } else { const double x = f16Unsign(prsVal.GetValidX(rsVal)) - f16Unsign(prtVal.GetValidX(rtVal)); // carry on over/underflow const float of = (x > USHRT_MAX) ? 1.0f : (x < 0.0f) ? -1.0f : 0.0f; prdVal.x = static_cast(f16Sign(x)); prdVal.y = prsVal.GetValidY(rsVal) - (prtVal.GetValidY(rtVal) - of); // truncate on overflow/underflow prdVal.y += (prdVal.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (prdVal.y < SHRT_MIN) ? 
(USHRT_MAX + 1) : 0.0f; prdVal.value = rsVal - rtVal; // valid x/y only if one side had a valid x/y prdVal.flags = prsVal.flags | (prtVal.flags & VALID_XY) | VALID_TAINTED_Z; SelectZ(prdVal.z, prdVal.flags, prsVal, prtVal); } } ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_BITWISE(Instruction instr, u32 rdVal, u32 rsVal, u32 rtVal) { // Rd = Rs & Rt PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal); PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal); float x, y; if (LOWORD_U16(rdVal) == 0) x = 0.0f; else if (LOWORD_U16(rdVal) == LOWORD_U16(rsVal)) x = prsVal.GetValidX(rsVal); else if (LOWORD_U16(rdVal) == LOWORD_U16(rtVal)) x = prtVal.GetValidX(rtVal); else x = static_cast(LOWORD_S16(rdVal)); if (HIWORD_U16(rdVal) == 0) y = 0.0f; else if (HIWORD_U16(rdVal) == HIWORD_U16(rsVal)) y = prsVal.GetValidY(rsVal); else if (HIWORD_U16(rdVal) == HIWORD_U16(rtVal)) y = prtVal.GetValidY(rtVal); else y = static_cast(HIWORD_S16(rdVal)); // Why not write directly to prdVal? Because it might be the same as the source. u32 flags = ((prsVal.flags | prtVal.flags) & VALID_XY) ? 
(VALID_XY | VALID_TAINTED_Z) : 0; PGXPValue& prdVal = GetRdValue(instr); SelectZ(prdVal.z, flags, prsVal, prtVal); prdVal.x = x; prdVal.y = y; prdVal.flags = flags; prdVal.value = rdVal; } void CPU::PGXP::CPU_AND_(Instruction instr, u32 rsVal, u32 rtVal) { LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal); // Rd = Rs & Rt const u32 rdVal = rsVal & rtVal; CPU_BITWISE(instr, rdVal, rsVal, rtVal); } void CPU::PGXP::CPU_OR_(Instruction instr, u32 rsVal, u32 rtVal) { LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal); // Rd = Rs | Rt const u32 rdVal = rsVal | rtVal; CPU_BITWISE(instr, rdVal, rsVal, rtVal); } void CPU::PGXP::CPU_XOR_(Instruction instr, u32 rsVal, u32 rtVal) { LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal); // Rd = Rs ^ Rt const u32 rdVal = rsVal ^ rtVal; CPU_BITWISE(instr, rdVal, rsVal, rtVal); } void CPU::PGXP::CPU_NOR(Instruction instr, u32 rsVal, u32 rtVal) { LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal); // Rd = Rs NOR Rt const u32 rdVal = ~(rsVal | rtVal); CPU_BITWISE(instr, rdVal, rsVal, rtVal); } void CPU::PGXP::CPU_SLT(Instruction instr, u32 rsVal, u32 rtVal) { LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal); // Rd = Rs < Rt (signed) PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal); PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal); PGXPValue& prdVal = GetRdValue(instr); prdVal.x = (prsVal.GetValidY(rsVal) < prtVal.GetValidY(rtVal) || f16Unsign(prsVal.GetValidX(rsVal)) < f16Unsign(prtVal.GetValidX(rtVal))) ? 
1.0f : 0.0f; prdVal.y = 0.0f; prdVal.z = prsVal.z; prdVal.flags = prsVal.flags | VALID_TAINTED_Z | VALID_X | VALID_Y; prdVal.value = BoolToUInt32(static_cast(rsVal) < static_cast(rtVal)); } void CPU::PGXP::CPU_SLTU(Instruction instr, u32 rsVal, u32 rtVal) { LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal); // Rd = Rs < Rt (unsigned) PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal); PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal); PGXPValue& prdVal = GetRdValue(instr); prdVal.x = (f16Unsign(prsVal.GetValidY(rsVal)) < f16Unsign(prtVal.GetValidY(rtVal)) || f16Unsign(prsVal.GetValidX(rsVal)) < f16Unsign(prtVal.GetValidX(rtVal))) ? 1.0f : 0.0f; prdVal.y = 0.0f; prdVal.z = prsVal.z; prdVal.flags = prsVal.flags | VALID_TAINTED_Z | VALID_X | VALID_Y; prdVal.value = BoolToUInt32(rsVal < rtVal); } void CPU::PGXP::CPU_MULT(Instruction instr, u32 rsVal, u32 rtVal) { LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal); // Hi/Lo = Rs * Rt (signed) PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal); PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal); PGXPValue& ploVal = g_state.pgxp_gpr[static_cast(Reg::lo)]; PGXPValue& phiVal = g_state.pgxp_gpr[static_cast(Reg::hi)]; ploVal = prsVal; CopyZIfMissing(ploVal, prsVal); // Z/valid is the same phiVal = ploVal; const float rsx = prsVal.GetValidX(rsVal); const float rsy = prsVal.GetValidY(rsVal); const float rtx = prtVal.GetValidX(rtVal); const float rty = prtVal.GetValidY(rtVal); // Multiply out components const double xx = f16Unsign(rsx) * f16Unsign(rtx); const double xy = f16Unsign(rsx) * (rty); const double yx = rsy * f16Unsign(rtx); const double yy = rsy * rty; // Split values into outputs const double lx = xx; const double ly = f16Overflow(xx) + (xy + yx); const double hx = f16Overflow(ly) + yy; const double hy = f16Overflow(hx); ploVal.x = static_cast(f16Sign(lx)); ploVal.y = static_cast(f16Sign(ly)); ploVal.flags |= VALID_TAINTED_Z | (prtVal.flags & 
VALID_XY); phiVal.x = static_cast(f16Sign(hx)); phiVal.y = static_cast(f16Sign(hy)); phiVal.flags |= VALID_TAINTED_Z | (prtVal.flags & VALID_XY); // compute PSX value const u64 result = static_cast(static_cast(SignExtend64(rsVal)) * static_cast(SignExtend64(rtVal))); phiVal.value = Truncate32(result >> 32); ploVal.value = Truncate32(result); } void CPU::PGXP::CPU_MULTU(Instruction instr, u32 rsVal, u32 rtVal) { LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal); // Hi/Lo = Rs * Rt (unsigned) PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal); PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal); PGXPValue& ploVal = g_state.pgxp_gpr[static_cast(Reg::lo)]; PGXPValue& phiVal = g_state.pgxp_gpr[static_cast(Reg::hi)]; ploVal = prsVal; CopyZIfMissing(ploVal, prsVal); // Z/valid is the same phiVal = ploVal; const float rsx = prsVal.GetValidX(rsVal); const float rsy = prsVal.GetValidY(rsVal); const float rtx = prtVal.GetValidX(rtVal); const float rty = prtVal.GetValidY(rtVal); // Multiply out components const double xx = f16Unsign(rsx) * f16Unsign(rtx); const double xy = f16Unsign(rsx) * f16Unsign(rty); const double yx = f16Unsign(rsy) * f16Unsign(rtx); const double yy = f16Unsign(rsy) * f16Unsign(rty); // Split values into outputs const double lx = xx; const double ly = f16Overflow(xx) + (xy + yx); const double hx = f16Overflow(ly) + yy; const double hy = f16Overflow(hx); ploVal.x = static_cast(f16Sign(lx)); ploVal.y = static_cast(f16Sign(ly)); ploVal.flags |= VALID_TAINTED_Z | (prtVal.flags & VALID_XY); phiVal.x = static_cast(f16Sign(hx)); phiVal.y = static_cast(f16Sign(hy)); phiVal.flags |= VALID_TAINTED_Z | (prtVal.flags & VALID_XY); // compute PSX value const u64 result = ZeroExtend64(rsVal) * ZeroExtend64(rtVal); phiVal.value = Truncate32(result >> 32); ploVal.value = Truncate32(result); } void CPU::PGXP::CPU_DIV(Instruction instr, u32 rsVal, u32 rtVal) { LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal); // Lo = 
Rs / Rt (signed) // Hi = Rs % Rt (signed) PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal); PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal); PGXPValue& ploVal = g_state.pgxp_gpr[static_cast(Reg::lo)]; PGXPValue& phiVal = g_state.pgxp_gpr[static_cast(Reg::hi)]; ploVal = prsVal; CopyZIfMissing(ploVal, prsVal); // Z/valid is the same phiVal = ploVal; const double vs = f16Unsign(prsVal.GetValidX(rsVal)) + prsVal.GetValidY(rsVal) * static_cast(1 << 16); const double vt = f16Unsign(prtVal.GetValidX(rtVal)) + prtVal.GetValidY(rtVal) * static_cast(1 << 16); const double lo = vs / vt; ploVal.y = static_cast(f16Sign(f16Overflow(lo))); ploVal.x = static_cast(f16Sign(lo)); ploVal.flags |= VALID_TAINTED_Z | (prtVal.flags & VALID_XY); const double hi = std::fmod(vs, vt); phiVal.y = static_cast(f16Sign(f16Overflow(hi))); phiVal.x = static_cast(f16Sign(hi)); phiVal.flags |= VALID_TAINTED_Z | (prtVal.flags & VALID_XY); // compute PSX value if (static_cast(rtVal) == 0) { // divide by zero ploVal.value = (static_cast(rsVal) >= 0) ? 
UINT32_C(0xFFFFFFFF) : UINT32_C(1); phiVal.value = static_cast(static_cast(rsVal)); } else if (rsVal == UINT32_C(0x80000000) && static_cast(rtVal) == -1) { // unrepresentable ploVal.value = UINT32_C(0x80000000); phiVal.value = 0; } else { ploVal.value = static_cast(static_cast(rsVal) / static_cast(rtVal)); phiVal.value = static_cast(static_cast(rsVal) % static_cast(rtVal)); } } void CPU::PGXP::CPU_DIVU(Instruction instr, u32 rsVal, u32 rtVal) { LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal); // Lo = Rs / Rt (unsigned) // Hi = Rs % Rt (unsigned) PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal); PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal); PGXPValue& ploVal = g_state.pgxp_gpr[static_cast(Reg::lo)]; PGXPValue& phiVal = g_state.pgxp_gpr[static_cast(Reg::hi)]; ploVal = prsVal; CopyZIfMissing(ploVal, prsVal); // Z/valid is the same phiVal = ploVal; const double vs = f16Unsign(prsVal.GetValidX(rsVal)) + f16Unsign(prsVal.GetValidY(rsVal)) * static_cast(1 << 16); const double vt = f16Unsign(prtVal.GetValidX(rtVal)) + f16Unsign(prtVal.GetValidY(rtVal)) * static_cast(1 << 16); const double lo = vs / vt; ploVal.y = static_cast(f16Sign(f16Overflow(lo))); ploVal.x = static_cast(f16Sign(lo)); ploVal.flags |= VALID_TAINTED_Z | (prtVal.flags & VALID_XY); const double hi = std::fmod(vs, vt); phiVal.y = static_cast(f16Sign(f16Overflow(hi))); phiVal.x = static_cast(f16Sign(hi)); phiVal.flags |= VALID_TAINTED_Z | (prtVal.flags & VALID_XY); if (rtVal == 0) { // divide by zero ploVal.value = UINT32_C(0xFFFFFFFF); phiVal.value = rsVal; } else { ploVal.value = rsVal / rtVal; phiVal.value = rsVal % rtVal; } } ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_SLL(Instruction instr, u32 rtVal, u32 sh) { const u32 rdVal = rtVal << sh; PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal); PGXPValue& prdVal = GetRdValue(instr); prdVal.z = prtVal.z; prdVal.value = rdVal; if (sh >= 32) [[unlikely]] { prdVal.x = 0.0f; prdVal.y = 0.0f; prdVal.flags = 
prtVal.flags | VALID_XY | VALID_TAINTED_Z; } else if (sh == 16) { prdVal.y = prtVal.x; prdVal.x = 0.0f; // Only set valid X if there's also a valid Y. We could use GetValidX() to pull it from the low precision value // instead, need to investigate further. Spyro breaks if only X is set even if Y is not valid. // prdVal.flags = (prtVal.flags & ~VALID_Y) | ((prtVal.flags & VALID_X) << 1) | VALID_X | VALID_TAINTED_Z; prdVal.flags = (prtVal.flags | VALID_TAINTED_Z) | ((prtVal.flags & VALID_Y) >> 1); } else if (sh >= 16) { prdVal.y = static_cast(f16Sign(f16Unsign(prtVal.x * static_cast(1 << (sh - 16))))); prdVal.x = 0.0f; // See above. // prdVal.flags = (prtVal.flags & ~VALID_Y) | ((prtVal.flags & VALID_X) << 1) | VALID_X | VALID_TAINTED_Z; prdVal.flags = (prtVal.flags | VALID_TAINTED_Z) | ((prtVal.flags & VALID_Y) >> 1); } else { const double x = f16Unsign(prtVal.x) * static_cast(1 << sh); const double y = (f16Unsign(prtVal.y) * static_cast(1 << sh)) + f16Overflow(x); prdVal.x = static_cast(f16Sign(x)); prdVal.y = static_cast(f16Sign(y)); prdVal.flags = (prtVal.flags | VALID_TAINTED_Z); } } void CPU::PGXP::CPU_SLL(Instruction instr, u32 rtVal) { LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal); // Rd = Rt << Sa const u32 sh = instr.r.shamt; CPU_SLL(instr, rtVal, sh); } void CPU::PGXP::CPU_SLLV(Instruction instr, u32 rtVal, u32 rsVal) { LOG_VALUES_C2(instr.r.rt.GetValue(), rtVal, instr.r.rs.GetValue(), rsVal); // Rd = Rt << Rs const u32 sh = rsVal & 0x1F; CPU_SLL(instr, rtVal, sh); } ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_SRx(Instruction instr, u32 rtVal, u32 sh, bool sign, bool is_variable) { const u32 rdVal = sign ? static_cast(static_cast(rtVal) >> sh) : (rtVal >> sh); PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal); double x = prtVal.x; double y = sign ? 
prtVal.y : f16Unsign(prtVal.y); const u32 iX = SignExtend32(LOWORD_S16(rtVal)); // remove Y const u32 iY = SET_LOWORD(rtVal, HIWORD_U16(iX)); // overwrite x with sign(x) // Shift test values const u32 dX = static_cast(static_cast(iX) >> sh); const u32 dY = sign ? static_cast(static_cast(iY) >> sh) : (iY >> sh); if (LOWORD_S16(dX) != HIWORD_S16(iX)) x = x / static_cast(1 << sh); else x = LOWORD_S16(dX); // only sign bits left if (LOWORD_S16(dY) != HIWORD_S16(iX)) { if (sh == 16) { x = y; } else if (sh < 16) { x += y * static_cast(1 << (16 - sh)); if (prtVal.x < 0) x += static_cast(1 << (16 - sh)); } else { x += y / static_cast(1 << (sh - 16)); } } if ((HIWORD_S16(dY) == 0) || (HIWORD_S16(dY) == -1)) y = HIWORD_S16(dY); else y = y / static_cast(1 << sh); PGXPValue& prdVal = GetRdValue(instr); // Use low precision/rounded values when we're not shifting an entire component, // and it's not originally from a 3D value. Too many false positives in P2/etc. // What we probably should do is not set the valid flag on non-3D values to begin // with, only letting them become valid when used in another expression. 
if (sign && !is_variable && !(prtVal.flags & VALID_Z) && sh < 16) { prdVal.x = static_cast(LOWORD_S16(rdVal)); prdVal.y = static_cast(HIWORD_S16(rdVal)); prdVal.z = 0.0f; prdVal.value = rdVal; prdVal.flags = VALID_XY | VALID_TAINTED_Z; } else { prdVal.x = static_cast(f16Sign(x)); prdVal.y = static_cast(f16Sign(y)); prdVal.z = prtVal.z; prdVal.value = rdVal; prdVal.flags = prtVal.flags | VALID_TAINTED_Z; } } void CPU::PGXP::CPU_SRL(Instruction instr, u32 rtVal) { LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal); // Rd = Rt >> Sa const u32 sh = instr.r.shamt; CPU_SRx(instr, rtVal, sh, false, false); } void CPU::PGXP::CPU_SRLV(Instruction instr, u32 rtVal, u32 rsVal) { LOG_VALUES_C2(instr.r.rt.GetValue(), rtVal, instr.r.rs.GetValue(), rsVal); // Rd = Rt >> Sa const u32 sh = rsVal & 0x1F; CPU_SRx(instr, rtVal, sh, false, true); } void CPU::PGXP::CPU_SRA(Instruction instr, u32 rtVal) { LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal); // Rd = Rt >> Sa const u32 sh = instr.r.shamt; CPU_SRx(instr, rtVal, sh, true, false); } void CPU::PGXP::CPU_SRAV(Instruction instr, u32 rtVal, u32 rsVal) { LOG_VALUES_C2(instr.r.rt.GetValue(), rtVal, instr.r.rs.GetValue(), rsVal); // Rd = Rt >> Sa const u32 sh = rsVal & 0x1F; CPU_SRx(instr, rtVal, sh, true, true); } void CPU::PGXP::CPU_MFC0(Instruction instr, u32 rdVal) { const u32 idx = static_cast(instr.r.rd.GetValue()); LOG_VALUES_1(TinyString::from_format("cop0_{}", idx).c_str(), rdVal, &g_state.pgxp_cop0[idx]); // CPU[Rt] = CP0[Rd] PGXPValue& prdVal = g_state.pgxp_cop0[idx]; prdVal.Validate(rdVal); PGXPValue& prtVal = GetRtValue(instr); prtVal = prdVal; prtVal.value = rdVal; } void CPU::PGXP::CPU_MTC0(Instruction instr, u32 rdVal, u32 rtVal) { LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal); // CP0[Rd] = CPU[Rt] PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal); PGXPValue& prdVal = g_state.pgxp_cop0[static_cast(instr.r.rd.GetValue())]; prdVal = prtVal; prtVal.value = rdVal; }