mirror of
https://github.com/stenzek/duckstation.git
synced 2025-06-07 03:55:33 +00:00
CPU/CodeCache: Simplify code LUT addressing
One more instruction on x86/ARM32, no additional instructions on ARM64. Worth it so that the application doesn't crash if the game jumps to an invalid PC. Note that the lower 2 bits are truncated, so an unaligned jump will round down to the closest instruction. Obviously not correct, but if a game ends up doing this, it's a lost cause anyway.
This commit is contained in:
parent
4e5b4ba071
commit
2a8cfc7922
@ -52,10 +52,6 @@ static constexpr u32 RECOMPILE_FRAMES_FOR_INTERPRETER_FALLBACK = 15;
|
||||
static constexpr u32 INVALIDATE_COUNT_FOR_MANUAL_PROTECTION = 4;
|
||||
static constexpr u32 INVALIDATE_FRAMES_FOR_MANUAL_PROTECTION = 60;
|
||||
|
||||
static CodeLUT DecodeCodeLUTPointer(u32 slot, CodeLUT ptr);
|
||||
static CodeLUT EncodeCodeLUTPointer(u32 slot, CodeLUT ptr);
|
||||
static CodeLUT OffsetCodeLUTPointer(CodeLUT fake_ptr, u32 pc);
|
||||
|
||||
static void AllocateLUTs();
|
||||
static void DeallocateLUTs();
|
||||
static void ResetCodeLUT();
|
||||
@ -277,31 +273,6 @@ static constexpr u32 GetLUTSlotCount(bool include_unreachable)
|
||||
}
|
||||
} // namespace CPU::CodeCache
|
||||
|
||||
// Turns a slot-biased ("encoded") LUT pointer back into a plain table pointer by adding the slot's byte offset:
// (slot << 17) bytes when pointers are 8 bytes wide, (slot << 16) bytes otherwise.
// This is the exact inverse of EncodeCodeLUTPointer(), which subtracts the same amount for the same slot.
CPU::CodeCache::CodeLUT CPU::CodeCache::DecodeCodeLUTPointer(u32 slot, CodeLUT ptr)
|
||||
{
|
||||
if constexpr (sizeof(void*) == 8)
|
||||
// The slot is widened to u64 before shifting, so the byte offset is computed in 64 bits.
return reinterpret_cast<CodeLUT>(reinterpret_cast<u8*>(ptr) + (static_cast<u64>(slot) << 17));
|
||||
else
|
||||
return reinterpret_cast<CodeLUT>(reinterpret_cast<u8*>(ptr) + (slot << 16));
|
||||
}
|
||||
|
||||
// Produces a slot-biased ("encoded") LUT pointer by subtracting the slot's byte offset from a table pointer:
// (slot << 17) bytes when pointers are 8 bytes wide, (slot << 16) bytes otherwise.
// NOTE(review): presumably the bias exists so that OffsetCodeLUTPointer() can add an offset derived from the full PC
// and still land inside the table for that slot - confirm against the LUT layout.
// The result is a "fake" pointer; it is only meaningful after DecodeCodeLUTPointer()/OffsetCodeLUTPointer().
CPU::CodeCache::CodeLUT CPU::CodeCache::EncodeCodeLUTPointer(u32 slot, CodeLUT ptr)
|
||||
{
|
||||
if constexpr (sizeof(void*) == 8)
|
||||
// The slot is widened to u64 before shifting, so the byte offset is computed in 64 bits.
return reinterpret_cast<CodeLUT>(reinterpret_cast<u8*>(ptr) - (static_cast<u64>(slot) << 17));
|
||||
else
|
||||
return reinterpret_cast<CodeLUT>(reinterpret_cast<u8*>(ptr) - (slot << 16));
|
||||
}
|
||||
|
||||
// Advances an encoded LUT pointer by a byte offset derived from `pc` and returns the resulting address as a pointer
// to a code-pointer entry: (pc << 1) bytes when pointers are 8 bytes wide, `pc` bytes otherwise.
// NOTE(review): the scale factors look like (pc / 4) * sizeof(void*), i.e. one entry per 4-byte instruction - confirm.
CPU::CodeCache::CodeLUT CPU::CodeCache::OffsetCodeLUTPointer(CodeLUT fake_ptr, u32 pc)
|
||||
{
|
||||
// Work in bytes; the offsets below are byte counts, not element counts.
u8* fake_byte_ptr = reinterpret_cast<u8*>(fake_ptr);
|
||||
if constexpr (sizeof(void*) == 8)
|
||||
// pc is widened to u64 before shifting, so the doubled offset is computed in 64 bits.
return reinterpret_cast<const void**>(fake_byte_ptr + (static_cast<u64>(pc) << 1));
|
||||
else
|
||||
return reinterpret_cast<const void**>(fake_byte_ptr + pc);
|
||||
}
|
||||
|
||||
void CPU::CodeCache::AllocateLUTs()
|
||||
{
|
||||
constexpr u32 num_code_slots = GetLUTSlotCount(true);
|
||||
@ -323,9 +294,11 @@ void CPU::CodeCache::AllocateLUTs()
|
||||
// Mark everything as unreachable to begin with.
|
||||
for (u32 i = 0; i < LUT_TABLE_COUNT; i++)
|
||||
{
|
||||
g_code_lut[i] = EncodeCodeLUTPointer(i, code_table_ptr);
|
||||
g_code_lut[i] = code_table_ptr;
|
||||
s_block_lut[i] = nullptr;
|
||||
}
|
||||
|
||||
// Exclude unreachable.
|
||||
code_table_ptr += LUT_TABLE_SIZE;
|
||||
|
||||
// Allocate ranges.
|
||||
@ -337,7 +310,7 @@ void CPU::CodeCache::AllocateLUTs()
|
||||
{
|
||||
const u32 slot = start_slot + i;
|
||||
|
||||
g_code_lut[slot] = EncodeCodeLUTPointer(slot, code_table_ptr);
|
||||
g_code_lut[slot] = code_table_ptr;
|
||||
code_table_ptr += LUT_TABLE_SIZE;
|
||||
|
||||
s_block_lut[slot] = block_table_ptr;
|
||||
@ -357,15 +330,13 @@ void CPU::CodeCache::DeallocateLUTs()
|
||||
|
||||
void CPU::CodeCache::ResetCodeLUT()
|
||||
{
|
||||
if (!s_lut_code_pointers)
|
||||
return;
|
||||
|
||||
// Make the unreachable table jump to the invalid code callback.
|
||||
MemsetPtrs(s_lut_code_pointers.get(), g_interpret_block, LUT_TABLE_COUNT);
|
||||
|
||||
for (u32 i = 0; i < LUT_TABLE_COUNT; i++)
|
||||
{
|
||||
CodeLUT ptr = DecodeCodeLUTPointer(i, g_code_lut[i]);
|
||||
// Don't overwrite anything bound to unreachable.
|
||||
CodeLUT ptr = g_code_lut[i];
|
||||
if (ptr == s_lut_code_pointers.get())
|
||||
continue;
|
||||
|
||||
@ -375,18 +346,10 @@ void CPU::CodeCache::ResetCodeLUT()
|
||||
|
||||
// Stores `function` in the code LUT entry selected by `pc`.
// NOTE(review): this span is a rendered diff hunk (see the "-375,18 +346,10" header above it), so it presumably shows
// statements from both sides of the change together. As displayed, the entry is written twice: once through
// OffsetCodeLUTPointer() on the encoded pointer, and once by indexing g_code_lut[table] directly.
void CPU::CodeCache::SetCodeLUT(u32 pc, const void* function)
|
||||
{
|
||||
// Nothing to update when s_lut_code_pointers tests false.
if (!s_lut_code_pointers)
|
||||
return;
|
||||
|
||||
// The bits of pc above LUT_TABLE_SHIFT select which table to use.
const u32 table = pc >> LUT_TABLE_SHIFT;
|
||||
CodeLUT encoded_ptr = g_code_lut[table];
|
||||
|
||||
#ifdef _DEBUG
|
||||
// Debug builds only: decode the pointer and check it is neither null nor the table at s_lut_code_pointers.get().
const CodeLUT table_ptr = DecodeCodeLUTPointer(table, encoded_ptr);
|
||||
DebugAssert(table_ptr != nullptr && table_ptr != s_lut_code_pointers.get());
|
||||
#endif
|
||||
|
||||
// Write path using the encoded pointer plus a PC-derived byte offset.
*OffsetCodeLUTPointer(encoded_ptr, pc) = function;
|
||||
// Write path using a direct index: low 16 bits of pc, with the bottom 2 bits dropped.
const u32 idx = (pc & 0xFFFF) >> 2;
|
||||
// The table at s_lut_code_pointers.get() must not be the write target.
DebugAssert(g_code_lut[table] != s_lut_code_pointers.get());
|
||||
g_code_lut[table][idx] = function;
|
||||
}
|
||||
|
||||
CPU::CodeCache::Block* CPU::CodeCache::LookupBlock(u32 pc)
|
||||
|
@ -290,10 +290,11 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
|
||||
armAsm->ldr(RARG1, PTR(&g_state.pc));
|
||||
armMoveAddressToReg(armAsm, RARG3, g_code_lut.data());
|
||||
armAsm->lsr(RARG2, RARG1, 16);
|
||||
armAsm->ubfx(RARG1, RARG1, 2, 14);
|
||||
armAsm->ldr(RARG2, MemOperand(RARG3, RARG2, LSL, 2));
|
||||
|
||||
// blr(x9[pc * 2]) (fast_map[pc >> 2])
|
||||
armAsm->ldr(RARG1, MemOperand(RARG2, RARG1));
|
||||
armAsm->ldr(RARG1, MemOperand(RARG2, RARG1, LSL, 2));
|
||||
armAsm->blx(RARG1);
|
||||
}
|
||||
|
||||
|
@ -478,7 +478,7 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
|
||||
armAsm->ldr(RWARG1, PTR(&g_state.pc));
|
||||
armMoveAddressToReg(armAsm, RXARG3, g_code_lut.data());
|
||||
armAsm->lsr(RWARG2, RWARG1, 16);
|
||||
armAsm->lsr(RWARG1, RWARG1, 2);
|
||||
armAsm->ubfx(RWARG1, RWARG1, 2, 14);
|
||||
armAsm->ldr(RXARG2, MemOperand(RXARG3, RXARG2, LSL, 3));
|
||||
|
||||
// blr(x9[pc * 2]) (fast_map[pc >> 2])
|
||||
|
@ -279,12 +279,14 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
|
||||
rvAsm->LWU(RARG1, PTR(&g_state.pc));
|
||||
rvMoveAddressToReg(rvAsm, RARG3, g_code_lut.data());
|
||||
rvAsm->SRLI(RARG2, RARG1, 16);
|
||||
rvAsm->SLLI(RARG1, RARG1, 1);
|
||||
rvAsm->SLLI(RARG2, RARG2, 3);
|
||||
rvAsm->ADD(RARG2, RARG2, RARG3);
|
||||
rvAsm->LD(RARG2, 0, RARG2);
|
||||
rvAsm->SLLI(RARG1, RARG1, 48); // idx = (pc & 0xFFFF) >> 2
|
||||
rvAsm->SRLI(RARG1, RARG1, 50);
|
||||
rvAsm->SLLI(RARG1, RARG1, 3);
|
||||
|
||||
// blr(x9[pc * 2]) (fast_map[pc >> 2])
|
||||
// blr(x9[pc * 2]) (fast_map[idx])
|
||||
rvAsm->ADD(RARG1, RARG1, RARG2);
|
||||
rvAsm->LD(RARG1, 0, RARG1);
|
||||
rvAsm->JR(RARG1);
|
||||
|
@ -156,8 +156,9 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
|
||||
cg->mov(RWARG1, cg->dword[PTR(&g_state.pc)]);
|
||||
cg->lea(RXARG2, cg->dword[PTR(g_code_lut.data())]);
|
||||
cg->mov(RWARG3, RWARG1);
|
||||
cg->shr(RWARG3, 16);
|
||||
cg->shr(RWARG3, LUT_TABLE_SHIFT);
|
||||
cg->mov(RXARG2, cg->qword[RXARG2 + RXARG3 * 8]);
|
||||
cg->and_(RWARG1, (LUT_TABLE_SIZE - 1) << 2); // 0xFFFC
|
||||
|
||||
// call(rcx[pc * 2]) (fast_map[pc >> 2])
|
||||
cg->jmp(cg->qword[RXARG2 + RXARG1 * 2]);
|
||||
|
Loading…
x
Reference in New Issue
Block a user