From d3ceda0c5bfd71f96b97ed9650c5a6f3a506874b Mon Sep 17 00:00:00 2001 From: Stenzek Date: Tue, 3 Dec 2024 16:33:18 +1000 Subject: [PATCH] CPU/CodeCache: Improve block host size heuristics Codegen is much better these days, especially with NewRec. --- src/core/cpu_code_cache.cpp | 34 +++++++++++++++++++++------------- src/core/cpu_recompiler.h | 21 ++++++++++++--------- 2 files changed, 33 insertions(+), 22 deletions(-) diff --git a/src/core/cpu_code_cache.cpp b/src/core/cpu_code_cache.cpp index bc8384352..31c1ac411 100644 --- a/src/core/cpu_code_cache.cpp +++ b/src/core/cpu_code_cache.cpp @@ -154,9 +154,10 @@ static u8* s_free_far_code_ptr = nullptr; static u32 s_far_code_size = 0; static u32 s_far_code_used = 0; -#if defined(_DEBUG) || defined(_DEVEL) +#ifdef DUMP_CODE_SIZE_STATS static u32 s_total_instructions_compiled = 0; static u32 s_total_host_instructions_emitted = 0; +static u32 s_total_host_code_used_by_instructions = 0; #endif } // namespace CPU::CodeCache @@ -691,7 +692,6 @@ void CPU::CodeCache::InvalidateAllRAMBlocks() void CPU::CodeCache::ClearBlocks() { - for (u32 i = 0; i < Bus::RAM_8MB_CODE_PAGE_COUNT; i++) { PageProtectionInfo& ppi = s_page_protection[i]; @@ -1345,10 +1345,13 @@ void CPU::CodeCache::CompileOrRevalidateBlock(u32 start_pc) } // Ensure we're not going to run out of space while compiling this block. - // We could definitely do better here... TODO: far code is no longer needed for newrec + // We could definitely do better here... 
const u32 block_size = static_cast<u32>(s_block_instructions.size()); - if (GetFreeCodeSpace() < (block_size * Recompiler::MAX_NEAR_HOST_BYTES_PER_INSTRUCTION) || - GetFreeFarCodeSpace() < (block_size * Recompiler::MAX_FAR_HOST_BYTES_PER_INSTRUCTION)) + const u32 free_code_space = GetFreeCodeSpace(); + const u32 free_far_code_space = GetFreeFarCodeSpace(); + if (free_code_space < (block_size * Recompiler::MAX_NEAR_HOST_BYTES_PER_INSTRUCTION) || + free_code_space < Recompiler::MIN_CODE_RESERVE_FOR_BLOCK || + free_far_code_space < Recompiler::MIN_CODE_RESERVE_FOR_BLOCK) { ERROR_LOG("Out of code space while compiling {:08X}. Resetting code cache.", start_pc); CodeCache::Reset(); @@ -1540,9 +1543,10 @@ void CPU::CodeCache::CompileASMFunctions() { MemMap::BeginCodeWrite(); -#if defined(_DEBUG) || defined(_DEVEL) +#ifdef DUMP_CODE_SIZE_STATS s_total_instructions_compiled = 0; s_total_host_instructions_emitted = 0; + s_total_host_code_used_by_instructions = 0; #endif const u32 asm_size = EmitASMFunctions(GetFreeCodePointer(), GetFreeCodeSpace()); @@ -1580,14 +1584,18 @@ bool CPU::CodeCache::CompileBlock(Block* block) const u32 host_instructions = GetHostInstructionCount(host_code, host_code_size); s_total_instructions_compiled += block->size; s_total_host_instructions_emitted += host_instructions; + s_total_host_code_used_by_instructions += host_code_size; - DEV_LOG("0x{:08X}: {}/{}b for {}b ({}i), blowup: {:.2f}x, cache: {:.2f}%/{:.2f}%, ipi: {:.2f}/{:.2f}", block->pc, - host_code_size, host_far_code_size, block->size * 4, block->size, - static_cast<float>(host_code_size) / static_cast<float>(block->size * 4), - (static_cast<float>(s_code_used) / static_cast<float>(s_code_size)) * 100.0f, - (static_cast<float>(s_far_code_used) / static_cast<float>(s_far_code_size)) * 100.0f, - static_cast<float>(host_instructions) / static_cast<float>(block->size), - static_cast<float>(s_total_host_instructions_emitted) / static_cast<float>(s_total_instructions_compiled)); + DEV_LOG( + "0x{:08X}: {}/{}b for {}b ({}i), blowup: {:.2f}x, cache: {:.2f}%/{:.2f}%, 
ipi: {:.2f}/{:.2f}, bpi: {:.2f}/{:.2f}", + block->pc, host_code_size, host_far_code_size, block->size * 4, block->size, + static_cast<float>(host_code_size) / static_cast<float>(block->size * 4), + (static_cast<float>(s_code_used) / static_cast<float>(s_code_size)) * 100.0f, + (static_cast<float>(s_far_code_used) / static_cast<float>(s_far_code_size)) * 100.0f, + static_cast<float>(host_instructions) / static_cast<float>(block->size), + static_cast<float>(s_total_host_instructions_emitted) / static_cast<float>(s_total_instructions_compiled), + static_cast<float>(block->host_code_size) / static_cast<float>(block->size), + static_cast<float>(s_total_host_code_used_by_instructions) / static_cast<float>(s_total_instructions_compiled)); #endif #if 0 diff --git a/src/core/cpu_recompiler.h b/src/core/cpu_recompiler.h index 625420767..9ae94962a 100644 --- a/src/core/cpu_recompiler.h +++ b/src/core/cpu_recompiler.h @@ -14,7 +14,6 @@ namespace CPU { -// TODO: Get rid of the virtuals... somehow. class Recompiler { public: @@ -26,8 +25,10 @@ public: #if defined(CPU_ARCH_X64) // A reasonable "maximum" number of bytes per instruction. - static constexpr u32 MAX_NEAR_HOST_BYTES_PER_INSTRUCTION = 64; - static constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128; + // Seems to hover around ~21 bytes without PGXP, and ~26 bytes with. + // Use an upper bound of 32 bytes to be safe. + static constexpr u32 MAX_NEAR_HOST_BYTES_PER_INSTRUCTION = 32; + static constexpr u32 MIN_CODE_RESERVE_FOR_BLOCK = 512; // Number of host registers. static constexpr u32 NUM_HOST_REGS = 16; @@ -37,7 +38,7 @@ public: // A reasonable "maximum" number of bytes per instruction. static constexpr u32 MAX_NEAR_HOST_BYTES_PER_INSTRUCTION = 64; - static constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128; + static constexpr u32 MIN_CODE_RESERVE_FOR_BLOCK = 512; // Number of host registers. static constexpr u32 NUM_HOST_REGS = 16; @@ -45,14 +46,16 @@ public: #elif defined(CPU_ARCH_ARM64) + // A reasonable "maximum" number of bytes per instruction. 
+ // Seems to hover around ~24 bytes without PGXP, and ~40 bytes with. + // Use an upper bound of 48 bytes to be safe. + static constexpr u32 MAX_NEAR_HOST_BYTES_PER_INSTRUCTION = 48; + static constexpr u32 MIN_CODE_RESERVE_FOR_BLOCK = 512; + // Number of host registers. static constexpr u32 NUM_HOST_REGS = 32; static constexpr bool HAS_MEMORY_OPERANDS = false; - // A reasonable "maximum" number of bytes per instruction. - static constexpr u32 MAX_NEAR_HOST_BYTES_PER_INSTRUCTION = 64; - static constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128; - #elif defined(CPU_ARCH_RISCV64) // Number of host registers. @@ -61,7 +64,7 @@ public: // A reasonable "maximum" number of bytes per instruction. static constexpr u32 MAX_NEAR_HOST_BYTES_PER_INSTRUCTION = 64; - static constexpr u32 MAX_FAR_HOST_BYTES_PER_INSTRUCTION = 128; + static constexpr u32 MIN_CODE_RESERVE_FOR_BLOCK = 512; #endif