mirror of
https://github.com/stenzek/duckstation.git
synced 2025-06-07 03:55:33 +00:00
2262 lines
80 KiB
C++
2262 lines
80 KiB
C++
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
|
|
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
|
|
|
|
#include "gpu.h"
|
|
#include "dma.h"
|
|
#include "gpu_backend.h"
|
|
#include "gpu_dump.h"
|
|
#include "gpu_hw_texture_cache.h"
|
|
#include "gpu_shadergen.h"
|
|
#include "gpu_sw_rasterizer.h"
|
|
#include "gpu_thread.h"
|
|
#include "host.h"
|
|
#include "interrupt_controller.h"
|
|
#include "performance_counters.h"
|
|
#include "settings.h"
|
|
#include "system.h"
|
|
#include "system_private.h"
|
|
#include "timers.h"
|
|
#include "timing_event.h"
|
|
|
|
#include "util/gpu_device.h"
|
|
#include "util/image.h"
|
|
#include "util/imgui_manager.h"
|
|
#include "util/media_capture.h"
|
|
#include "util/postprocessing.h"
|
|
#include "util/shadergen.h"
|
|
#include "util/state_wrapper.h"
|
|
|
|
#include "common/align.h"
|
|
#include "common/error.h"
|
|
#include "common/file_system.h"
|
|
#include "common/gsvector_formatter.h"
|
|
#include "common/log.h"
|
|
#include "common/path.h"
|
|
#include "common/small_string.h"
|
|
#include "common/string_util.h"
|
|
|
|
#include "IconsEmoji.h"
|
|
#include "fmt/format.h"
|
|
#include "imgui.h"
|
|
|
|
#include <cmath>
|
|
#include <numbers>
|
|
#include <thread>
|
|
|
|
LOG_CHANNEL(GPU);
|
|
|
|
ALIGN_TO_CACHE_LINE GPU g_gpu;
|
|
|
|
// aligning VRAM to 4K is fine, since the ARM64 instructions compute 4K page aligned addresses
|
|
// or it would be, except we want to import the memory for readbacks on metal..
|
|
#ifdef DYNAMIC_HOST_PAGE_SIZE
|
|
#define VRAM_STORAGE_ALIGNMENT MIN_HOST_PAGE_SIZE
|
|
#else
|
|
#define VRAM_STORAGE_ALIGNMENT HOST_PAGE_SIZE
|
|
#endif
|
|
alignas(VRAM_STORAGE_ALIGNMENT) u16 g_vram[VRAM_SIZE / sizeof(u16)];
|
|
u16 g_gpu_clut[GPU_CLUT_SIZE];
|
|
|
|
const GPU::GP0CommandHandlerTable GPU::s_GP0_command_handler_table = GPU::GenerateGP0CommandHandlerTable();
|
|
|
|
static TimingEvent s_crtc_tick_event(
|
|
"GPU CRTC Tick", 1, 1, [](void* param, TickCount ticks, TickCount ticks_late) { g_gpu.CRTCTickEvent(ticks); },
|
|
nullptr);
|
|
static TimingEvent s_command_tick_event(
|
|
"GPU Command Tick", 1, 1, [](void* param, TickCount ticks, TickCount ticks_late) { g_gpu.CommandTickEvent(ticks); },
|
|
nullptr);
|
|
static TimingEvent s_frame_done_event(
|
|
"Frame Done", 1, 1, [](void* param, TickCount ticks, TickCount ticks_late) { g_gpu.FrameDoneEvent(ticks); }, nullptr);
|
|
|
|
// #define PSX_GPU_STATS
|
|
#ifdef PSX_GPU_STATS
|
|
static u64 s_active_gpu_cycles = 0;
|
|
static u32 s_active_gpu_cycles_frames = 0;
|
|
#endif
|
|
|
|
GPU::GPU() = default;
|
|
|
|
GPU::~GPU() = default;
|
|
|
|
void GPU::Initialize()
|
|
{
|
|
if (!System::IsReplayingGPUDump())
|
|
s_crtc_tick_event.Activate();
|
|
|
|
m_force_progressive_scan = (g_settings.display_deinterlacing_mode == DisplayDeinterlacingMode::Progressive);
|
|
m_force_frame_timings = g_settings.gpu_force_video_timing;
|
|
m_fifo_size = g_settings.gpu_fifo_size;
|
|
m_max_run_ahead = g_settings.gpu_max_run_ahead;
|
|
m_console_is_pal = System::IsPALRegion();
|
|
UpdateCRTCConfig();
|
|
|
|
#ifdef PSX_GPU_STATS
|
|
s_active_gpu_cycles = 0;
|
|
s_active_gpu_cycles_frames = 0;
|
|
#endif
|
|
}
|
|
|
|
void GPU::Shutdown()
|
|
{
|
|
s_command_tick_event.Deactivate();
|
|
s_crtc_tick_event.Deactivate();
|
|
s_frame_done_event.Deactivate();
|
|
|
|
StopRecordingGPUDump();
|
|
}
|
|
|
|
void GPU::UpdateSettings(const Settings& old_settings)
|
|
{
|
|
m_force_progressive_scan = (g_settings.display_deinterlacing_mode == DisplayDeinterlacingMode::Progressive);
|
|
m_fifo_size = g_settings.gpu_fifo_size;
|
|
m_max_run_ahead = g_settings.gpu_max_run_ahead;
|
|
|
|
if (m_force_frame_timings != g_settings.gpu_force_video_timing)
|
|
{
|
|
m_force_frame_timings = g_settings.gpu_force_video_timing;
|
|
m_console_is_pal = System::IsPALRegion();
|
|
UpdateCRTCConfig();
|
|
}
|
|
else if (g_settings.display_crop_mode != old_settings.display_crop_mode)
|
|
{
|
|
// Crop mode calls this, so recalculate the display area
|
|
UpdateCRTCDisplayParameters();
|
|
}
|
|
}
|
|
|
|
void GPU::CPUClockChanged()
|
|
{
|
|
UpdateCRTCConfig();
|
|
}
|
|
|
|
std::tuple<u32, u32> GPU::GetFullDisplayResolution() const
|
|
{
|
|
u32 width, height;
|
|
if (IsDisplayDisabled())
|
|
{
|
|
width = 0;
|
|
height = 0;
|
|
}
|
|
else
|
|
{
|
|
s32 xmin, xmax, ymin, ymax;
|
|
if (!m_GPUSTAT.pal_mode)
|
|
{
|
|
xmin = NTSC_HORIZONTAL_ACTIVE_START;
|
|
xmax = NTSC_HORIZONTAL_ACTIVE_END;
|
|
ymin = NTSC_VERTICAL_ACTIVE_START;
|
|
ymax = NTSC_VERTICAL_ACTIVE_END;
|
|
}
|
|
else
|
|
{
|
|
xmin = PAL_HORIZONTAL_ACTIVE_START;
|
|
xmax = PAL_HORIZONTAL_ACTIVE_END;
|
|
ymin = PAL_VERTICAL_ACTIVE_START;
|
|
ymax = PAL_VERTICAL_ACTIVE_END;
|
|
}
|
|
|
|
width = static_cast<u32>(std::max<s32>(std::clamp<s32>(m_crtc_state.regs.X2, xmin, xmax) -
|
|
std::clamp<s32>(m_crtc_state.regs.X1, xmin, xmax),
|
|
0) /
|
|
m_crtc_state.dot_clock_divider);
|
|
height =
|
|
static_cast<u32>(std::max<s32>(
|
|
std::clamp<s32>(m_crtc_state.regs.Y2, ymin, ymax) - std::clamp<s32>(m_crtc_state.regs.Y1, ymin, ymax), 0))
|
|
<< BoolToUInt8(m_GPUSTAT.vertical_interlace && m_GPUSTAT.vertical_resolution);
|
|
}
|
|
|
|
return std::tie(width, height);
|
|
}
|
|
|
|
void GPU::Reset(bool clear_vram)
|
|
{
|
|
m_GPUSTAT.bits = 0x14802000;
|
|
m_set_texture_disable_mask = false;
|
|
m_GPUREAD_latch = 0;
|
|
m_crtc_state.fractional_ticks = 0;
|
|
m_crtc_state.fractional_dot_ticks = 0;
|
|
m_crtc_state.current_tick_in_scanline = 0;
|
|
m_crtc_state.current_scanline = 0;
|
|
m_crtc_state.in_hblank = false;
|
|
m_crtc_state.in_vblank = false;
|
|
m_crtc_state.interlaced_field = 0;
|
|
m_crtc_state.interlaced_display_field = 0;
|
|
|
|
// Cancel VRAM writes.
|
|
m_blitter_state = BlitterState::Idle;
|
|
|
|
// Force event to reschedule itself.
|
|
s_crtc_tick_event.Deactivate();
|
|
s_command_tick_event.Deactivate();
|
|
|
|
SoftReset();
|
|
|
|
// Can skip the VRAM clear if it's not a hardware reset.
|
|
if (clear_vram)
|
|
GPUBackend::PushCommand(GPUBackend::NewClearVRAMCommand());
|
|
}
|
|
|
|
void GPU::SoftReset()
|
|
{
|
|
if (m_blitter_state == BlitterState::WritingVRAM)
|
|
FinishVRAMWrite();
|
|
|
|
m_GPUSTAT.texture_page_x_base = 0;
|
|
m_GPUSTAT.texture_page_y_base = 0;
|
|
m_GPUSTAT.semi_transparency_mode = GPUTransparencyMode::HalfBackgroundPlusHalfForeground;
|
|
m_GPUSTAT.texture_color_mode = GPUTextureMode::Palette4Bit;
|
|
m_GPUSTAT.dither_enable = false;
|
|
m_GPUSTAT.draw_to_displayed_field = false;
|
|
m_GPUSTAT.set_mask_while_drawing = false;
|
|
m_GPUSTAT.check_mask_before_draw = false;
|
|
m_GPUSTAT.reverse_flag = false;
|
|
m_GPUSTAT.texture_disable = false;
|
|
m_GPUSTAT.horizontal_resolution_2 = 0;
|
|
m_GPUSTAT.horizontal_resolution_1 = 0;
|
|
m_GPUSTAT.vertical_resolution = false;
|
|
m_GPUSTAT.pal_mode = System::IsPALRegion();
|
|
m_GPUSTAT.display_area_color_depth_24 = false;
|
|
m_GPUSTAT.vertical_interlace = false;
|
|
m_GPUSTAT.display_disable = true;
|
|
m_GPUSTAT.dma_direction = GPUDMADirection::Off;
|
|
m_drawing_area = {};
|
|
m_drawing_area_changed = true;
|
|
m_drawing_offset = {};
|
|
std::memset(&m_crtc_state.regs, 0, sizeof(m_crtc_state.regs));
|
|
m_crtc_state.regs.horizontal_display_range = 0xC60260;
|
|
m_crtc_state.regs.vertical_display_range = 0x3FC10;
|
|
m_blitter_state = BlitterState::Idle;
|
|
m_pending_command_ticks = 0;
|
|
m_command_total_words = 0;
|
|
m_vram_transfer = {};
|
|
m_fifo.Clear();
|
|
m_blit_buffer.clear();
|
|
m_blit_remaining_words = 0;
|
|
m_draw_mode.texture_window_value = 0xFFFFFFFFu;
|
|
SetDrawMode(0);
|
|
SetTexturePalette(0);
|
|
SetTextureWindow(0);
|
|
InvalidateCLUT();
|
|
UpdateDMARequest();
|
|
UpdateCRTCConfig();
|
|
UpdateCommandTickEvent();
|
|
UpdateGPUIdle();
|
|
}
|
|
|
|
bool GPU::DoState(StateWrapper& sw)
|
|
{
|
|
if (sw.IsWriting())
|
|
{
|
|
// Need to ensure our copy of VRAM is good.
|
|
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
|
|
}
|
|
|
|
sw.Do(&m_GPUSTAT.bits);
|
|
|
|
sw.Do(&m_draw_mode.mode_reg.bits);
|
|
sw.Do(&m_draw_mode.palette_reg.bits);
|
|
sw.Do(&m_draw_mode.texture_window_value);
|
|
|
|
if (sw.GetVersion() < 62) [[unlikely]]
|
|
{
|
|
// texture_page_x, texture_page_y, texture_palette_x, texture_palette_y
|
|
DebugAssert(sw.IsReading());
|
|
sw.SkipBytes(sizeof(u32) * 4);
|
|
}
|
|
|
|
sw.Do(&m_draw_mode.texture_window.and_x);
|
|
sw.Do(&m_draw_mode.texture_window.and_y);
|
|
sw.Do(&m_draw_mode.texture_window.or_x);
|
|
sw.Do(&m_draw_mode.texture_window.or_y);
|
|
sw.Do(&m_draw_mode.texture_x_flip);
|
|
sw.Do(&m_draw_mode.texture_y_flip);
|
|
|
|
sw.Do(&m_drawing_area.left);
|
|
sw.Do(&m_drawing_area.top);
|
|
sw.Do(&m_drawing_area.right);
|
|
sw.Do(&m_drawing_area.bottom);
|
|
sw.Do(&m_drawing_offset.x);
|
|
sw.Do(&m_drawing_offset.y);
|
|
sw.Do(&m_drawing_offset.x);
|
|
|
|
sw.Do(&m_console_is_pal);
|
|
sw.Do(&m_set_texture_disable_mask);
|
|
|
|
sw.Do(&m_crtc_state.regs.display_address_start);
|
|
sw.Do(&m_crtc_state.regs.horizontal_display_range);
|
|
sw.Do(&m_crtc_state.regs.vertical_display_range);
|
|
sw.Do(&m_crtc_state.dot_clock_divider);
|
|
sw.Do(&m_crtc_state.display_width);
|
|
sw.Do(&m_crtc_state.display_height);
|
|
sw.Do(&m_crtc_state.display_origin_left);
|
|
sw.Do(&m_crtc_state.display_origin_top);
|
|
sw.Do(&m_crtc_state.display_vram_left);
|
|
sw.Do(&m_crtc_state.display_vram_top);
|
|
sw.Do(&m_crtc_state.display_vram_width);
|
|
sw.Do(&m_crtc_state.display_vram_height);
|
|
sw.Do(&m_crtc_state.horizontal_total);
|
|
sw.Do(&m_crtc_state.horizontal_visible_start);
|
|
sw.Do(&m_crtc_state.horizontal_visible_end);
|
|
sw.Do(&m_crtc_state.horizontal_display_start);
|
|
sw.Do(&m_crtc_state.horizontal_display_end);
|
|
sw.Do(&m_crtc_state.vertical_total);
|
|
sw.Do(&m_crtc_state.vertical_visible_start);
|
|
sw.Do(&m_crtc_state.vertical_visible_end);
|
|
sw.Do(&m_crtc_state.vertical_display_start);
|
|
sw.Do(&m_crtc_state.vertical_display_end);
|
|
sw.Do(&m_crtc_state.fractional_ticks);
|
|
sw.Do(&m_crtc_state.current_tick_in_scanline);
|
|
sw.Do(&m_crtc_state.current_scanline);
|
|
sw.DoEx(&m_crtc_state.fractional_dot_ticks, 46, 0);
|
|
sw.Do(&m_crtc_state.in_hblank);
|
|
sw.Do(&m_crtc_state.in_vblank);
|
|
sw.Do(&m_crtc_state.interlaced_field);
|
|
sw.Do(&m_crtc_state.interlaced_display_field);
|
|
sw.Do(&m_crtc_state.active_line_lsb);
|
|
|
|
sw.Do(&m_blitter_state);
|
|
sw.Do(&m_pending_command_ticks);
|
|
sw.Do(&m_command_total_words);
|
|
sw.Do(&m_GPUREAD_latch);
|
|
|
|
u16 load_clut_data[GPU_CLUT_SIZE];
|
|
if (sw.GetVersion() < 64) [[unlikely]]
|
|
{
|
|
// Clear CLUT cache and let it populate later.
|
|
InvalidateCLUT();
|
|
std::memset(load_clut_data, 0, sizeof(load_clut_data));
|
|
}
|
|
else
|
|
{
|
|
sw.Do(&m_current_clut_reg_bits);
|
|
sw.Do(&m_current_clut_is_8bit);
|
|
|
|
// I hate this extra copy... because I'm a moron and put it in the middle of the state data.
|
|
sw.DoArray(sw.IsReading() ? load_clut_data : g_gpu_clut, std::size(g_gpu_clut));
|
|
}
|
|
|
|
sw.Do(&m_vram_transfer.x);
|
|
sw.Do(&m_vram_transfer.y);
|
|
sw.Do(&m_vram_transfer.width);
|
|
sw.Do(&m_vram_transfer.height);
|
|
sw.Do(&m_vram_transfer.col);
|
|
sw.Do(&m_vram_transfer.row);
|
|
|
|
sw.Do(&m_fifo);
|
|
sw.Do(&m_blit_buffer);
|
|
sw.Do(&m_blit_remaining_words);
|
|
sw.Do(&m_render_command.bits);
|
|
|
|
sw.Do(&m_max_run_ahead);
|
|
sw.Do(&m_fifo_size);
|
|
|
|
if (!sw.DoMarker("GPU-VRAM"))
|
|
return false;
|
|
|
|
if (sw.IsReading())
|
|
{
|
|
// Need to calculate the TC data size. But skip over VRAM first, we'll grab it later.
|
|
const size_t vram_start_pos = sw.GetPosition();
|
|
sw.SkipBytes(VRAM_SIZE);
|
|
u32 tc_data_size;
|
|
if (!GPUTextureCache::GetStateSize(sw, &tc_data_size)) [[unlikely]]
|
|
return false;
|
|
|
|
// Now we can actually allocate FIFO storage, and push it to the GPU thread.
|
|
GPUBackendLoadStateCommand* cmd = static_cast<GPUBackendLoadStateCommand*>(
|
|
GPUThread::AllocateCommand(GPUBackendCommandType::LoadState, sizeof(GPUBackendLoadStateCommand) + tc_data_size));
|
|
std::memcpy(cmd->clut_data, load_clut_data, sizeof(cmd->clut_data));
|
|
std::memcpy(cmd->vram_data, sw.GetData() + vram_start_pos, VRAM_SIZE);
|
|
cmd->texture_cache_state_version = sw.GetVersion();
|
|
cmd->texture_cache_state_size = tc_data_size;
|
|
if (tc_data_size > 0)
|
|
std::memcpy(cmd->texture_cache_state, sw.GetData() + vram_start_pos + VRAM_SIZE, tc_data_size);
|
|
GPUThread::PushCommand(cmd);
|
|
|
|
m_drawing_area_changed = true;
|
|
SetClampedDrawingArea();
|
|
UpdateDMARequest();
|
|
UpdateCRTCConfig();
|
|
UpdateCommandTickEvent();
|
|
}
|
|
else // if not memory state
|
|
{
|
|
// write vram
|
|
sw.DoBytes(g_vram, VRAM_SIZE);
|
|
|
|
// write TC data, we have to be super careful here, since we're reading GPU thread state...
|
|
GPUTextureCache::DoState(sw, false);
|
|
}
|
|
|
|
return !sw.HasError();
|
|
}
|
|
|
|
void GPU::DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss)
|
|
{
|
|
sw.Do(&m_GPUSTAT.bits);
|
|
|
|
sw.DoBytes(&m_draw_mode, sizeof(m_draw_mode));
|
|
sw.DoBytes(&m_drawing_area, sizeof(m_drawing_area));
|
|
sw.DoBytes(&m_drawing_offset, sizeof(m_drawing_offset));
|
|
|
|
sw.Do(&m_console_is_pal);
|
|
sw.Do(&m_set_texture_disable_mask);
|
|
|
|
sw.DoBytes(&m_crtc_state, sizeof(m_crtc_state));
|
|
|
|
sw.Do(&m_blitter_state);
|
|
sw.Do(&m_pending_command_ticks);
|
|
sw.Do(&m_command_total_words);
|
|
sw.Do(&m_GPUREAD_latch);
|
|
|
|
sw.Do(&m_current_clut_reg_bits);
|
|
sw.Do(&m_current_clut_is_8bit);
|
|
|
|
sw.DoBytes(&m_vram_transfer, sizeof(m_vram_transfer));
|
|
|
|
sw.Do(&m_fifo);
|
|
sw.Do(&m_blit_buffer);
|
|
sw.Do(&m_blit_remaining_words);
|
|
sw.Do(&m_render_command.bits);
|
|
|
|
if (sw.IsReading())
|
|
{
|
|
m_drawing_area_changed = true;
|
|
SetClampedDrawingArea();
|
|
UpdateDMARequest();
|
|
UpdateCRTCConfig();
|
|
UpdateCommandTickEvent();
|
|
}
|
|
|
|
// Push to thread.
|
|
GPUBackendDoMemoryStateCommand* cmd = static_cast<GPUBackendDoMemoryStateCommand*>(GPUThread::AllocateCommand(
|
|
sw.IsReading() ? GPUBackendCommandType::LoadMemoryState : GPUBackendCommandType::SaveMemoryState,
|
|
sizeof(GPUBackendDoMemoryStateCommand)));
|
|
cmd->memory_save_state = &mss;
|
|
GPUThread::PushCommandAndWakeThread(cmd);
|
|
}
|
|
|
|
void GPU::UpdateDMARequest()
|
|
{
|
|
switch (m_blitter_state)
|
|
{
|
|
case BlitterState::Idle:
|
|
m_GPUSTAT.ready_to_send_vram = false;
|
|
m_GPUSTAT.ready_to_recieve_dma = (m_fifo.IsEmpty() || m_fifo.GetSize() < m_command_total_words);
|
|
break;
|
|
|
|
case BlitterState::WritingVRAM:
|
|
m_GPUSTAT.ready_to_send_vram = false;
|
|
m_GPUSTAT.ready_to_recieve_dma = (m_fifo.GetSize() < m_fifo_size);
|
|
break;
|
|
|
|
case BlitterState::ReadingVRAM:
|
|
m_GPUSTAT.ready_to_send_vram = true;
|
|
m_GPUSTAT.ready_to_recieve_dma = false;
|
|
break;
|
|
|
|
case BlitterState::DrawingPolyLine:
|
|
m_GPUSTAT.ready_to_send_vram = false;
|
|
m_GPUSTAT.ready_to_recieve_dma = (m_fifo.GetSize() < m_fifo_size);
|
|
break;
|
|
|
|
default:
|
|
UnreachableCode();
|
|
break;
|
|
}
|
|
|
|
bool dma_request;
|
|
switch (m_GPUSTAT.dma_direction)
|
|
{
|
|
case GPUDMADirection::Off:
|
|
dma_request = false;
|
|
break;
|
|
|
|
case GPUDMADirection::FIFO:
|
|
dma_request = m_GPUSTAT.ready_to_recieve_dma;
|
|
break;
|
|
|
|
case GPUDMADirection::CPUtoGP0:
|
|
dma_request = m_GPUSTAT.ready_to_recieve_dma;
|
|
break;
|
|
|
|
case GPUDMADirection::GPUREADtoCPU:
|
|
dma_request = m_GPUSTAT.ready_to_send_vram;
|
|
break;
|
|
|
|
default:
|
|
dma_request = false;
|
|
break;
|
|
}
|
|
m_GPUSTAT.dma_data_request = dma_request;
|
|
DMA::SetRequest(DMA::Channel::GPU, dma_request);
|
|
}
|
|
|
|
void GPU::UpdateGPUIdle()
|
|
{
|
|
m_GPUSTAT.gpu_idle = (m_blitter_state == BlitterState::Idle && m_pending_command_ticks <= 0 && m_fifo.IsEmpty());
|
|
}
|
|
|
|
u32 GPU::ReadRegister(u32 offset)
|
|
{
|
|
switch (offset)
|
|
{
|
|
case 0x00:
|
|
return ReadGPUREAD();
|
|
|
|
case 0x04:
|
|
{
|
|
// code can be dependent on the odd/even bit, so update the GPU state when reading.
|
|
// we can mitigate this slightly by only updating when the raster is actually hitting a new line
|
|
if (IsCRTCScanlinePending())
|
|
SynchronizeCRTC();
|
|
if (IsCommandCompletionPending())
|
|
s_command_tick_event.InvokeEarly();
|
|
|
|
return m_GPUSTAT.bits;
|
|
}
|
|
|
|
default:
|
|
ERROR_LOG("Unhandled register read: {:02X}", offset);
|
|
return UINT32_C(0xFFFFFFFF);
|
|
}
|
|
}
|
|
|
|
void GPU::WriteRegister(u32 offset, u32 value)
|
|
{
|
|
switch (offset)
|
|
{
|
|
case 0x00:
|
|
{
|
|
if (m_gpu_dump) [[unlikely]]
|
|
m_gpu_dump->WriteGP0Packet(value);
|
|
|
|
m_fifo.Push(value);
|
|
ExecuteCommands();
|
|
return;
|
|
}
|
|
|
|
case 0x04:
|
|
{
|
|
if (m_gpu_dump) [[unlikely]]
|
|
m_gpu_dump->WriteGP1Packet(value);
|
|
|
|
WriteGP1(value);
|
|
return;
|
|
}
|
|
|
|
default:
|
|
{
|
|
ERROR_LOG("Unhandled register write: {:02X} <- {:08X}", offset, value);
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
void GPU::DMARead(u32* words, u32 word_count)
|
|
{
|
|
if (m_GPUSTAT.dma_direction != GPUDMADirection::GPUREADtoCPU)
|
|
{
|
|
ERROR_LOG("Invalid DMA direction from GPU DMA read");
|
|
std::fill_n(words, word_count, UINT32_C(0xFFFFFFFF));
|
|
return;
|
|
}
|
|
|
|
for (u32 i = 0; i < word_count; i++)
|
|
words[i] = ReadGPUREAD();
|
|
}
|
|
|
|
void GPU::EndDMAWrite()
|
|
{
|
|
ExecuteCommands();
|
|
}
|
|
|
|
/**
|
|
* NTSC GPU clock 53.693175 MHz
|
|
* PAL GPU clock 53.203425 MHz
|
|
* courtesy of @ggrtk
|
|
*
|
|
* NTSC - sysclk * 715909 / 451584
|
|
* PAL - sysclk * 709379 / 451584
|
|
*/
|
|
|
|
TickCount GPU::GetCRTCFrequency() const
|
|
{
|
|
return m_console_is_pal ? 53203425 : 53693175;
|
|
}
|
|
|
|
TickCount GPU::CRTCTicksToSystemTicks(TickCount gpu_ticks, TickCount fractional_ticks) const
|
|
{
|
|
// convert to master clock, rounding up as we want to overshoot not undershoot
|
|
if (!m_console_is_pal)
|
|
return static_cast<TickCount>((u64(gpu_ticks) * u64(451584) + fractional_ticks + u64(715908)) / u64(715909));
|
|
else
|
|
return static_cast<TickCount>((u64(gpu_ticks) * u64(451584) + fractional_ticks + u64(709378)) / u64(709379));
|
|
}
|
|
|
|
TickCount GPU::SystemTicksToCRTCTicks(TickCount sysclk_ticks, TickCount* fractional_ticks) const
|
|
{
|
|
u64 mul = u64(sysclk_ticks);
|
|
mul *= !m_console_is_pal ? u64(715909) : u64(709379);
|
|
mul += u64(*fractional_ticks);
|
|
|
|
const TickCount ticks = static_cast<TickCount>(mul / u64(451584));
|
|
*fractional_ticks = static_cast<TickCount>(mul % u64(451584));
|
|
return ticks;
|
|
}
|
|
|
|
void GPU::AddCommandTicks(TickCount ticks)
|
|
{
|
|
m_pending_command_ticks += ticks;
|
|
#ifdef PSX_GPU_STATS
|
|
s_active_gpu_cycles += ticks;
|
|
#endif
|
|
}
|
|
|
|
void GPU::SynchronizeCRTC()
|
|
{
|
|
s_crtc_tick_event.InvokeEarly();
|
|
}
|
|
|
|
float GPU::ComputeHorizontalFrequency() const
|
|
{
|
|
const CRTCState& cs = m_crtc_state;
|
|
TickCount fractional_ticks = 0;
|
|
return static_cast<float>(
|
|
static_cast<double>(SystemTicksToCRTCTicks(System::GetTicksPerSecond(), &fractional_ticks)) /
|
|
static_cast<double>(cs.horizontal_total));
|
|
}
|
|
|
|
float GPU::ComputeVerticalFrequency() const
|
|
{
|
|
const CRTCState& cs = m_crtc_state;
|
|
const TickCount ticks_per_frame = cs.horizontal_total * cs.vertical_total;
|
|
TickCount fractional_ticks = 0;
|
|
return static_cast<float>(
|
|
static_cast<double>(SystemTicksToCRTCTicks(System::GetTicksPerSecond(), &fractional_ticks)) /
|
|
static_cast<double>(ticks_per_frame));
|
|
}
|
|
|
|
float GPU::ComputeDisplayAspectRatio() const
|
|
{
|
|
// Display off => Doesn't matter.
|
|
if (m_crtc_state.display_width == 0 || m_crtc_state.display_height == 0)
|
|
return 4.0f / 3.0f;
|
|
|
|
// PAR 1:1 is not corrected.
|
|
if (g_settings.display_aspect_ratio == DisplayAspectRatio::PAR1_1)
|
|
return static_cast<float>(m_crtc_state.display_width) / static_cast<float>(m_crtc_state.display_height);
|
|
|
|
float ar = 4.0f / 3.0f;
|
|
if (!g_settings.display_force_4_3_for_24bit || !m_GPUSTAT.display_area_color_depth_24)
|
|
{
|
|
if (g_settings.display_aspect_ratio == DisplayAspectRatio::MatchWindow)
|
|
{
|
|
const WindowInfo& wi = GPUThread::GetRenderWindowInfo();
|
|
if (!wi.IsSurfaceless())
|
|
ar = static_cast<float>(wi.surface_width) / static_cast<float>(wi.surface_height);
|
|
}
|
|
else if (g_settings.display_aspect_ratio == DisplayAspectRatio::Custom)
|
|
{
|
|
ar = static_cast<float>(g_settings.display_aspect_ratio_custom_numerator) /
|
|
static_cast<float>(g_settings.display_aspect_ratio_custom_denominator);
|
|
}
|
|
else
|
|
{
|
|
ar = g_settings.GetDisplayAspectRatioValue();
|
|
}
|
|
}
|
|
|
|
return ar;
|
|
}
|
|
|
|
float GPU::ComputeSourceAspectRatio() const
|
|
{
|
|
const float source_aspect_ratio =
|
|
static_cast<float>(m_crtc_state.display_width) / static_cast<float>(m_crtc_state.display_height);
|
|
|
|
// Correction is applied to the GTE for stretch to fit, that way it fills the window.
|
|
const float source_aspect_ratio_correction =
|
|
(g_settings.display_aspect_ratio == DisplayAspectRatio::MatchWindow) ? 1.0f : ComputeAspectRatioCorrection();
|
|
|
|
return source_aspect_ratio / source_aspect_ratio_correction;
|
|
}
|
|
|
|
float GPU::ComputePixelAspectRatio() const
|
|
{
|
|
const float dar = ComputeDisplayAspectRatio();
|
|
const float sar = ComputeSourceAspectRatio();
|
|
const float par = dar / sar;
|
|
return par;
|
|
}
|
|
|
|
float GPU::ComputeAspectRatioCorrection() const
|
|
{
|
|
const CRTCState& cs = m_crtc_state;
|
|
float relative_width = static_cast<float>(cs.horizontal_visible_end - cs.horizontal_visible_start);
|
|
float relative_height = static_cast<float>(cs.vertical_visible_end - cs.vertical_visible_start);
|
|
if (relative_width <= 0 || relative_height <= 0 || g_settings.display_aspect_ratio == DisplayAspectRatio::PAR1_1 ||
|
|
g_settings.display_crop_mode == DisplayCropMode::OverscanUncorrected ||
|
|
g_settings.display_crop_mode == DisplayCropMode::BordersUncorrected)
|
|
{
|
|
return 1.0f;
|
|
}
|
|
|
|
if (m_GPUSTAT.pal_mode)
|
|
{
|
|
relative_width /= static_cast<float>(PAL_HORIZONTAL_ACTIVE_END - PAL_HORIZONTAL_ACTIVE_START);
|
|
relative_height /= static_cast<float>(PAL_VERTICAL_ACTIVE_END - PAL_VERTICAL_ACTIVE_START);
|
|
}
|
|
else
|
|
{
|
|
relative_width /= static_cast<float>(NTSC_HORIZONTAL_ACTIVE_END - NTSC_HORIZONTAL_ACTIVE_START);
|
|
relative_height /= static_cast<float>(NTSC_VERTICAL_ACTIVE_END - NTSC_VERTICAL_ACTIVE_START);
|
|
}
|
|
|
|
return (relative_width / relative_height);
|
|
}
|
|
|
|
void GPU::ApplyPixelAspectRatioToSize(float par, float* width, float* height)
|
|
{
|
|
if (par < 1.0f)
|
|
{
|
|
// stretch height, preserve width
|
|
*height = std::ceil(*height / par);
|
|
}
|
|
else
|
|
{
|
|
// stretch width, preserve height
|
|
*width = std::ceil(*width * par);
|
|
}
|
|
}
|
|
|
|
void GPU::UpdateCRTCConfig()
|
|
{
|
|
static constexpr std::array<u16, 8> dot_clock_dividers = {{10, 8, 5, 4, 7, 7, 7, 7}};
|
|
CRTCState& cs = m_crtc_state;
|
|
|
|
cs.vertical_total = m_GPUSTAT.pal_mode ? PAL_TOTAL_LINES : NTSC_TOTAL_LINES;
|
|
cs.horizontal_total = m_GPUSTAT.pal_mode ? PAL_TICKS_PER_LINE : NTSC_TICKS_PER_LINE;
|
|
cs.horizontal_active_start = m_GPUSTAT.pal_mode ? PAL_HORIZONTAL_ACTIVE_START : NTSC_HORIZONTAL_ACTIVE_START;
|
|
cs.horizontal_active_end = m_GPUSTAT.pal_mode ? PAL_HORIZONTAL_ACTIVE_END : NTSC_HORIZONTAL_ACTIVE_END;
|
|
|
|
const u8 horizontal_resolution_index = m_GPUSTAT.horizontal_resolution_1 | (m_GPUSTAT.horizontal_resolution_2 << 2);
|
|
cs.dot_clock_divider = dot_clock_dividers[horizontal_resolution_index];
|
|
cs.horizontal_display_start =
|
|
(std::min<u16>(cs.regs.X1, cs.horizontal_total) / cs.dot_clock_divider) * cs.dot_clock_divider;
|
|
cs.horizontal_display_end =
|
|
(std::min<u16>(cs.regs.X2, cs.horizontal_total) / cs.dot_clock_divider) * cs.dot_clock_divider;
|
|
cs.vertical_display_start = std::min<u16>(cs.regs.Y1, cs.vertical_total);
|
|
cs.vertical_display_end = std::min<u16>(cs.regs.Y2, cs.vertical_total);
|
|
|
|
if (m_GPUSTAT.pal_mode && m_force_frame_timings == ForceVideoTimingMode::NTSC)
|
|
{
|
|
// scale to NTSC parameters
|
|
cs.horizontal_display_start =
|
|
static_cast<u16>((static_cast<u32>(cs.horizontal_display_start) * NTSC_TICKS_PER_LINE) / PAL_TICKS_PER_LINE);
|
|
cs.horizontal_display_end = static_cast<u16>(
|
|
((static_cast<u32>(cs.horizontal_display_end) * NTSC_TICKS_PER_LINE) + (PAL_TICKS_PER_LINE - 1)) /
|
|
PAL_TICKS_PER_LINE);
|
|
cs.vertical_display_start =
|
|
static_cast<u16>((static_cast<u32>(cs.vertical_display_start) * NTSC_TOTAL_LINES) / PAL_TOTAL_LINES);
|
|
cs.vertical_display_end = static_cast<u16>(
|
|
((static_cast<u32>(cs.vertical_display_end) * NTSC_TOTAL_LINES) + (PAL_TOTAL_LINES - 1)) / PAL_TOTAL_LINES);
|
|
|
|
cs.vertical_total = NTSC_TOTAL_LINES;
|
|
cs.current_scanline %= NTSC_TOTAL_LINES;
|
|
cs.horizontal_total = NTSC_TICKS_PER_LINE;
|
|
cs.current_tick_in_scanline %= NTSC_TICKS_PER_LINE;
|
|
}
|
|
else if (!m_GPUSTAT.pal_mode && m_force_frame_timings == ForceVideoTimingMode::PAL)
|
|
{
|
|
// scale to PAL parameters
|
|
cs.horizontal_display_start =
|
|
static_cast<u16>((static_cast<u32>(cs.horizontal_display_start) * PAL_TICKS_PER_LINE) / NTSC_TICKS_PER_LINE);
|
|
cs.horizontal_display_end = static_cast<u16>(
|
|
((static_cast<u32>(cs.horizontal_display_end) * PAL_TICKS_PER_LINE) + (NTSC_TICKS_PER_LINE - 1)) /
|
|
NTSC_TICKS_PER_LINE);
|
|
cs.vertical_display_start =
|
|
static_cast<u16>((static_cast<u32>(cs.vertical_display_start) * PAL_TOTAL_LINES) / NTSC_TOTAL_LINES);
|
|
cs.vertical_display_end = static_cast<u16>(
|
|
((static_cast<u32>(cs.vertical_display_end) * PAL_TOTAL_LINES) + (NTSC_TOTAL_LINES - 1)) / NTSC_TOTAL_LINES);
|
|
|
|
cs.vertical_total = PAL_TOTAL_LINES;
|
|
cs.current_scanline %= PAL_TOTAL_LINES;
|
|
cs.horizontal_total = PAL_TICKS_PER_LINE;
|
|
cs.current_tick_in_scanline %= PAL_TICKS_PER_LINE;
|
|
}
|
|
|
|
cs.horizontal_display_start =
|
|
static_cast<u16>(System::ScaleTicksToOverclock(static_cast<TickCount>(cs.horizontal_display_start)));
|
|
cs.horizontal_display_end =
|
|
static_cast<u16>(System::ScaleTicksToOverclock(static_cast<TickCount>(cs.horizontal_display_end)));
|
|
cs.horizontal_active_start =
|
|
static_cast<u16>(System::ScaleTicksToOverclock(static_cast<TickCount>(cs.horizontal_active_start)));
|
|
cs.horizontal_active_end =
|
|
static_cast<u16>(System::ScaleTicksToOverclock(static_cast<TickCount>(cs.horizontal_active_end)));
|
|
cs.horizontal_total = static_cast<u16>(System::ScaleTicksToOverclock(static_cast<TickCount>(cs.horizontal_total)));
|
|
|
|
cs.current_tick_in_scanline %= cs.horizontal_total;
|
|
cs.UpdateHBlankFlag();
|
|
|
|
cs.current_scanline %= cs.vertical_total;
|
|
|
|
System::SetVideoFrameRate(ComputeVerticalFrequency());
|
|
|
|
UpdateCRTCDisplayParameters();
|
|
UpdateCRTCTickEvent();
|
|
}
|
|
|
|
void GPU::UpdateCRTCDisplayParameters()
|
|
{
|
|
CRTCState& cs = m_crtc_state;
|
|
const DisplayCropMode crop_mode = g_settings.display_crop_mode;
|
|
|
|
const u16 horizontal_total = m_GPUSTAT.pal_mode ? PAL_TICKS_PER_LINE : NTSC_TICKS_PER_LINE;
|
|
const u16 vertical_total = m_GPUSTAT.pal_mode ? PAL_TOTAL_LINES : NTSC_TOTAL_LINES;
|
|
const u16 horizontal_display_start =
|
|
(std::min<u16>(cs.regs.X1, horizontal_total) / cs.dot_clock_divider) * cs.dot_clock_divider;
|
|
const u16 horizontal_display_end =
|
|
(std::min<u16>(cs.regs.X2, horizontal_total) / cs.dot_clock_divider) * cs.dot_clock_divider;
|
|
const u16 vertical_display_start = std::min<u16>(cs.regs.Y1, vertical_total);
|
|
const u16 vertical_display_end = std::min<u16>(cs.regs.Y2, vertical_total);
|
|
const u16 old_horizontal_visible_start = cs.horizontal_visible_start;
|
|
const u16 old_horizontal_visible_end = cs.horizontal_visible_end;
|
|
const u16 old_vertical_visible_start = cs.vertical_visible_start;
|
|
const u16 old_vertical_visible_end = cs.vertical_visible_end;
|
|
|
|
if (m_GPUSTAT.pal_mode)
|
|
{
|
|
// TODO: Verify PAL numbers.
|
|
switch (crop_mode)
|
|
{
|
|
case DisplayCropMode::None:
|
|
cs.horizontal_visible_start = PAL_HORIZONTAL_ACTIVE_START;
|
|
cs.horizontal_visible_end = PAL_HORIZONTAL_ACTIVE_END;
|
|
cs.vertical_visible_start = PAL_VERTICAL_ACTIVE_START;
|
|
cs.vertical_visible_end = PAL_VERTICAL_ACTIVE_END;
|
|
break;
|
|
|
|
case DisplayCropMode::Overscan:
|
|
case DisplayCropMode::OverscanUncorrected:
|
|
cs.horizontal_visible_start = static_cast<u16>(std::max<s32>(
|
|
0, static_cast<s32>(PAL_OVERSCAN_HORIZONTAL_ACTIVE_START) + g_settings.display_active_start_offset));
|
|
cs.horizontal_visible_end = static_cast<u16>(
|
|
std::max<s32>(cs.horizontal_visible_start,
|
|
static_cast<s32>(PAL_OVERSCAN_HORIZONTAL_ACTIVE_END) + g_settings.display_active_end_offset));
|
|
cs.vertical_visible_start = static_cast<u16>(std::max<s32>(
|
|
0, static_cast<s32>(PAL_OVERSCAN_VERTICAL_ACTIVE_START) + g_settings.display_line_start_offset));
|
|
cs.vertical_visible_end = static_cast<u16>(
|
|
std::max<s32>(cs.vertical_visible_start,
|
|
static_cast<s32>(NTSC_OVERSCAN_VERTICAL_ACTIVE_END) + g_settings.display_line_end_offset));
|
|
break;
|
|
|
|
case DisplayCropMode::Borders:
|
|
case DisplayCropMode::BordersUncorrected:
|
|
default:
|
|
cs.horizontal_visible_start = horizontal_display_start;
|
|
cs.horizontal_visible_end = horizontal_display_end;
|
|
cs.vertical_visible_start = vertical_display_start;
|
|
cs.vertical_visible_end = vertical_display_end;
|
|
break;
|
|
}
|
|
cs.horizontal_visible_start =
|
|
std::clamp<u16>(cs.horizontal_visible_start, PAL_HORIZONTAL_ACTIVE_START, PAL_HORIZONTAL_ACTIVE_END);
|
|
cs.horizontal_visible_end =
|
|
std::clamp<u16>(cs.horizontal_visible_end, cs.horizontal_visible_start, PAL_HORIZONTAL_ACTIVE_END);
|
|
cs.vertical_visible_start =
|
|
std::clamp<u16>(cs.vertical_visible_start, PAL_VERTICAL_ACTIVE_START, PAL_VERTICAL_ACTIVE_END);
|
|
cs.vertical_visible_end =
|
|
std::clamp<u16>(cs.vertical_visible_end, cs.vertical_visible_start, PAL_VERTICAL_ACTIVE_END);
|
|
}
|
|
else
|
|
{
|
|
switch (crop_mode)
|
|
{
|
|
case DisplayCropMode::None:
|
|
cs.horizontal_visible_start = NTSC_HORIZONTAL_ACTIVE_START;
|
|
cs.horizontal_visible_end = NTSC_HORIZONTAL_ACTIVE_END;
|
|
cs.vertical_visible_start = NTSC_VERTICAL_ACTIVE_START;
|
|
cs.vertical_visible_end = NTSC_VERTICAL_ACTIVE_END;
|
|
break;
|
|
|
|
case DisplayCropMode::Overscan:
|
|
case DisplayCropMode::OverscanUncorrected:
|
|
cs.horizontal_visible_start = static_cast<u16>(std::max<s32>(
|
|
0, static_cast<s32>(NTSC_OVERSCAN_HORIZONTAL_ACTIVE_START) + g_settings.display_active_start_offset));
|
|
cs.horizontal_visible_end = static_cast<u16>(
|
|
std::max<s32>(cs.horizontal_visible_start,
|
|
static_cast<s32>(NTSC_OVERSCAN_HORIZONTAL_ACTIVE_END) + g_settings.display_active_end_offset));
|
|
cs.vertical_visible_start = static_cast<u16>(std::max<s32>(
|
|
0, static_cast<s32>(NTSC_OVERSCAN_VERTICAL_ACTIVE_START) + g_settings.display_line_start_offset));
|
|
cs.vertical_visible_end = static_cast<u16>(
|
|
std::max<s32>(cs.vertical_visible_start,
|
|
static_cast<s32>(NTSC_OVERSCAN_VERTICAL_ACTIVE_END) + g_settings.display_line_end_offset));
|
|
break;
|
|
|
|
case DisplayCropMode::Borders:
|
|
case DisplayCropMode::BordersUncorrected:
|
|
default:
|
|
cs.horizontal_visible_start = horizontal_display_start;
|
|
cs.horizontal_visible_end = horizontal_display_end;
|
|
cs.vertical_visible_start = vertical_display_start;
|
|
cs.vertical_visible_end = vertical_display_end;
|
|
break;
|
|
}
|
|
cs.horizontal_visible_start =
|
|
std::clamp<u16>(cs.horizontal_visible_start, NTSC_HORIZONTAL_ACTIVE_START, NTSC_HORIZONTAL_ACTIVE_END);
|
|
cs.horizontal_visible_end =
|
|
std::clamp<u16>(cs.horizontal_visible_end, cs.horizontal_visible_start, NTSC_HORIZONTAL_ACTIVE_END);
|
|
cs.vertical_visible_start =
|
|
std::clamp<u16>(cs.vertical_visible_start, NTSC_VERTICAL_ACTIVE_START, NTSC_VERTICAL_ACTIVE_END);
|
|
cs.vertical_visible_end =
|
|
std::clamp<u16>(cs.vertical_visible_end, cs.vertical_visible_start, NTSC_VERTICAL_ACTIVE_END);
|
|
}
|
|
|
|
// If force-progressive is enabled, we only double the height in 480i mode. This way non-interleaved 480i framebuffers
|
|
// won't be broken when displayed.
|
|
const u8 y_shift = BoolToUInt8(m_GPUSTAT.vertical_interlace && m_GPUSTAT.vertical_resolution);
|
|
const u8 height_shift = m_force_progressive_scan ? y_shift : BoolToUInt8(m_GPUSTAT.vertical_interlace);
|
|
const u16 old_vram_width = m_crtc_state.display_vram_width;
|
|
const u16 old_vram_height = m_crtc_state.display_vram_height;
|
|
|
|
// Determine screen size.
|
|
cs.display_width = (cs.horizontal_visible_end - cs.horizontal_visible_start) / cs.dot_clock_divider;
|
|
cs.display_height = (cs.vertical_visible_end - cs.vertical_visible_start) << height_shift;
|
|
|
|
// Determine number of pixels outputted from VRAM (in general, round to 4-pixel multiple).
|
|
// TODO: Verify behavior if values are outside of the active video portion of scanline.
|
|
const u16 horizontal_display_ticks =
|
|
(horizontal_display_end < horizontal_display_start) ? 0 : (horizontal_display_end - horizontal_display_start);
|
|
|
|
const u16 horizontal_display_pixels = horizontal_display_ticks / cs.dot_clock_divider;
|
|
if (horizontal_display_pixels == 1u)
|
|
cs.display_vram_width = 4u;
|
|
else
|
|
cs.display_vram_width = (horizontal_display_pixels + 2u) & ~3u;
|
|
|
|
// Determine if we need to adjust the VRAM rectangle (because the display is starting outside the visible area) or add
|
|
// padding.
|
|
u16 horizontal_skip_pixels;
|
|
if (horizontal_display_start >= cs.horizontal_visible_start)
|
|
{
|
|
cs.display_origin_left = (horizontal_display_start - cs.horizontal_visible_start) / cs.dot_clock_divider;
|
|
cs.display_vram_left = cs.regs.X;
|
|
horizontal_skip_pixels = 0;
|
|
}
|
|
else
|
|
{
|
|
horizontal_skip_pixels = (cs.horizontal_visible_start - horizontal_display_start) / cs.dot_clock_divider;
|
|
cs.display_origin_left = 0;
|
|
cs.display_vram_left = (cs.regs.X + horizontal_skip_pixels) % VRAM_WIDTH;
|
|
}
|
|
|
|
// apply the crop from the start (usually overscan)
|
|
cs.display_vram_width -= std::min(cs.display_vram_width, horizontal_skip_pixels);
|
|
|
|
// Apply crop from the end by shrinking VRAM rectangle width if display would end outside the visible area.
|
|
cs.display_vram_width = std::min<u16>(cs.display_vram_width, cs.display_width - cs.display_origin_left);
|
|
|
|
if (vertical_display_start >= cs.vertical_visible_start)
|
|
{
|
|
cs.display_origin_top = (vertical_display_start - cs.vertical_visible_start) << y_shift;
|
|
cs.display_vram_top = cs.regs.Y;
|
|
}
|
|
else
|
|
{
|
|
cs.display_origin_top = 0;
|
|
cs.display_vram_top = (cs.regs.Y + ((cs.vertical_visible_start - vertical_display_start) << y_shift)) % VRAM_HEIGHT;
|
|
}
|
|
|
|
if (vertical_display_end <= cs.vertical_visible_end)
|
|
{
|
|
cs.display_vram_height =
|
|
(vertical_display_end -
|
|
std::min(vertical_display_end, std::max(vertical_display_start, cs.vertical_visible_start)))
|
|
<< height_shift;
|
|
}
|
|
else
|
|
{
|
|
cs.display_vram_height =
|
|
(cs.vertical_visible_end -
|
|
std::min(cs.vertical_visible_end, std::max(vertical_display_start, cs.vertical_visible_start)))
|
|
<< height_shift;
|
|
}
|
|
|
|
if (old_horizontal_visible_start != cs.horizontal_visible_start ||
|
|
old_horizontal_visible_end != cs.horizontal_visible_end ||
|
|
old_vertical_visible_start != cs.vertical_visible_start || old_vertical_visible_end != cs.vertical_visible_end)
|
|
{
|
|
System::UpdateGTEAspectRatio();
|
|
}
|
|
|
|
if ((cs.display_vram_width != old_vram_width || cs.display_vram_height != old_vram_height) &&
|
|
g_settings.gpu_resolution_scale == 0)
|
|
{
|
|
GPUBackend::QueueUpdateResolutionScale();
|
|
}
|
|
}
|
|
|
|
TickCount GPU::GetPendingCRTCTicks() const
|
|
{
|
|
const TickCount pending_sysclk_ticks = s_crtc_tick_event.GetTicksSinceLastExecution();
|
|
TickCount fractional_ticks = m_crtc_state.fractional_ticks;
|
|
return SystemTicksToCRTCTicks(pending_sysclk_ticks, &fractional_ticks);
|
|
}
|
|
|
|
TickCount GPU::GetPendingCommandTicks() const
|
|
{
|
|
if (!s_command_tick_event.IsActive())
|
|
return 0;
|
|
|
|
return SystemTicksToGPUTicks(s_command_tick_event.GetTicksSinceLastExecution());
|
|
}
|
|
|
|
TickCount GPU::GetRemainingCommandTicks() const
|
|
{
|
|
return std::max<TickCount>(m_pending_command_ticks - GetPendingCommandTicks(), 0);
|
|
}
|
|
|
|
void GPU::UpdateCRTCTickEvent()
|
|
{
|
|
// figure out how many GPU ticks until the next vblank or event
|
|
TickCount lines_until_event;
|
|
if (Timers::IsSyncEnabled(HBLANK_TIMER_INDEX))
|
|
{
|
|
// when the timer sync is enabled we need to sync at vblank start and end
|
|
lines_until_event =
|
|
(m_crtc_state.current_scanline >= m_crtc_state.vertical_display_end) ?
|
|
(m_crtc_state.vertical_total - m_crtc_state.current_scanline + m_crtc_state.vertical_display_start) :
|
|
(m_crtc_state.vertical_display_end - m_crtc_state.current_scanline);
|
|
}
|
|
else
|
|
{
|
|
lines_until_event =
|
|
(m_crtc_state.current_scanline >= m_crtc_state.vertical_display_end ?
|
|
(m_crtc_state.vertical_total - m_crtc_state.current_scanline + m_crtc_state.vertical_display_end) :
|
|
(m_crtc_state.vertical_display_end - m_crtc_state.current_scanline));
|
|
}
|
|
if (Timers::IsExternalIRQEnabled(HBLANK_TIMER_INDEX))
|
|
lines_until_event = std::min(lines_until_event, Timers::GetTicksUntilIRQ(HBLANK_TIMER_INDEX));
|
|
|
|
TickCount ticks_until_event =
|
|
lines_until_event * m_crtc_state.horizontal_total - m_crtc_state.current_tick_in_scanline;
|
|
if (Timers::IsExternalIRQEnabled(DOT_TIMER_INDEX))
|
|
{
|
|
const TickCount dots_until_irq = Timers::GetTicksUntilIRQ(DOT_TIMER_INDEX);
|
|
const TickCount ticks_until_irq =
|
|
(dots_until_irq * m_crtc_state.dot_clock_divider) - m_crtc_state.fractional_dot_ticks;
|
|
ticks_until_event = std::min(ticks_until_event, std::max<TickCount>(ticks_until_irq, 0));
|
|
}
|
|
|
|
if (Timers::IsSyncEnabled(DOT_TIMER_INDEX))
|
|
{
|
|
// This could potentially be optimized to skip the time the gate is active, if we're resetting and free running.
|
|
// But realistically, I've only seen sync off (most games), or reset+pause on gate (Konami Lightgun games).
|
|
TickCount ticks_until_hblank_start_or_end;
|
|
if (m_crtc_state.current_tick_in_scanline >= m_crtc_state.horizontal_active_end)
|
|
{
|
|
ticks_until_hblank_start_or_end =
|
|
m_crtc_state.horizontal_total - m_crtc_state.current_tick_in_scanline + m_crtc_state.horizontal_active_start;
|
|
}
|
|
else if (m_crtc_state.current_tick_in_scanline < m_crtc_state.horizontal_active_start)
|
|
{
|
|
ticks_until_hblank_start_or_end = m_crtc_state.horizontal_active_start - m_crtc_state.current_tick_in_scanline;
|
|
}
|
|
else
|
|
{
|
|
ticks_until_hblank_start_or_end = m_crtc_state.horizontal_active_end - m_crtc_state.current_tick_in_scanline;
|
|
}
|
|
|
|
ticks_until_event = std::min(ticks_until_event, ticks_until_hblank_start_or_end);
|
|
}
|
|
|
|
if (!System::IsReplayingGPUDump()) [[likely]]
|
|
s_crtc_tick_event.Schedule(CRTCTicksToSystemTicks(ticks_until_event, m_crtc_state.fractional_ticks));
|
|
}
|
|
|
|
bool GPU::IsCRTCScanlinePending() const
|
|
{
|
|
// TODO: Most of these should be fields, not lines.
|
|
const TickCount ticks = (GetPendingCRTCTicks() + m_crtc_state.current_tick_in_scanline);
|
|
return (ticks >= m_crtc_state.horizontal_total);
|
|
}
|
|
|
|
bool GPU::IsCommandCompletionPending() const
|
|
{
|
|
return (m_pending_command_ticks > 0 && GetPendingCommandTicks() >= m_pending_command_ticks);
|
|
}
|
|
|
|
void GPU::CRTCTickEvent(TickCount ticks)
|
|
{
|
|
// convert cpu/master clock to GPU ticks, accounting for partial cycles because of the non-integer divider
|
|
const TickCount prev_tick = m_crtc_state.current_tick_in_scanline;
|
|
const TickCount gpu_ticks = SystemTicksToCRTCTicks(ticks, &m_crtc_state.fractional_ticks);
|
|
m_crtc_state.current_tick_in_scanline += gpu_ticks;
|
|
|
|
if (Timers::IsUsingExternalClock(DOT_TIMER_INDEX))
|
|
{
|
|
m_crtc_state.fractional_dot_ticks += gpu_ticks;
|
|
const TickCount dots = m_crtc_state.fractional_dot_ticks / m_crtc_state.dot_clock_divider;
|
|
m_crtc_state.fractional_dot_ticks = m_crtc_state.fractional_dot_ticks % m_crtc_state.dot_clock_divider;
|
|
if (dots > 0)
|
|
Timers::AddTicks(DOT_TIMER_INDEX, dots);
|
|
}
|
|
|
|
if (m_crtc_state.current_tick_in_scanline < m_crtc_state.horizontal_total)
|
|
{
|
|
// short path when we execute <1 line.. this shouldn't occur often, except when gated (konami lightgun games).
|
|
m_crtc_state.UpdateHBlankFlag();
|
|
Timers::SetGate(DOT_TIMER_INDEX, m_crtc_state.in_hblank);
|
|
if (Timers::IsUsingExternalClock(HBLANK_TIMER_INDEX))
|
|
{
|
|
const u32 hblank_timer_ticks =
|
|
BoolToUInt32(m_crtc_state.current_tick_in_scanline >= m_crtc_state.horizontal_active_end) -
|
|
BoolToUInt32(prev_tick >= m_crtc_state.horizontal_active_end);
|
|
if (hblank_timer_ticks > 0)
|
|
Timers::AddTicks(HBLANK_TIMER_INDEX, static_cast<TickCount>(hblank_timer_ticks));
|
|
}
|
|
|
|
UpdateCRTCTickEvent();
|
|
return;
|
|
}
|
|
|
|
u32 lines_to_draw = m_crtc_state.current_tick_in_scanline / m_crtc_state.horizontal_total;
|
|
m_crtc_state.current_tick_in_scanline %= m_crtc_state.horizontal_total;
|
|
#if 0
|
|
Log_WarningPrintf("Old line: %u, new line: %u, drawing %u", m_crtc_state.current_scanline,
|
|
m_crtc_state.current_scanline + lines_to_draw, lines_to_draw);
|
|
#endif
|
|
|
|
m_crtc_state.UpdateHBlankFlag();
|
|
Timers::SetGate(DOT_TIMER_INDEX, m_crtc_state.in_hblank);
|
|
|
|
if (Timers::IsUsingExternalClock(HBLANK_TIMER_INDEX))
|
|
{
|
|
// lines_to_draw => number of times ticks passed horizontal_total.
|
|
// Subtract one if we were previously in hblank, but only on that line. If it was previously less than
|
|
// horizontal_active_start, we still want to add one, because hblank would have gone inactive, and then active again
|
|
// during the line. Finally add the current line being drawn, if hblank went inactive->active during the line.
|
|
const u32 hblank_timer_ticks =
|
|
lines_to_draw - BoolToUInt32(prev_tick >= m_crtc_state.horizontal_active_end) +
|
|
BoolToUInt32(m_crtc_state.current_tick_in_scanline >= m_crtc_state.horizontal_active_end);
|
|
if (hblank_timer_ticks > 0)
|
|
Timers::AddTicks(HBLANK_TIMER_INDEX, static_cast<TickCount>(hblank_timer_ticks));
|
|
}
|
|
|
|
bool frame_done = false;
|
|
while (lines_to_draw > 0)
|
|
{
|
|
const u32 lines_to_draw_this_loop =
|
|
std::min(lines_to_draw, m_crtc_state.vertical_total - m_crtc_state.current_scanline);
|
|
const u32 prev_scanline = m_crtc_state.current_scanline;
|
|
m_crtc_state.current_scanline += lines_to_draw_this_loop;
|
|
DebugAssert(m_crtc_state.current_scanline <= m_crtc_state.vertical_total);
|
|
lines_to_draw -= lines_to_draw_this_loop;
|
|
|
|
// clear the vblank flag if the beam would pass through the display area
|
|
if (prev_scanline < m_crtc_state.vertical_display_start &&
|
|
m_crtc_state.current_scanline >= m_crtc_state.vertical_display_end)
|
|
{
|
|
Timers::SetGate(HBLANK_TIMER_INDEX, false);
|
|
InterruptController::SetLineState(InterruptController::IRQ::VBLANK, false);
|
|
m_crtc_state.in_vblank = false;
|
|
}
|
|
|
|
const bool new_vblank = m_crtc_state.current_scanline < m_crtc_state.vertical_display_start ||
|
|
m_crtc_state.current_scanline >= m_crtc_state.vertical_display_end;
|
|
if (m_crtc_state.in_vblank != new_vblank)
|
|
{
|
|
if (new_vblank)
|
|
{
|
|
DEBUG_LOG("Now in v-blank");
|
|
|
|
if (m_gpu_dump) [[unlikely]]
|
|
{
|
|
m_gpu_dump->WriteVSync(System::GetGlobalTickCounter());
|
|
if (m_gpu_dump->IsFinished()) [[unlikely]]
|
|
StopRecordingGPUDump();
|
|
}
|
|
|
|
// flush any pending draws and "scan out" the image
|
|
// TODO: move present in here I guess
|
|
System::IncrementFrameNumber();
|
|
UpdateDisplay(!System::IsRunaheadActive());
|
|
frame_done = true;
|
|
|
|
// switch fields early. this is needed so we draw to the correct one.
|
|
if (m_GPUSTAT.InInterleaved480iMode())
|
|
m_crtc_state.interlaced_display_field = m_crtc_state.interlaced_field ^ 1u;
|
|
else
|
|
m_crtc_state.interlaced_display_field = 0;
|
|
|
|
#ifdef PSX_GPU_STATS
|
|
if ((++s_active_gpu_cycles_frames) == 60)
|
|
{
|
|
const double busy_frac =
|
|
static_cast<double>(s_active_gpu_cycles) /
|
|
static_cast<double>(SystemTicksToGPUTicks(System::ScaleTicksToOverclock(System::MASTER_CLOCK)) *
|
|
(ComputeVerticalFrequency() / 60.0f));
|
|
DEV_LOG("PSX GPU Usage: {:.2f}% [{:.0f} cycles avg per frame]", busy_frac * 100,
|
|
static_cast<double>(s_active_gpu_cycles) / static_cast<double>(s_active_gpu_cycles_frames));
|
|
s_active_gpu_cycles = 0;
|
|
s_active_gpu_cycles_frames = 0;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
Timers::SetGate(HBLANK_TIMER_INDEX, new_vblank);
|
|
InterruptController::SetLineState(InterruptController::IRQ::VBLANK, new_vblank);
|
|
m_crtc_state.in_vblank = new_vblank;
|
|
}
|
|
|
|
// past the end of vblank?
|
|
if (m_crtc_state.current_scanline == m_crtc_state.vertical_total)
|
|
{
|
|
// start the new frame
|
|
m_crtc_state.current_scanline = 0;
|
|
if (m_GPUSTAT.vertical_interlace)
|
|
{
|
|
m_crtc_state.interlaced_field ^= 1u;
|
|
m_GPUSTAT.interlaced_field = !m_crtc_state.interlaced_field;
|
|
}
|
|
else
|
|
{
|
|
m_crtc_state.interlaced_field = 0;
|
|
m_GPUSTAT.interlaced_field = 0u; // new GPU = 1, old GPU = 0
|
|
}
|
|
}
|
|
}
|
|
|
|
// alternating even line bit in 240-line mode
|
|
if (m_GPUSTAT.InInterleaved480iMode())
|
|
{
|
|
m_crtc_state.active_line_lsb =
|
|
Truncate8((m_crtc_state.regs.Y + BoolToUInt32(m_crtc_state.interlaced_display_field)) & u32(1));
|
|
m_GPUSTAT.display_line_lsb = ConvertToBoolUnchecked(
|
|
(m_crtc_state.regs.Y + (BoolToUInt8(!m_crtc_state.in_vblank) & m_crtc_state.interlaced_display_field)) & u32(1));
|
|
}
|
|
else
|
|
{
|
|
m_crtc_state.active_line_lsb = 0;
|
|
m_GPUSTAT.display_line_lsb = ConvertToBoolUnchecked((m_crtc_state.regs.Y + m_crtc_state.current_scanline) & u32(1));
|
|
}
|
|
|
|
UpdateCRTCTickEvent();
|
|
|
|
if (frame_done)
|
|
{
|
|
// we can't issue frame done if we're in the middle of executing a rec block, e.g. from reading GPUSTAT
|
|
// defer it until the end of the block in this case.
|
|
if (!TimingEvents::IsRunningEvents()) [[unlikely]]
|
|
{
|
|
DEBUG_LOG("Deferring frame done call");
|
|
s_frame_done_event.Schedule(0);
|
|
}
|
|
else
|
|
{
|
|
System::FrameDone();
|
|
}
|
|
}
|
|
}
|
|
|
|
void GPU::CommandTickEvent(TickCount ticks)
|
|
{
|
|
m_pending_command_ticks -= SystemTicksToGPUTicks(ticks);
|
|
|
|
m_executing_commands = true;
|
|
ExecuteCommands();
|
|
UpdateCommandTickEvent();
|
|
m_executing_commands = false;
|
|
}
|
|
|
|
void GPU::FrameDoneEvent(TickCount ticks)
|
|
{
|
|
DebugAssert(TimingEvents::IsRunningEvents());
|
|
s_frame_done_event.Deactivate();
|
|
System::FrameDone();
|
|
}
|
|
|
|
void GPU::UpdateCommandTickEvent()
|
|
{
|
|
if (m_pending_command_ticks <= 0)
|
|
{
|
|
m_pending_command_ticks = 0;
|
|
s_command_tick_event.Deactivate();
|
|
}
|
|
else
|
|
{
|
|
s_command_tick_event.SetIntervalAndSchedule(GPUTicksToSystemTicks(m_pending_command_ticks));
|
|
}
|
|
}
|
|
|
|
void GPU::ConvertScreenCoordinatesToDisplayCoordinates(float window_x, float window_y, float* display_x,
|
|
float* display_y) const
|
|
{
|
|
const WindowInfo& wi = GPUThread::GetRenderWindowInfo();
|
|
if (wi.IsSurfaceless())
|
|
{
|
|
*display_x = *display_y = -1.0f;
|
|
return;
|
|
}
|
|
|
|
GSVector4i display_rc, draw_rc;
|
|
CalculateDrawRect(wi.surface_width, wi.surface_height, m_crtc_state.display_width, m_crtc_state.display_height,
|
|
m_crtc_state.display_origin_left, m_crtc_state.display_origin_top, m_crtc_state.display_vram_width,
|
|
m_crtc_state.display_vram_height, g_settings.display_rotation, g_settings.display_alignment,
|
|
ComputePixelAspectRatio(), g_settings.display_stretch_vertically,
|
|
(g_settings.display_scaling == DisplayScalingMode::NearestInteger ||
|
|
g_settings.display_scaling == DisplayScalingMode::BilinearInteger),
|
|
&display_rc, &draw_rc);
|
|
|
|
// convert coordinates to active display region, then to full display region
|
|
const float scaled_display_x =
|
|
(window_x - static_cast<float>(display_rc.left)) / static_cast<float>(display_rc.width());
|
|
const float scaled_display_y =
|
|
(window_y - static_cast<float>(display_rc.top)) / static_cast<float>(display_rc.height());
|
|
|
|
// scale back to internal resolution
|
|
*display_x = scaled_display_x * static_cast<float>(m_crtc_state.display_width);
|
|
*display_y = scaled_display_y * static_cast<float>(m_crtc_state.display_height);
|
|
|
|
// TODO: apply rotation matrix
|
|
|
|
DEV_LOG("win {:.0f},{:.0f} -> local {:.0f},{:.0f}, disp {:.2f},{:.2f} (size {},{} frac {},{})", window_x, window_y,
|
|
window_x - display_rc.left, window_y - display_rc.top, *display_x, *display_y, m_crtc_state.display_width,
|
|
m_crtc_state.display_height, *display_x / static_cast<float>(m_crtc_state.display_width),
|
|
*display_y / static_cast<float>(m_crtc_state.display_height));
|
|
}
|
|
|
|
bool GPU::ConvertDisplayCoordinatesToBeamTicksAndLines(float display_x, float display_y, float x_scale, u32* out_tick,
|
|
u32* out_line) const
|
|
{
|
|
if (x_scale != 1.0f)
|
|
{
|
|
const float dw = static_cast<float>(m_crtc_state.display_width);
|
|
float scaled_x = ((display_x / dw) * 2.0f) - 1.0f; // 0..1 -> -1..1
|
|
scaled_x *= x_scale;
|
|
display_x = (((scaled_x + 1.0f) * 0.5f) * dw); // -1..1 -> 0..1
|
|
}
|
|
|
|
if (display_x < 0 || static_cast<u32>(display_x) >= m_crtc_state.display_width || display_y < 0 ||
|
|
static_cast<u32>(display_y) >= m_crtc_state.display_height)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
*out_line = (static_cast<u32>(std::round(display_y)) >> BoolToUInt8(IsInterlacedDisplayEnabled())) +
|
|
m_crtc_state.vertical_visible_start;
|
|
*out_tick = static_cast<u32>(System::ScaleTicksToOverclock(
|
|
static_cast<TickCount>(std::round(display_x * static_cast<float>(m_crtc_state.dot_clock_divider))))) +
|
|
m_crtc_state.horizontal_visible_start;
|
|
return true;
|
|
}
|
|
|
|
void GPU::GetBeamPosition(u32* out_ticks, u32* out_line)
|
|
{
|
|
const u32 current_tick = (GetPendingCRTCTicks() + m_crtc_state.current_tick_in_scanline);
|
|
*out_line =
|
|
(m_crtc_state.current_scanline + (current_tick / m_crtc_state.horizontal_total)) % m_crtc_state.vertical_total;
|
|
*out_ticks = current_tick % m_crtc_state.horizontal_total;
|
|
}
|
|
|
|
TickCount GPU::GetSystemTicksUntilTicksAndLine(u32 ticks, u32 line)
|
|
{
|
|
u32 current_tick, current_line;
|
|
GetBeamPosition(¤t_tick, ¤t_line);
|
|
|
|
u32 ticks_to_target;
|
|
if (ticks >= current_tick)
|
|
{
|
|
ticks_to_target = ticks - current_tick;
|
|
}
|
|
else
|
|
{
|
|
ticks_to_target = (m_crtc_state.horizontal_total - current_tick) + ticks;
|
|
current_line = (current_line + 1) % m_crtc_state.vertical_total;
|
|
}
|
|
|
|
const u32 lines_to_target =
|
|
(line >= current_line) ? (line - current_line) : ((m_crtc_state.vertical_total - current_line) + line);
|
|
|
|
const TickCount total_ticks_to_target =
|
|
static_cast<TickCount>((lines_to_target * m_crtc_state.horizontal_total) + ticks_to_target);
|
|
|
|
return CRTCTicksToSystemTicks(total_ticks_to_target, m_crtc_state.fractional_ticks);
|
|
}
|
|
|
|
u32 GPU::ReadGPUREAD()
|
|
{
|
|
if (m_blitter_state != BlitterState::ReadingVRAM)
|
|
return m_GPUREAD_latch;
|
|
|
|
// Read two pixels out of VRAM and combine them. Zero fill odd pixel counts.
|
|
u32 value = 0;
|
|
for (u32 i = 0; i < 2; i++)
|
|
{
|
|
// Read with correct wrap-around behavior.
|
|
const u16 read_x = (m_vram_transfer.x + m_vram_transfer.col) % VRAM_WIDTH;
|
|
const u16 read_y = (m_vram_transfer.y + m_vram_transfer.row) % VRAM_HEIGHT;
|
|
value |= ZeroExtend32(g_vram[read_y * VRAM_WIDTH + read_x]) << (i * 16);
|
|
|
|
if (++m_vram_transfer.col == m_vram_transfer.width)
|
|
{
|
|
m_vram_transfer.col = 0;
|
|
|
|
if (++m_vram_transfer.row == m_vram_transfer.height)
|
|
{
|
|
DEBUG_LOG("End of VRAM->CPU transfer");
|
|
m_vram_transfer = {};
|
|
m_blitter_state = BlitterState::Idle;
|
|
|
|
// end of transfer, catch up on any commands which were written (unlikely)
|
|
ExecuteCommands();
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
m_GPUREAD_latch = value;
|
|
return value;
|
|
}
|
|
|
|
void GPU::WriteGP1(u32 value)
|
|
{
|
|
const u32 command = (value >> 24) & 0x3Fu;
|
|
const u32 param = value & UINT32_C(0x00FFFFFF);
|
|
switch (command)
|
|
{
|
|
case static_cast<u8>(GP1Command::ResetGPU):
|
|
{
|
|
DEBUG_LOG("GP1 reset GPU");
|
|
s_command_tick_event.InvokeEarly();
|
|
SynchronizeCRTC();
|
|
SoftReset();
|
|
}
|
|
break;
|
|
|
|
case static_cast<u8>(GP1Command::ClearFIFO):
|
|
{
|
|
DEBUG_LOG("GP1 clear FIFO");
|
|
s_command_tick_event.InvokeEarly();
|
|
SynchronizeCRTC();
|
|
|
|
// flush partial writes
|
|
if (m_blitter_state == BlitterState::WritingVRAM)
|
|
FinishVRAMWrite();
|
|
|
|
m_blitter_state = BlitterState::Idle;
|
|
m_command_total_words = 0;
|
|
m_vram_transfer = {};
|
|
m_fifo.Clear();
|
|
m_blit_buffer.clear();
|
|
m_blit_remaining_words = 0;
|
|
m_pending_command_ticks = 0;
|
|
s_command_tick_event.Deactivate();
|
|
UpdateDMARequest();
|
|
UpdateGPUIdle();
|
|
}
|
|
break;
|
|
|
|
case static_cast<u8>(GP1Command::AcknowledgeInterrupt):
|
|
{
|
|
DEBUG_LOG("Acknowledge interrupt");
|
|
m_GPUSTAT.interrupt_request = false;
|
|
InterruptController::SetLineState(InterruptController::IRQ::GPU, false);
|
|
}
|
|
break;
|
|
|
|
case static_cast<u8>(GP1Command::SetDisplayDisable):
|
|
{
|
|
const bool disable = ConvertToBoolUnchecked(value & 0x01);
|
|
DEBUG_LOG("Display {}", disable ? "disabled" : "enabled");
|
|
SynchronizeCRTC();
|
|
|
|
if (!m_GPUSTAT.display_disable && disable && IsInterlacedDisplayEnabled())
|
|
ClearDisplay();
|
|
|
|
m_GPUSTAT.display_disable = disable;
|
|
}
|
|
break;
|
|
|
|
case static_cast<u8>(GP1Command::SetDMADirection):
|
|
{
|
|
DEBUG_LOG("DMA direction <- 0x{:02X}", static_cast<u32>(param));
|
|
if (m_GPUSTAT.dma_direction != static_cast<GPUDMADirection>(param))
|
|
{
|
|
m_GPUSTAT.dma_direction = static_cast<GPUDMADirection>(param);
|
|
UpdateDMARequest();
|
|
}
|
|
}
|
|
break;
|
|
|
|
case static_cast<u8>(GP1Command::SetDisplayStartAddress):
|
|
{
|
|
const u32 new_value = param & CRTCState::Regs::DISPLAY_ADDRESS_START_MASK;
|
|
DEBUG_LOG("Display address start <- 0x{:08X}", new_value);
|
|
|
|
System::IncrementInternalFrameNumber();
|
|
if (m_crtc_state.regs.display_address_start != new_value)
|
|
{
|
|
SynchronizeCRTC();
|
|
m_crtc_state.regs.display_address_start = new_value;
|
|
UpdateCRTCDisplayParameters();
|
|
GPUBackend::PushCommand(GPUBackend::NewBufferSwappedCommand());
|
|
}
|
|
}
|
|
break;
|
|
|
|
case static_cast<u8>(GP1Command::SetHorizontalDisplayRange):
|
|
{
|
|
const u32 new_value = param & CRTCState::Regs::HORIZONTAL_DISPLAY_RANGE_MASK;
|
|
DEBUG_LOG("Horizontal display range <- 0x{:08X}", new_value);
|
|
|
|
if (m_crtc_state.regs.horizontal_display_range != new_value)
|
|
{
|
|
SynchronizeCRTC();
|
|
m_crtc_state.regs.horizontal_display_range = new_value;
|
|
UpdateCRTCConfig();
|
|
}
|
|
}
|
|
break;
|
|
|
|
case static_cast<u8>(GP1Command::SetVerticalDisplayRange):
|
|
{
|
|
const u32 new_value = param & CRTCState::Regs::VERTICAL_DISPLAY_RANGE_MASK;
|
|
DEBUG_LOG("Vertical display range <- 0x{:08X}", new_value);
|
|
|
|
if (m_crtc_state.regs.vertical_display_range != new_value)
|
|
{
|
|
SynchronizeCRTC();
|
|
m_crtc_state.regs.vertical_display_range = new_value;
|
|
UpdateCRTCConfig();
|
|
}
|
|
}
|
|
break;
|
|
|
|
case static_cast<u8>(GP1Command::SetDisplayMode):
|
|
{
|
|
const GP1SetDisplayMode dm{param};
|
|
GPUSTAT new_GPUSTAT{m_GPUSTAT.bits};
|
|
new_GPUSTAT.horizontal_resolution_1 = dm.horizontal_resolution_1;
|
|
new_GPUSTAT.vertical_resolution = dm.vertical_resolution;
|
|
new_GPUSTAT.pal_mode = dm.pal_mode;
|
|
new_GPUSTAT.display_area_color_depth_24 = dm.display_area_color_depth;
|
|
new_GPUSTAT.vertical_interlace = dm.vertical_interlace;
|
|
new_GPUSTAT.horizontal_resolution_2 = dm.horizontal_resolution_2;
|
|
new_GPUSTAT.reverse_flag = dm.reverse_flag;
|
|
DEBUG_LOG("Set display mode <- 0x{:08X}", dm.bits);
|
|
|
|
if (!m_GPUSTAT.vertical_interlace && dm.vertical_interlace && !m_force_progressive_scan)
|
|
{
|
|
// bit of a hack, technically we should pull the previous frame in, but this may not exist anymore
|
|
ClearDisplay();
|
|
}
|
|
|
|
if (m_GPUSTAT.bits != new_GPUSTAT.bits)
|
|
{
|
|
// Have to be careful when setting this because Synchronize() can modify GPUSTAT.
|
|
static constexpr u32 SET_MASK = UINT32_C(0b00000000011111110100000000000000);
|
|
s_command_tick_event.InvokeEarly();
|
|
SynchronizeCRTC();
|
|
m_GPUSTAT.bits = (m_GPUSTAT.bits & ~SET_MASK) | (new_GPUSTAT.bits & SET_MASK);
|
|
UpdateCRTCConfig();
|
|
}
|
|
}
|
|
break;
|
|
|
|
case static_cast<u8>(GP1Command::SetAllowTextureDisable):
|
|
{
|
|
m_set_texture_disable_mask = ConvertToBoolUnchecked(param & 0x01);
|
|
DEBUG_LOG("Set texture disable mask <- {}", m_set_texture_disable_mask ? "allowed" : "ignored");
|
|
}
|
|
break;
|
|
|
|
case 0x10:
|
|
case 0x11:
|
|
case 0x12:
|
|
case 0x13:
|
|
case 0x14:
|
|
case 0x15:
|
|
case 0x16:
|
|
case 0x17:
|
|
case 0x18:
|
|
case 0x19:
|
|
case 0x1A:
|
|
case 0x1B:
|
|
case 0x1C:
|
|
case 0x1D:
|
|
case 0x1E:
|
|
case 0x1F:
|
|
{
|
|
HandleGetGPUInfoCommand(value);
|
|
}
|
|
break;
|
|
|
|
[[unlikely]] default:
|
|
ERROR_LOG("Unimplemented GP1 command 0x{:02X}", command);
|
|
break;
|
|
}
|
|
}
|
|
|
|
void GPU::HandleGetGPUInfoCommand(u32 value)
|
|
{
|
|
const u8 subcommand = Truncate8(value & 0x07);
|
|
switch (subcommand)
|
|
{
|
|
case 0x00:
|
|
case 0x01:
|
|
case 0x06:
|
|
case 0x07:
|
|
// leave GPUREAD intact
|
|
break;
|
|
|
|
case 0x02: // Get Texture Window
|
|
{
|
|
m_GPUREAD_latch = m_draw_mode.texture_window_value;
|
|
DEBUG_LOG("Get texture window => 0x{:08X}", m_GPUREAD_latch);
|
|
}
|
|
break;
|
|
|
|
case 0x03: // Get Draw Area Top Left
|
|
{
|
|
m_GPUREAD_latch = (m_drawing_area.left | (m_drawing_area.top << 10));
|
|
DEBUG_LOG("Get drawing area top left: ({}, {}) => 0x{:08X}", m_drawing_area.left, m_drawing_area.top,
|
|
m_GPUREAD_latch);
|
|
}
|
|
break;
|
|
|
|
case 0x04: // Get Draw Area Bottom Right
|
|
{
|
|
m_GPUREAD_latch = (m_drawing_area.right | (m_drawing_area.bottom << 10));
|
|
DEBUG_LOG("Get drawing area bottom right: ({}, {}) => 0x{:08X}", m_drawing_area.bottom, m_drawing_area.right,
|
|
m_GPUREAD_latch);
|
|
}
|
|
break;
|
|
|
|
case 0x05: // Get Drawing Offset
|
|
{
|
|
m_GPUREAD_latch = (m_drawing_offset.x & 0x7FF) | ((m_drawing_offset.y & 0x7FF) << 11);
|
|
DEBUG_LOG("Get drawing offset: ({}, {}) => 0x{:08X}", m_drawing_offset.x, m_drawing_offset.y, m_GPUREAD_latch);
|
|
}
|
|
break;
|
|
|
|
[[unlikely]] default:
|
|
WARNING_LOG("Unhandled GetGPUInfo(0x{:02X})", subcommand);
|
|
break;
|
|
}
|
|
}
|
|
|
|
void GPU::UpdateCLUTIfNeeded(GPUTextureMode texmode, GPUTexturePaletteReg clut)
|
|
{
|
|
if (texmode >= GPUTextureMode::Direct16Bit)
|
|
return;
|
|
|
|
const bool needs_8bit = (texmode == GPUTextureMode::Palette8Bit);
|
|
if ((clut.bits != m_current_clut_reg_bits) || BoolToUInt8(needs_8bit) > BoolToUInt8(m_current_clut_is_8bit))
|
|
{
|
|
DEBUG_LOG("Reloading CLUT from {},{}, {}", clut.GetXBase(), clut.GetYBase(), needs_8bit ? "8-bit" : "4-bit");
|
|
AddCommandTicks(needs_8bit ? 256 : 16);
|
|
m_current_clut_reg_bits = clut.bits;
|
|
m_current_clut_is_8bit = needs_8bit;
|
|
|
|
GPUBackendUpdateCLUTCommand* cmd = GPUBackend::NewUpdateCLUTCommand();
|
|
cmd->reg.bits = clut.bits;
|
|
cmd->clut_is_8bit = needs_8bit;
|
|
GPUBackend::PushCommand(cmd);
|
|
}
|
|
}
|
|
|
|
void GPU::InvalidateCLUT()
|
|
{
|
|
m_current_clut_reg_bits = std::numeric_limits<decltype(m_current_clut_reg_bits)>::max(); // will never match
|
|
m_current_clut_is_8bit = false;
|
|
}
|
|
|
|
bool GPU::IsCLUTValid() const
|
|
{
|
|
return (m_current_clut_reg_bits != std::numeric_limits<decltype(m_current_clut_reg_bits)>::max());
|
|
}
|
|
|
|
void GPU::SetClampedDrawingArea()
|
|
{
|
|
m_clamped_drawing_area = GetClampedDrawingArea(m_drawing_area);
|
|
}
|
|
|
|
GSVector4i GPU::GetClampedDrawingArea(const GPUDrawingArea& drawing_area)
|
|
{
|
|
if (drawing_area.left > drawing_area.right || drawing_area.top > drawing_area.bottom) [[unlikely]]
|
|
return GSVector4i::zero();
|
|
|
|
const u32 right = std::min(drawing_area.right + 1, static_cast<u32>(VRAM_WIDTH));
|
|
const u32 left = std::min(drawing_area.left, std::min(drawing_area.right, VRAM_WIDTH - 1));
|
|
const u32 bottom = std::min(drawing_area.bottom + 1, static_cast<u32>(VRAM_HEIGHT));
|
|
const u32 top = std::min(drawing_area.top, std::min(drawing_area.bottom, VRAM_HEIGHT - 1));
|
|
return GSVector4i(left, top, right, bottom);
|
|
}
|
|
|
|
void GPU::SetDrawMode(u16 value)
|
|
{
|
|
GPUDrawModeReg new_mode_reg{static_cast<u16>(value & GPUDrawModeReg::MASK)};
|
|
if (!m_set_texture_disable_mask)
|
|
new_mode_reg.texture_disable = false;
|
|
|
|
m_draw_mode.mode_reg.bits = new_mode_reg.bits;
|
|
|
|
// Bits 0..10 are returned in the GPU status register.
|
|
m_GPUSTAT.bits = (m_GPUSTAT.bits & ~(GPUDrawModeReg::GPUSTAT_MASK)) |
|
|
(ZeroExtend32(new_mode_reg.bits) & GPUDrawModeReg::GPUSTAT_MASK);
|
|
m_GPUSTAT.texture_disable = m_draw_mode.mode_reg.texture_disable;
|
|
}
|
|
|
|
void GPU::SetTexturePalette(u16 value)
|
|
{
|
|
value &= DrawMode::PALETTE_MASK;
|
|
m_draw_mode.palette_reg.bits = value;
|
|
}
|
|
|
|
void GPU::SetTextureWindow(u32 value)
|
|
{
|
|
value &= DrawMode::TEXTURE_WINDOW_MASK;
|
|
if (m_draw_mode.texture_window_value == value)
|
|
return;
|
|
|
|
const u8 mask_x = Truncate8(value & UINT32_C(0x1F));
|
|
const u8 mask_y = Truncate8((value >> 5) & UINT32_C(0x1F));
|
|
const u8 offset_x = Truncate8((value >> 10) & UINT32_C(0x1F));
|
|
const u8 offset_y = Truncate8((value >> 15) & UINT32_C(0x1F));
|
|
DEBUG_LOG("Set texture window {:02X} {:02X} {:02X} {:02X}", mask_x, mask_y, offset_x, offset_y);
|
|
|
|
m_draw_mode.texture_window.and_x = ~(mask_x * 8);
|
|
m_draw_mode.texture_window.and_y = ~(mask_y * 8);
|
|
m_draw_mode.texture_window.or_x = (offset_x & mask_x) * 8u;
|
|
m_draw_mode.texture_window.or_y = (offset_y & mask_y) * 8u;
|
|
m_draw_mode.texture_window_value = value;
|
|
}
|
|
|
|
void GPU::CalculateDrawRect(u32 window_width, u32 window_height, u32 crtc_display_width, u32 crtc_display_height,
|
|
s32 display_origin_left, s32 display_origin_top, u32 display_vram_width,
|
|
u32 display_vram_height, DisplayRotation rotation, DisplayAlignment alignment,
|
|
float pixel_aspect_ratio, bool stretch_vertically, bool integer_scale,
|
|
GSVector4i* display_rect, GSVector4i* draw_rect)
|
|
{
|
|
const float window_ratio = static_cast<float>(window_width) / static_cast<float>(window_height);
|
|
const float x_scale = pixel_aspect_ratio;
|
|
float display_width = static_cast<float>(crtc_display_width);
|
|
float display_height = static_cast<float>(crtc_display_height);
|
|
float active_left = static_cast<float>(display_origin_left);
|
|
float active_top = static_cast<float>(display_origin_top);
|
|
float active_width = static_cast<float>(display_vram_width);
|
|
float active_height = static_cast<float>(display_vram_height);
|
|
if (!stretch_vertically)
|
|
{
|
|
display_width *= x_scale;
|
|
active_left *= x_scale;
|
|
active_width *= x_scale;
|
|
}
|
|
else
|
|
{
|
|
display_height /= x_scale;
|
|
active_top /= x_scale;
|
|
active_height /= x_scale;
|
|
}
|
|
|
|
// swap width/height when rotated, the flipping of padding is taken care of in the shader with the rotation matrix
|
|
if (rotation == DisplayRotation::Rotate90 || rotation == DisplayRotation::Rotate270)
|
|
{
|
|
std::swap(display_width, display_height);
|
|
std::swap(active_width, active_height);
|
|
std::swap(active_top, active_left);
|
|
}
|
|
|
|
// now fit it within the window
|
|
float scale;
|
|
float left_padding, top_padding;
|
|
if ((display_width / display_height) >= window_ratio)
|
|
{
|
|
// align in middle vertically
|
|
scale = static_cast<float>(window_width) / display_width;
|
|
if (integer_scale)
|
|
{
|
|
scale = std::max(std::floor(scale), 1.0f);
|
|
left_padding = std::max<float>((static_cast<float>(window_width) - display_width * scale) / 2.0f, 0.0f);
|
|
}
|
|
else
|
|
{
|
|
left_padding = 0.0f;
|
|
}
|
|
|
|
switch (alignment)
|
|
{
|
|
case DisplayAlignment::RightOrBottom:
|
|
top_padding = std::max<float>(static_cast<float>(window_height) - (display_height * scale), 0.0f);
|
|
break;
|
|
|
|
case DisplayAlignment::Center:
|
|
top_padding = std::max<float>((static_cast<float>(window_height) - (display_height * scale)) / 2.0f, 0.0f);
|
|
break;
|
|
|
|
case DisplayAlignment::LeftOrTop:
|
|
default:
|
|
top_padding = 0.0f;
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// align in middle horizontally
|
|
scale = static_cast<float>(window_height) / display_height;
|
|
if (integer_scale)
|
|
{
|
|
scale = std::max(std::floor(scale), 1.0f);
|
|
top_padding = std::max<float>((static_cast<float>(window_height) - (display_height * scale)) / 2.0f, 0.0f);
|
|
}
|
|
else
|
|
{
|
|
top_padding = 0.0f;
|
|
}
|
|
|
|
switch (alignment)
|
|
{
|
|
case DisplayAlignment::RightOrBottom:
|
|
left_padding = std::max<float>(static_cast<float>(window_width) - (display_width * scale), 0.0f);
|
|
break;
|
|
|
|
case DisplayAlignment::Center:
|
|
left_padding = std::max<float>((static_cast<float>(window_width) - (display_width * scale)) / 2.0f, 0.0f);
|
|
break;
|
|
|
|
case DisplayAlignment::LeftOrTop:
|
|
default:
|
|
left_padding = 0.0f;
|
|
break;
|
|
}
|
|
}
|
|
|
|
// TODO: This should be a float rectangle. But because GL is lame, it only has integer viewports...
|
|
const s32 left = static_cast<s32>(active_left * scale + left_padding);
|
|
const s32 top = static_cast<s32>(active_top * scale + top_padding);
|
|
const s32 right = left + static_cast<s32>(active_width * scale);
|
|
const s32 bottom = top + static_cast<s32>(active_height * scale);
|
|
*draw_rect = GSVector4i(left, top, right, bottom);
|
|
*display_rect = GSVector4i(
|
|
GSVector4(left_padding, top_padding, left_padding + display_width * scale, top_padding + display_height * scale));
|
|
}
|
|
|
|
void GPU::ReadVRAM(u16 x, u16 y, u16 width, u16 height)
|
|
{
|
|
// If we're using the software renderer, we only need to sync the thread.
|
|
if (!GPUBackend::IsUsingHardwareBackend() || g_settings.gpu_use_software_renderer_for_readbacks)
|
|
{
|
|
GPUBackend::SyncGPUThread(true);
|
|
return;
|
|
}
|
|
|
|
GPUBackendReadVRAMCommand* cmd = GPUBackend::NewReadVRAMCommand();
|
|
cmd->x = x;
|
|
cmd->y = y;
|
|
cmd->width = width;
|
|
cmd->height = height;
|
|
GPUBackend::PushCommandAndSync(cmd, true);
|
|
}
|
|
|
|
void GPU::UpdateVRAM(u16 x, u16 y, u16 width, u16 height, const void* data, bool set_mask, bool check_mask)
|
|
{
|
|
const u32 num_words = width * height;
|
|
GPUBackendUpdateVRAMCommand* cmd = GPUBackend::NewUpdateVRAMCommand(num_words);
|
|
cmd->x = x;
|
|
cmd->y = y;
|
|
cmd->width = width;
|
|
cmd->height = height;
|
|
cmd->set_mask_while_drawing = set_mask;
|
|
cmd->check_mask_before_draw = check_mask;
|
|
std::memcpy(cmd->data, data, num_words * sizeof(u16));
|
|
GPUBackend::PushCommand(cmd);
|
|
}
|
|
|
|
void GPU::ClearDisplay()
|
|
{
|
|
GPUBackend::PushCommand(GPUBackend::NewClearDisplayCommand());
|
|
}
|
|
|
|
void GPU::UpdateDisplay(bool submit_frame)
|
|
{
|
|
const bool interlaced = IsInterlacedDisplayEnabled();
|
|
const u8 interlaced_field = GetInterlacedDisplayField();
|
|
const bool line_skip = (interlaced && m_GPUSTAT.vertical_resolution);
|
|
GPUBackendUpdateDisplayCommand* cmd = GPUBackend::NewUpdateDisplayCommand();
|
|
cmd->display_width = m_crtc_state.display_width;
|
|
cmd->display_height = m_crtc_state.display_height;
|
|
cmd->display_origin_left = m_crtc_state.display_origin_left;
|
|
cmd->display_origin_top = m_crtc_state.display_origin_top;
|
|
cmd->display_vram_left = m_crtc_state.display_vram_left;
|
|
cmd->display_vram_top = m_crtc_state.display_vram_top + (interlaced_field & BoolToUInt8(line_skip));
|
|
cmd->display_vram_width = m_crtc_state.display_vram_width;
|
|
cmd->display_vram_height = m_crtc_state.display_vram_height >> BoolToUInt8(interlaced);
|
|
cmd->X = m_crtc_state.regs.X;
|
|
cmd->interlaced_display_enabled = interlaced;
|
|
cmd->interlaced_display_field = ConvertToBoolUnchecked(interlaced_field);
|
|
cmd->interlaced_display_interleaved = line_skip;
|
|
cmd->display_24bit = m_GPUSTAT.display_area_color_depth_24;
|
|
cmd->display_disabled = IsDisplayDisabled();
|
|
cmd->display_pixel_aspect_ratio = ComputePixelAspectRatio();
|
|
if ((cmd->submit_frame = submit_frame && System::GetFramePresentationParameters(&cmd->frame)))
|
|
{
|
|
const bool drain_one = cmd->frame.present_frame && GPUBackend::BeginQueueFrame();
|
|
GPUThread::PushCommandAndWakeThread(cmd);
|
|
if (drain_one)
|
|
GPUBackend::WaitForOneQueuedFrame();
|
|
}
|
|
else
|
|
{
|
|
GPUThread::PushCommand(cmd);
|
|
}
|
|
}
|
|
|
|
void GPU::QueuePresentCurrentFrame()
|
|
{
|
|
DebugAssert(g_settings.IsRunaheadEnabled());
|
|
|
|
// Submit can be skipped if it's a dupe frame and we're not dumping frames.
|
|
GPUBackendSubmitFrameCommand* cmd = GPUBackend::NewSubmitFrameCommand();
|
|
if (System::GetFramePresentationParameters(&cmd->frame))
|
|
{
|
|
const bool drain_one = cmd->frame.present_frame && GPUBackend::BeginQueueFrame();
|
|
GPUThread::PushCommandAndWakeThread(cmd);
|
|
if (drain_one)
|
|
GPUBackend::WaitForOneQueuedFrame();
|
|
}
|
|
}
|
|
|
|
bool GPU::DumpVRAMToFile(const char* filename)
|
|
{
|
|
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
|
|
|
|
const char* extension = std::strrchr(filename, '.');
|
|
if (extension && StringUtil::Strcasecmp(extension, ".png") == 0)
|
|
{
|
|
return DumpVRAMToFile(filename, VRAM_WIDTH, VRAM_HEIGHT, sizeof(u16) * VRAM_WIDTH, g_vram, true);
|
|
}
|
|
else if (extension && StringUtil::Strcasecmp(extension, ".bin") == 0)
|
|
{
|
|
return FileSystem::WriteBinaryFile(filename, g_vram, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
|
|
}
|
|
else
|
|
{
|
|
ERROR_LOG("Unknown extension: '{}'", filename);
|
|
return false;
|
|
}
|
|
}
|
|
|
|
bool GPU::DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer, bool remove_alpha)
|
|
{
|
|
Image image(width, height, ImageFormat::RGBA8);
|
|
|
|
const char* ptr_in = static_cast<const char*>(buffer);
|
|
for (u32 row = 0; row < height; row++)
|
|
{
|
|
const char* row_ptr_in = ptr_in;
|
|
u8* ptr_out = image.GetRowPixels(row);
|
|
|
|
for (u32 col = 0; col < width; col++)
|
|
{
|
|
u16 src_col;
|
|
std::memcpy(&src_col, row_ptr_in, sizeof(u16));
|
|
row_ptr_in += sizeof(u16);
|
|
|
|
const u32 pixel32 = VRAMRGBA5551ToRGBA8888(remove_alpha ? (src_col | u16(0x8000)) : src_col);
|
|
std::memcpy(ptr_out, &pixel32, sizeof(pixel32));
|
|
ptr_out += sizeof(pixel32);
|
|
}
|
|
|
|
ptr_in += stride;
|
|
}
|
|
|
|
return image.SaveToFile(filename);
|
|
}
|
|
|
|
void GPU::DrawDebugStateWindow(float scale)
|
|
{
|
|
if (ImGui::CollapsingHeader("GPU", ImGuiTreeNodeFlags_DefaultOpen))
|
|
{
|
|
static constexpr std::array<const char*, 5> state_strings = {
|
|
{"Idle", "Reading VRAM", "Writing VRAM", "Drawing Polyline"}};
|
|
|
|
ImGui::Text("State: %s", state_strings[static_cast<u8>(m_blitter_state)]);
|
|
ImGui::Text("Dither: %s", m_GPUSTAT.dither_enable ? "Enabled" : "Disabled");
|
|
ImGui::Text("Draw To Displayed Field: %s", m_GPUSTAT.draw_to_displayed_field ? "Enabled" : "Disabled");
|
|
ImGui::Text("Draw Set Mask Bit: %s", m_GPUSTAT.set_mask_while_drawing ? "Yes" : "No");
|
|
ImGui::Text("Draw To Masked Pixels: %s", m_GPUSTAT.check_mask_before_draw ? "Yes" : "No");
|
|
ImGui::Text("Reverse Flag: %s", m_GPUSTAT.reverse_flag ? "Yes" : "No");
|
|
ImGui::Text("Texture Disable: %s", m_GPUSTAT.texture_disable ? "Yes" : "No");
|
|
ImGui::Text("PAL Mode: %s", m_GPUSTAT.pal_mode ? "Yes" : "No");
|
|
ImGui::Text("Interrupt Request: %s", m_GPUSTAT.interrupt_request ? "Yes" : "No");
|
|
ImGui::Text("DMA Request: %s", m_GPUSTAT.dma_data_request ? "Yes" : "No");
|
|
}
|
|
|
|
if (ImGui::CollapsingHeader("CRTC", ImGuiTreeNodeFlags_DefaultOpen))
|
|
{
|
|
const auto& cs = m_crtc_state;
|
|
ImGui::Text("Clock: %s", (m_console_is_pal ? (m_GPUSTAT.pal_mode ? "PAL-on-PAL" : "NTSC-on-PAL") :
|
|
(m_GPUSTAT.pal_mode ? "PAL-on-NTSC" : "NTSC-on-NTSC")));
|
|
ImGui::Text("Horizontal Frequency: %.3f KHz", ComputeHorizontalFrequency() / 1000.0f);
|
|
ImGui::Text("Vertical Frequency: %.3f Hz", ComputeVerticalFrequency());
|
|
ImGui::Text("Dot Clock Divider: %u", cs.dot_clock_divider);
|
|
ImGui::Text("Vertical Interlace: %s (%s field)", m_GPUSTAT.vertical_interlace ? "Yes" : "No",
|
|
cs.interlaced_field ? "odd" : "even");
|
|
ImGui::Text("Current Scanline: %u (tick %u)", cs.current_scanline, cs.current_tick_in_scanline);
|
|
ImGui::Text("Display Disable: %s", m_GPUSTAT.display_disable ? "Yes" : "No");
|
|
ImGui::Text("Displaying Odd Lines: %s", cs.active_line_lsb ? "Yes" : "No");
|
|
ImGui::Text("Color Depth: %u-bit", m_GPUSTAT.display_area_color_depth_24 ? 24 : 15);
|
|
ImGui::Text("Start Offset in VRAM: (%u, %u)", cs.regs.X.GetValue(), cs.regs.Y.GetValue());
|
|
ImGui::Text("Display Total: %u (%u) horizontal, %u vertical", cs.horizontal_total,
|
|
cs.horizontal_total / cs.dot_clock_divider, cs.vertical_total);
|
|
ImGui::Text("Configured Display Range: %u-%u (%u-%u), %u-%u", cs.regs.X1.GetValue(), cs.regs.X2.GetValue(),
|
|
cs.regs.X1.GetValue() / cs.dot_clock_divider, cs.regs.X2.GetValue() / cs.dot_clock_divider,
|
|
cs.regs.Y1.GetValue(), cs.regs.Y2.GetValue());
|
|
ImGui::Text("Output Display Range: %u-%u (%u-%u), %u-%u", cs.horizontal_display_start, cs.horizontal_display_end,
|
|
cs.horizontal_display_start / cs.dot_clock_divider, cs.horizontal_display_end / cs.dot_clock_divider,
|
|
cs.vertical_display_start, cs.vertical_display_end);
|
|
ImGui::Text("Cropping: %s", Settings::GetDisplayCropModeName(g_settings.display_crop_mode));
|
|
ImGui::Text("Visible Display Range: %u-%u (%u-%u), %u-%u", cs.horizontal_visible_start, cs.horizontal_visible_end,
|
|
cs.horizontal_visible_start / cs.dot_clock_divider, cs.horizontal_visible_end / cs.dot_clock_divider,
|
|
cs.vertical_visible_start, cs.vertical_visible_end);
|
|
ImGui::Text("Display Resolution: %ux%u", cs.display_width, cs.display_height);
|
|
ImGui::Text("Display Origin: %u, %u", cs.display_origin_left, cs.display_origin_top);
|
|
ImGui::Text("Displayed/Visible VRAM Portion: %ux%u @ (%u, %u)", cs.display_vram_width, cs.display_vram_height,
|
|
cs.display_vram_left, cs.display_vram_top);
|
|
ImGui::Text("Padding: Left=%d, Top=%d, Right=%d, Bottom=%d", cs.display_origin_left, cs.display_origin_top,
|
|
cs.display_width - cs.display_vram_width - cs.display_origin_left,
|
|
cs.display_height - cs.display_vram_height - cs.display_origin_top);
|
|
}
|
|
}
|
|
|
|
bool GPU::StartRecordingGPUDump(const char* path, u32 num_frames /* = 1 */)
|
|
{
|
|
if (m_gpu_dump)
|
|
StopRecordingGPUDump();
|
|
|
|
// if we're not dumping forever, compute the frame count based on the internal fps
|
|
// +1 because we want to actually see the buffer swap...
|
|
if (num_frames != 0)
|
|
{
|
|
num_frames =
|
|
std::max(num_frames, static_cast<u32>(static_cast<float>(num_frames + 1) *
|
|
std::ceil(PerformanceCounters::GetVPS() / PerformanceCounters::GetFPS())));
|
|
}
|
|
|
|
// ensure vram is up to date
|
|
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
|
|
|
|
std::string osd_key = fmt::format("GPUDump_{}", Path::GetFileName(path));
|
|
Error error;
|
|
m_gpu_dump = GPUDump::Recorder::Create(path, System::GetGameSerial(), num_frames, &error);
|
|
if (!m_gpu_dump)
|
|
{
|
|
Host::AddIconOSDWarning(
|
|
std::move(osd_key), ICON_EMOJI_CAMERA_WITH_FLASH,
|
|
fmt::format("{}\n{}", TRANSLATE_SV("GPU", "Failed to start GPU trace:"), error.GetDescription()),
|
|
Host::OSD_ERROR_DURATION);
|
|
return false;
|
|
}
|
|
|
|
Host::AddIconOSDMessage(
|
|
std::move(osd_key), ICON_EMOJI_CAMERA_WITH_FLASH,
|
|
(num_frames != 0) ?
|
|
fmt::format(TRANSLATE_FS("GPU", "Saving {0} frame GPU trace to '{1}'."), num_frames, Path::GetFileName(path)) :
|
|
fmt::format(TRANSLATE_FS("GPU", "Saving multi-frame frame GPU trace to '{1}'."), num_frames,
|
|
Path::GetFileName(path)),
|
|
Host::OSD_QUICK_DURATION);
|
|
|
|
// save screenshot to same location to identify it
|
|
GPUBackend::RenderScreenshotToFile(Path::ReplaceExtension(path, "png"), DisplayScreenshotMode::ScreenResolution, 85,
|
|
false);
|
|
return true;
|
|
}
|
|
|
|
void GPU::StopRecordingGPUDump()
|
|
{
|
|
if (!m_gpu_dump)
|
|
return;
|
|
|
|
Error error;
|
|
if (!m_gpu_dump->Close(&error))
|
|
{
|
|
Host::AddIconOSDWarning(
|
|
"GPUDump", ICON_EMOJI_CAMERA_WITH_FLASH,
|
|
fmt::format("{}\n{}", TRANSLATE_SV("GPU", "Failed to close GPU trace:"), error.GetDescription()),
|
|
Host::OSD_ERROR_DURATION);
|
|
m_gpu_dump.reset();
|
|
}
|
|
|
|
// Are we compressing the dump?
|
|
const GPUDumpCompressionMode compress_mode =
|
|
Settings::ParseGPUDumpCompressionMode(Host::GetTinyStringSettingValue("GPU", "DumpCompressionMode"))
|
|
.value_or(Settings::DEFAULT_GPU_DUMP_COMPRESSION_MODE);
|
|
std::string osd_key = fmt::format("GPUDump_{}", Path::GetFileName(m_gpu_dump->GetPath()));
|
|
if (compress_mode == GPUDumpCompressionMode::Disabled)
|
|
{
|
|
Host::AddIconOSDMessage(
|
|
"GPUDump", ICON_EMOJI_CAMERA_WITH_FLASH,
|
|
fmt::format(TRANSLATE_FS("GPU", "Saved GPU trace to '{}'."), Path::GetFileName(m_gpu_dump->GetPath())),
|
|
Host::OSD_QUICK_DURATION);
|
|
m_gpu_dump.reset();
|
|
return;
|
|
}
|
|
|
|
std::string source_path = m_gpu_dump->GetPath();
|
|
m_gpu_dump.reset();
|
|
|
|
// Use a 60 second timeout to give it plenty of time to actually save.
|
|
Host::AddIconOSDMessage(
|
|
osd_key, ICON_EMOJI_CAMERA_WITH_FLASH,
|
|
fmt::format(TRANSLATE_FS("GPU", "Compressing GPU trace '{}'..."), Path::GetFileName(source_path)), 60.0f);
|
|
System::QueueAsyncTask([compress_mode, source_path = std::move(source_path), osd_key = std::move(osd_key)]() mutable {
|
|
Error error;
|
|
if (GPUDump::Recorder::Compress(source_path, compress_mode, &error))
|
|
{
|
|
Host::AddIconOSDMessage(
|
|
std::move(osd_key), ICON_EMOJI_CAMERA_WITH_FLASH,
|
|
fmt::format(TRANSLATE_FS("GPU", "Saved GPU trace to '{}'."), Path::GetFileName(source_path)),
|
|
Host::OSD_QUICK_DURATION);
|
|
}
|
|
else
|
|
{
|
|
Host::AddIconOSDWarning(
|
|
std::move(osd_key), ICON_EMOJI_CAMERA_WITH_FLASH,
|
|
fmt::format("{}\n{}",
|
|
SmallString::from_format(TRANSLATE_FS("GPU", "Failed to save GPU trace to '{}':"),
|
|
Path::GetFileName(source_path)),
|
|
error.GetDescription()),
|
|
Host::OSD_ERROR_DURATION);
|
|
}
|
|
});
|
|
}
|
|
|
|
void GPU::WriteCurrentVideoModeToDump(GPUDump::Recorder* dump) const
|
|
{
|
|
dump->WriteGP1Command(GP1Command::SetDisplayDisable, BoolToUInt32(m_GPUSTAT.display_disable));
|
|
dump->WriteGP1Command(GP1Command::SetDisplayStartAddress, m_crtc_state.regs.display_address_start);
|
|
dump->WriteGP1Command(GP1Command::SetHorizontalDisplayRange, m_crtc_state.regs.horizontal_display_range);
|
|
dump->WriteGP1Command(GP1Command::SetVerticalDisplayRange, m_crtc_state.regs.vertical_display_range);
|
|
dump->WriteGP1Command(GP1Command::SetAllowTextureDisable, BoolToUInt32(m_set_texture_disable_mask));
|
|
|
|
// display mode
|
|
GP1SetDisplayMode dispmode = {};
|
|
dispmode.horizontal_resolution_1 = m_GPUSTAT.horizontal_resolution_1.GetValue();
|
|
dispmode.vertical_resolution = m_GPUSTAT.vertical_resolution.GetValue();
|
|
dispmode.pal_mode = m_GPUSTAT.pal_mode.GetValue();
|
|
dispmode.display_area_color_depth = m_GPUSTAT.display_area_color_depth_24.GetValue();
|
|
dispmode.vertical_interlace = m_GPUSTAT.vertical_interlace.GetValue();
|
|
dispmode.horizontal_resolution_2 = m_GPUSTAT.horizontal_resolution_2.GetValue();
|
|
dispmode.reverse_flag = m_GPUSTAT.reverse_flag.GetValue();
|
|
dump->WriteGP1Command(GP1Command::SetDisplayMode, dispmode.bits);
|
|
|
|
// texture window/texture page
|
|
dump->WriteGP0Packet((0xE1u << 24) | ZeroExtend32(m_draw_mode.mode_reg.bits));
|
|
dump->WriteGP0Packet((0xE2u << 24) | m_draw_mode.texture_window_value);
|
|
|
|
// drawing area
|
|
dump->WriteGP0Packet((0xE3u << 24) | static_cast<u32>(m_drawing_area.left) |
|
|
(static_cast<u32>(m_drawing_area.top) << 10));
|
|
dump->WriteGP0Packet((0xE4u << 24) | static_cast<u32>(m_drawing_area.right) |
|
|
(static_cast<u32>(m_drawing_area.bottom) << 10));
|
|
|
|
// drawing offset
|
|
dump->WriteGP0Packet((0xE5u << 24) | (static_cast<u32>(m_drawing_offset.x) & 0x7FFu) |
|
|
((static_cast<u32>(m_drawing_offset.y) & 0x7FFu) << 11));
|
|
|
|
// mask bit
|
|
dump->WriteGP0Packet((0xE6u << 24) | BoolToUInt32(m_GPUSTAT.set_mask_while_drawing) |
|
|
(BoolToUInt32(m_GPUSTAT.check_mask_before_draw) << 1));
|
|
}
|
|
|
|
void GPU::ProcessGPUDumpPacket(GPUDump::PacketType type, const std::span<const u32> data)
|
|
{
|
|
const auto execute_all_commands = [this]() {
|
|
do
|
|
{
|
|
m_pending_command_ticks = 0;
|
|
s_command_tick_event.Deactivate();
|
|
ExecuteCommands();
|
|
} while (m_pending_command_ticks > 0);
|
|
};
|
|
|
|
switch (type)
|
|
{
|
|
case GPUDump::PacketType::GPUPort0Data:
|
|
{
|
|
if (data.empty()) [[unlikely]]
|
|
{
|
|
WARNING_LOG("Empty GPU dump GP0 packet!");
|
|
return;
|
|
}
|
|
|
|
if (data.size() == 1) [[unlikely]]
|
|
{
|
|
// direct GP0 write
|
|
WriteRegister(0, data[0]);
|
|
execute_all_commands();
|
|
}
|
|
else
|
|
{
|
|
// don't overflow the fifo...
|
|
size_t current_word = 0;
|
|
while (current_word < data.size())
|
|
{
|
|
// normally this would be constrained to the "real" fifo size, but VRAM updates also go through here
|
|
// it's easier to just push everything in and execute
|
|
const u32 block_size = std::min(m_fifo.GetSpace(), static_cast<u32>(data.size() - current_word));
|
|
if (block_size == 0)
|
|
{
|
|
ERROR_LOG("FIFO overflow while processing dump packet of {} words", data.size());
|
|
break;
|
|
}
|
|
|
|
for (u32 i = 0; i < block_size; i++)
|
|
m_fifo.Push(ZeroExtend64(data[current_word++]));
|
|
|
|
execute_all_commands();
|
|
;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
|
|
case GPUDump::PacketType::GPUPort1Data:
|
|
{
|
|
if (data.size() != 1) [[unlikely]]
|
|
{
|
|
WARNING_LOG("Incorrectly-sized GPU dump GP1 packet: {} words", data.size());
|
|
return;
|
|
}
|
|
|
|
WriteRegister(4, data[0]);
|
|
}
|
|
break;
|
|
|
|
case GPUDump::PacketType::VSyncEvent:
|
|
{
|
|
// don't play silly buggers with events
|
|
execute_all_commands();
|
|
|
|
// we _should_ be using the tick count for the event, but it breaks with looping.
|
|
// instead, just add a fixed amount
|
|
const TickCount crtc_ticks_per_frame =
|
|
static_cast<TickCount>(m_crtc_state.horizontal_total) * static_cast<TickCount>(m_crtc_state.vertical_total);
|
|
const TickCount system_ticks_per_frame =
|
|
CRTCTicksToSystemTicks(crtc_ticks_per_frame, m_crtc_state.fractional_ticks);
|
|
SystemTicksToCRTCTicks(system_ticks_per_frame, &m_crtc_state.fractional_ticks);
|
|
TimingEvents::SetGlobalTickCounter(TimingEvents::GetGlobalTickCounter() +
|
|
static_cast<GlobalTicks>(system_ticks_per_frame));
|
|
System::IncrementFrameNumber();
|
|
UpdateDisplay(true);
|
|
System::FrameDone();
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|