duckstation/src/core/gpu_backend.cpp

982 lines
32 KiB
C++

// SPDX-FileCopyrightText: 2019-2025 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
#include "gpu_backend.h"
#include "gpu.h"
#include "gpu_presenter.h"
#include "gpu_sw_rasterizer.h"
#include "gpu_thread.h"
#include "host.h"
#include "performance_counters.h"
#include "save_state_version.h"
#include "settings.h"
#include "system.h"
#include "system_private.h"
#include "util/gpu_device.h"
#include "util/imgui_manager.h"
#include "util/state_wrapper.h"
#include "common/error.h"
#include "common/file_system.h"
#include "common/log.h"
#include "common/path.h"
#include "common/threading.h"
#include "IconsEmoji.h"
#include "IconsFontAwesome5.h"
#include "fmt/format.h"
LOG_CHANNEL(GPU);
namespace {
// Raw tallies of guest GPU commands. They are incremented in GPUBackend::HandleCommand() and
// zeroed by GPUBackend::ResetStatistics().
struct Counters
{
u32 num_reads;      // ReadVRAM commands handled
u32 num_writes;     // UpdateVRAM commands handled
u32 num_copies;     // CopyVRAM commands handled
u32 num_vertices;   // vertices submitted by draw commands
u32 num_primitives; // primitives submitted by draw commands
};
// Per-frame averages published by GPUBackend::UpdateStatistics(). The inherited Counters fields
// hold the averaged command tallies; the host_* fields hold the averaged values read from
// GPUDevice::Statistics.
struct Stats : Counters
{
size_t host_buffer_streamed;
u32 host_num_draws;
u32 host_num_barriers;
u32 host_num_render_passes;
u32 host_num_copies;
u32 host_num_downloads;
u32 host_num_uploads;
};
// State shared by BeginQueueFrame(), WaitForOneQueuedFrame() and ReleaseQueuedFrame() to limit
// how many frames may be queued before the producer has to wait.
// NOTE(review): ALIGN_TO_CACHE_LINE presumably exists to avoid false sharing - confirm.
struct ALIGN_TO_CACHE_LINE CPUThreadState
{
// No wait is pending.
static constexpr u32 WAIT_NONE = 0;
// Stored by BeginQueueFrame() when the queued frame count exceeds the configured maximum.
static constexpr u32 WAIT_CPU_THREAD_WAITING = 1;
// Set by ReleaseQueuedFrame() immediately before it posts the semaphore.
static constexpr u32 WAIT_GPU_THREAD_SIGNALING = 2;
// Set by ReleaseQueuedFrame() after the post, unless the waiter already reset the state.
static constexpr u32 WAIT_GPU_THREAD_POSTED = 3;
// Incremented by BeginQueueFrame(), decremented by ReleaseQueuedFrame().
std::atomic<u32> queued_frames;
// One of the WAIT_* values above.
std::atomic<u32> wait_state;
// Posted by ReleaseQueuedFrame(), waited on by WaitForOneQueuedFrame().
Threading::KernelSemaphore gpu_thread_wait;
};
} // namespace
// Command tallies accumulated since the last ResetStatistics().
static Counters s_counters = {};
// Averaged statistics last computed by UpdateStatistics(); read by the Get*StatsString() methods.
static Stats s_stats = {};
// Frame queue throttling state; see BeginQueueFrame()/WaitForOneQueuedFrame()/ReleaseQueuedFrame().
static CPUThreadState s_cpu_thread_state = {};
// Binds the backend to its presenter, selects the software rasterizer implementation and
// starts from a clean set of statistics.
GPUBackend::GPUBackend(GPUPresenter& presenter) : m_presenter(presenter)
{
GPU_SW_Rasterizer::SelectImplementation();
ResetStatistics();
}
// Clears the presenter's display texture when the backend is destroyed.
GPUBackend::~GPUBackend()
{
m_presenter.ClearDisplayTexture();
}
// Fills in the vertex input layout and primitive type of `config` for drawing a ScreenVertex
// quad: attribute 0 is a two-float position, attribute 1 is a two-float texture coordinate.
void GPUBackend::SetScreenQuadInputLayout(GPUPipeline::GraphicsConfig& config)
{
  using Attribute = GPUPipeline::VertexAttribute;

  static constexpr Attribute s_quad_attributes[] = {
    Attribute::Make(0, Attribute::Semantic::Position, 0, Attribute::Type::Float, 2, OFFSETOF(ScreenVertex, x)),
    Attribute::Make(1, Attribute::Semantic::TexCoord, 0, Attribute::Type::Float, 2, OFFSETOF(ScreenVertex, u)),
  };

  // The quad is drawn as a four-vertex triangle strip.
  config.primitive = GPUPipeline::Primitive::TriangleStrips;
  config.input_layout.vertex_stride = sizeof(ScreenVertex);
  config.input_layout.vertex_attributes = s_quad_attributes;
}
// Converts a rectangle given in render target coordinates into clip space. X is mapped to
// (2 * x / width) - 1 and Y to 1 - (2 * y / height), so Y is flipped. The result is packed in
// the same component order as `bounds`.
GSVector4 GPUBackend::GetScreenQuadClipSpaceCoordinates(const GSVector4i bounds, const GSVector2i rt_size)
{
  const GSVector2 target_size = GSVector2(rt_size);

  // Regroup so that both X components come first, followed by both Y components.
  const GSVector4 grouped_edges = GSVector4(bounds.xzyw());

  const GSVector2 clip_x =
    ((grouped_edges.xy() * GSVector2::cxpr(2.0f)) / target_size.xx()) - GSVector2::cxpr(1.0f);
  const GSVector2 clip_y =
    GSVector2::cxpr(1.0f) - (GSVector2::cxpr(2.0f) * (grouped_edges.zw() / target_size.yy()));

  // Interleave back into the original component order.
  return GSVector4::xyxy(clip_x, clip_y).xzyw();
}
void GPUBackend::DrawScreenQuad(const GSVector4i bounds, const GSVector2i rt_size,
const GSVector4 uv_bounds /* = GSVector4::cxpr(0.0f, 0.0f, 1.0f, 1.0f) */)
{
const GSVector4 xy = GetScreenQuadClipSpaceCoordinates(bounds, rt_size);
ScreenVertex* vertices;
u32 space;
u32 base_vertex;
g_gpu_device->MapVertexBuffer(sizeof(ScreenVertex), 4, reinterpret_cast<void**>(&vertices), &space, &base_vertex);
vertices[0].Set(xy.xy(), uv_bounds.xy());
vertices[1].Set(xy.zyzw().xy(), uv_bounds.zyzw().xy());
vertices[2].Set(xy.xwzw().xy(), uv_bounds.xwzw().xy());
vertices[3].Set(xy.zw(), uv_bounds.zw());
g_gpu_device->UnmapVertexBuffer(sizeof(ScreenVertex), 4);
g_gpu_device->Draw(4, base_vertex);
}
// Base initialization: caches the clamped form of the rasterizer's current drawing area.
// `clear_vram` and `error` are not used here, and this implementation always returns true.
bool GPUBackend::Initialize(bool clear_vram, Error* error)
{
m_clamped_drawing_area = GPU::GetClampedDrawingArea(GPU_SW_Rasterizer::g_drawing_area);
return true;
}
// Reacts to a settings change. The base implementation only resets the statistics when the
// "show GPU stats" option was toggled, and always reports success.
bool GPUBackend::UpdateSettings(const GPUSettings& old_settings, Error* error)
{
  const bool stats_display_toggled =
    (g_gpu_settings.display_show_gpu_stats != old_settings.display_show_gpu_stats);
  if (stats_display_toggled)
    GPUBackend::ResetStatistics();

  return true;
}
// Intentionally empty in the base class; `force_reload` is ignored.
void GPUBackend::UpdatePostProcessingSettings(bool force_reload)
{
}
// Allocates a header-only ClearVRAM command from the GPU thread's command allocator.
GPUThreadCommand* GPUBackend::NewClearVRAMCommand()
{
  auto* const cmd = static_cast<GPUThreadCommand*>(
    GPUThread::AllocateCommand(GPUBackendCommandType::ClearVRAM, sizeof(GPUThreadCommand)));
  return cmd;
}
// Allocates a header-only ClearDisplay command from the GPU thread's command allocator.
GPUThreadCommand* GPUBackend::NewClearDisplayCommand()
{
  auto* const cmd = static_cast<GPUThreadCommand*>(
    GPUThread::AllocateCommand(GPUBackendCommandType::ClearDisplay, sizeof(GPUThreadCommand)));
  return cmd;
}
// Allocates an UpdateDisplay command from the GPU thread's command allocator.
GPUBackendUpdateDisplayCommand* GPUBackend::NewUpdateDisplayCommand()
{
  auto* const cmd = static_cast<GPUBackendUpdateDisplayCommand*>(
    GPUThread::AllocateCommand(GPUBackendCommandType::UpdateDisplay, sizeof(GPUBackendUpdateDisplayCommand)));
  return cmd;
}
// Allocates a SubmitFrame command from the GPU thread's command allocator.
GPUBackendSubmitFrameCommand* GPUBackend::NewSubmitFrameCommand()
{
  // Size the allocation by the command type that is actually returned. This was previously
  // sized with sizeof(GPUBackendUpdateDisplayCommand), a copy-paste slip from the factory above.
  return static_cast<GPUBackendSubmitFrameCommand*>(
    GPUThread::AllocateCommand(GPUBackendCommandType::SubmitFrame, sizeof(GPUBackendSubmitFrameCommand)));
}
// Allocates a header-only ClearCache command from the GPU thread's command allocator.
GPUThreadCommand* GPUBackend::NewClearCacheCommand()
{
  auto* const cmd = static_cast<GPUThreadCommand*>(
    GPUThread::AllocateCommand(GPUBackendCommandType::ClearCache, sizeof(GPUThreadCommand)));
  return cmd;
}
// Allocates a header-only BufferSwapped command from the GPU thread's command allocator.
GPUThreadCommand* GPUBackend::NewBufferSwappedCommand()
{
  auto* const cmd = static_cast<GPUThreadCommand*>(
    GPUThread::AllocateCommand(GPUBackendCommandType::BufferSwapped, sizeof(GPUThreadCommand)));
  return cmd;
}
// Allocates a ReadVRAM command from the GPU thread's command allocator.
GPUBackendReadVRAMCommand* GPUBackend::NewReadVRAMCommand()
{
  auto* const cmd = static_cast<GPUBackendReadVRAMCommand*>(
    GPUThread::AllocateCommand(GPUBackendCommandType::ReadVRAM, sizeof(GPUBackendReadVRAMCommand)));
  return cmd;
}
// Allocates a FillVRAM command from the GPU thread's command allocator.
GPUBackendFillVRAMCommand* GPUBackend::NewFillVRAMCommand()
{
  auto* const cmd = static_cast<GPUBackendFillVRAMCommand*>(
    GPUThread::AllocateCommand(GPUBackendCommandType::FillVRAM, sizeof(GPUBackendFillVRAMCommand)));
  return cmd;
}
// Allocates an UpdateVRAM command with room for `num_words` trailing 16-bit values after the
// fixed-size header.
GPUBackendUpdateVRAMCommand* GPUBackend::NewUpdateVRAMCommand(u32 num_words)
{
  const u32 total_size = sizeof(GPUBackendUpdateVRAMCommand) + (num_words * sizeof(u16));
  return static_cast<GPUBackendUpdateVRAMCommand*>(
    GPUThread::AllocateCommand(GPUBackendCommandType::UpdateVRAM, total_size));
}
// Allocates a CopyVRAM command from the GPU thread's command allocator.
GPUBackendCopyVRAMCommand* GPUBackend::NewCopyVRAMCommand()
{
  auto* const cmd = static_cast<GPUBackendCopyVRAMCommand*>(
    GPUThread::AllocateCommand(GPUBackendCommandType::CopyVRAM, sizeof(GPUBackendCopyVRAMCommand)));
  return cmd;
}
// Allocates a SetDrawingArea command from the GPU thread's command allocator.
GPUBackendSetDrawingAreaCommand* GPUBackend::NewSetDrawingAreaCommand()
{
  auto* const cmd = static_cast<GPUBackendSetDrawingAreaCommand*>(
    GPUThread::AllocateCommand(GPUBackendCommandType::SetDrawingArea, sizeof(GPUBackendSetDrawingAreaCommand)));
  return cmd;
}
// Allocates an UpdateCLUT command from the GPU thread's command allocator.
GPUBackendUpdateCLUTCommand* GPUBackend::NewUpdateCLUTCommand()
{
  auto* const cmd = static_cast<GPUBackendUpdateCLUTCommand*>(
    GPUThread::AllocateCommand(GPUBackendCommandType::UpdateCLUT, sizeof(GPUBackendUpdateCLUTCommand)));
  return cmd;
}
// Allocates a DrawPolygon command with trailing storage for `num_vertices` vertices and records
// the vertex count (truncated to 16 bits) in the command.
GPUBackendDrawPolygonCommand* GPUBackend::NewDrawPolygonCommand(u32 num_vertices)
{
  const u32 total_size =
    sizeof(GPUBackendDrawPolygonCommand) + (num_vertices * sizeof(GPUBackendDrawPolygonCommand::Vertex));
  auto* const polygon = static_cast<GPUBackendDrawPolygonCommand*>(
    GPUThread::AllocateCommand(GPUBackendCommandType::DrawPolygon, total_size));
  polygon->num_vertices = Truncate16(num_vertices);
  return polygon;
}
// Allocates a DrawPrecisePolygon command with trailing storage for `num_vertices` vertices and
// records the vertex count (truncated to 16 bits) in the command.
GPUBackendDrawPrecisePolygonCommand* GPUBackend::NewDrawPrecisePolygonCommand(u32 num_vertices)
{
  const u32 total_size =
    sizeof(GPUBackendDrawPrecisePolygonCommand) + (num_vertices * sizeof(GPUBackendDrawPrecisePolygonCommand::Vertex));
  auto* const polygon = static_cast<GPUBackendDrawPrecisePolygonCommand*>(
    GPUThread::AllocateCommand(GPUBackendCommandType::DrawPrecisePolygon, total_size));
  polygon->num_vertices = Truncate16(num_vertices);
  return polygon;
}
// Allocates a DrawRectangle command from the GPU thread's command allocator.
GPUBackendDrawRectangleCommand* GPUBackend::NewDrawRectangleCommand()
{
  auto* const cmd = static_cast<GPUBackendDrawRectangleCommand*>(
    GPUThread::AllocateCommand(GPUBackendCommandType::DrawRectangle, sizeof(GPUBackendDrawRectangleCommand)));
  return cmd;
}
// Allocates a DrawLine command with trailing storage for `num_vertices` vertices and records the
// vertex count (truncated to 16 bits) in the command.
GPUBackendDrawLineCommand* GPUBackend::NewDrawLineCommand(u32 num_vertices)
{
  const u32 total_size =
    sizeof(GPUBackendDrawLineCommand) + (num_vertices * sizeof(GPUBackendDrawLineCommand::Vertex));
  auto* const line = static_cast<GPUBackendDrawLineCommand*>(
    GPUThread::AllocateCommand(GPUBackendCommandType::DrawLine, total_size));
  line->num_vertices = Truncate16(num_vertices);
  return line;
}
// Allocates a DrawPreciseLine command with trailing storage for `num_vertices` vertices and
// records the vertex count (truncated to 16 bits) in the command.
GPUBackendDrawPreciseLineCommand* GPUBackend::NewDrawPreciseLineCommand(u32 num_vertices)
{
  const u32 total_size =
    sizeof(GPUBackendDrawPreciseLineCommand) + (num_vertices * sizeof(GPUBackendDrawPreciseLineCommand::Vertex));
  auto* const line = static_cast<GPUBackendDrawPreciseLineCommand*>(
    GPUThread::AllocateCommand(GPUBackendCommandType::DrawPreciseLine, total_size));
  line->num_vertices = Truncate16(num_vertices);
  return line;
}
// Forwards `cmd` to GPUThread::PushCommand().
void GPUBackend::PushCommand(GPUThreadCommand* cmd)
{
GPUThread::PushCommand(cmd);
}
// Forwards `cmd` to GPUThread::PushCommandAndWakeThread().
void GPUBackend::PushCommandAndWakeThread(GPUThreadCommand* cmd)
{
GPUThread::PushCommandAndWakeThread(cmd);
}
// Forwards `cmd` and the `spin` flag to GPUThread::PushCommandAndSync().
void GPUBackend::PushCommandAndSync(GPUThreadCommand* cmd, bool spin)
{
GPUThread::PushCommandAndSync(cmd, spin);
}
// Forwards the `spin` flag to GPUThread::SyncGPUThread().
void GPUBackend::SyncGPUThread(bool spin)
{
GPUThread::SyncGPUThread(spin);
}
// Returns true when the requested renderer is anything other than the software renderer.
// If no renderer has been requested, it is treated as software.
bool GPUBackend::IsUsingHardwareBackend()
{
  const auto requested_renderer = GPUThread::GetRequestedRenderer().value_or(GPURenderer::Software);
  return (requested_renderer != GPURenderer::Software);
}
// Registers one more queued frame. Returns false while the new count is within
// g_settings.gpu_max_queued_frames. Otherwise marks the wait state as WAIT_CPU_THREAD_WAITING
// and returns true, meaning the caller is expected to call WaitForOneQueuedFrame().
bool GPUBackend::BeginQueueFrame()
{
// fetch_add() returns the previous value, so add one to get the count including this frame.
const u32 queued_frames = s_cpu_thread_state.queued_frames.fetch_add(1, std::memory_order_acq_rel) + 1;
if (queued_frames <= g_settings.gpu_max_queued_frames)
return false;
// Only log when a queue is configured; with a maximum of zero this path is taken on every call.
if (g_settings.gpu_max_queued_frames > 0)
DEV_LOG("<-- {} queued frames, {} max, blocking CPU thread", queued_frames, g_settings.gpu_max_queued_frames);
s_cpu_thread_state.wait_state.store(CPUThreadState::WAIT_CPU_THREAD_WAITING, std::memory_order_release);
return true;
}
// Blocks until ReleaseQueuedFrame() has posted the semaphore, unless the queue has already
// dropped back within the limit and no post is in flight. Intended to be called after
// BeginQueueFrame() returned true.
void GPUBackend::WaitForOneQueuedFrame()
{
// In between BeginQueueFrame() and this call, the frame may already have been released. Check.
if (s_cpu_thread_state.queued_frames.load(std::memory_order_acquire) <= g_settings.gpu_max_queued_frames)
{
// It's possible that the GPU thread has already signaled the semaphore.
// If so, then we still need to drain it, otherwise waits in the future will return prematurely.
// The exchange only succeeds (and skips the wait) if the state is still WAIT_CPU_THREAD_WAITING,
// i.e. ReleaseQueuedFrame() has not started signaling.
u32 expected = CPUThreadState::WAIT_CPU_THREAD_WAITING;
if (s_cpu_thread_state.wait_state.compare_exchange_strong(expected, CPUThreadState::WAIT_NONE,
std::memory_order_acq_rel, std::memory_order_acquire))
{
return;
}
}
s_cpu_thread_state.gpu_thread_wait.Wait();
// Depending on where the GPU thread is, now we can either be in WAIT_GPU_THREAD_SIGNALING or WAIT_GPU_THREAD_POSTED
// state. We want to clear the flag here regardless, so a store-release is fine. Because the GPU thread has a
// compare-exchange on WAIT_GPU_THREAD_SIGNALING, it can't "overwrite" the value we store here.
s_cpu_thread_state.wait_state.store(CPUThreadState::WAIT_NONE, std::memory_order_release);
// Sanity check: queued frames should be in range now. If they're not, the semaphore accounting is broken.
if (const u32 queued_frames = s_cpu_thread_state.queued_frames.load(std::memory_order_acquire);
queued_frames > g_settings.gpu_max_queued_frames) [[unlikely]]
{
ERROR_LOG("queued_frames {} above max queued frames {} after CPU wait", queued_frames,
g_settings.gpu_max_queued_frames);
}
}
// Returns the current number of queued frames (acquire load of the shared counter).
u32 GPUBackend::GetQueuedFrameCount()
{
return s_cpu_thread_state.queued_frames.load(std::memory_order_acquire);
}
// Marks one queued frame as finished and, if the wait state is WAIT_CPU_THREAD_WAITING, posts
// the semaphore exactly once to release the waiter.
void GPUBackend::ReleaseQueuedFrame()
{
s_cpu_thread_state.queued_frames.fetch_sub(1, std::memory_order_acq_rel);
// We need two states here in case we get preempted in between the compare_exchange_strong() and Post().
// This means that we will only release the semaphore once the CPU is guaranteed to be in a waiting state,
// and ensure that we don't post twice if the CPU thread lags and we process 2 frames before it wakes up.
u32 expected = CPUThreadState::WAIT_CPU_THREAD_WAITING;
if (s_cpu_thread_state.wait_state.compare_exchange_strong(expected, CPUThreadState::WAIT_GPU_THREAD_SIGNALING,
std::memory_order_acq_rel, std::memory_order_acquire))
{
if (g_settings.gpu_max_queued_frames > 0)
DEV_LOG("--> Unblocking CPU thread");
s_cpu_thread_state.gpu_thread_wait.Post();
// This needs to be a compare_exchange, because the CPU thread can clear the flag before we execute this line.
// If it already did, the exchange fails and the WAIT_NONE it stored is left untouched.
expected = CPUThreadState::WAIT_GPU_THREAD_SIGNALING;
s_cpu_thread_state.wait_state.compare_exchange_strong(expected, CPUThreadState::WAIT_GPU_THREAD_POSTED,
std::memory_order_acq_rel, std::memory_order_acquire);
}
}
// (Re)allocates the backend resources for every entry in `states`. The work is dispatched via
// GPUThread::RunOnBackend().
//
// Existing VRAM textures are recycled first. Each state is then allocated through
// AllocateMemorySaveState(), with one retry after purging the texture pool. If a state still
// cannot be allocated, every state up to and including the failing one is released and false
// is returned, with details in `error` if the backend supplied any. Returns true on success.
bool GPUBackend::AllocateMemorySaveStates(std::span<System::MemorySaveState> states, Error* error)
{
  bool result = false;
  GPUThread::RunOnBackend(
    [states, error, &result](GPUBackend* backend) {
      // Free old textures first.
      for (size_t i = 0; i < states.size(); i++)
        g_gpu_device->RecycleTexture(std::move(states[i].vram_texture));

      // Maximize potential for texture reuse by flushing the current command buffer.
      g_gpu_device->WaitForGPUIdle();

      for (size_t i = 0; i < states.size(); i++)
      {
        if (backend->AllocateMemorySaveState(states[i], error))
          continue;

        // Try flushing the pool.
        WARNING_LOG("Failed to allocate memory save state texture, trying flushing pool.");
        g_gpu_device->PurgeTexturePool();
        g_gpu_device->WaitForGPUIdle();
        if (backend->AllocateMemorySaveState(states[i], error))
          continue;

        // Free anything that was allocated, including whatever the failed attempt left behind.
        // The loop previously incremented `i` instead of `j` and returned from inside its body,
        // so only the first state was ever released.
        for (size_t j = 0; j <= i; j++)
        {
          states[j].state_data.deallocate();
          states[j].vram_texture.reset();
        }

        result = false;
        return;
      }

      backend->RestoreDeviceContext();
      result = true;
    },
    true, false);
  return result;
}
void GPUBackend::QueueUpdateResolutionScale()
{
DebugAssert(!GPUThread::IsOnThread());
GPUThread::RunOnBackend(
[](GPUBackend* backend) {
Error error;
if (!backend->UpdateResolutionScale(&error)) [[unlikely]]
GPUThread::ReportFatalErrorAndShutdown(
fmt::format("Failed to update resolution scale: {}", error.GetDescription()));
},
false, true);
}
void GPUBackend::HandleCommand(const GPUThreadCommand* cmd)
{
switch (cmd->type)
{
case GPUBackendCommandType::ClearVRAM:
{
ClearVRAM();
}
break;
case GPUBackendCommandType::LoadState:
{
LoadState(static_cast<const GPUBackendLoadStateCommand*>(cmd));
}
break;
case GPUBackendCommandType::LoadMemoryState:
{
System::MemorySaveState& mss = *static_cast<const GPUBackendDoMemoryStateCommand*>(cmd)->memory_save_state;
StateWrapper sw(mss.gpu_state_data.span(0, mss.gpu_state_size), StateWrapper::Mode::Read, SAVE_STATE_VERSION);
DoMemoryState(sw, mss);
}
break;
case GPUBackendCommandType::SaveMemoryState:
{
System::MemorySaveState& mss = *static_cast<const GPUBackendDoMemoryStateCommand*>(cmd)->memory_save_state;
StateWrapper sw(mss.gpu_state_data.span(), StateWrapper::Mode::Write, SAVE_STATE_VERSION);
DoMemoryState(sw, mss);
mss.gpu_state_size = static_cast<u32>(sw.GetPosition());
}
break;
case GPUBackendCommandType::ClearDisplay:
{
m_presenter.ClearDisplay();
}
break;
case GPUBackendCommandType::UpdateDisplay:
{
HandleUpdateDisplayCommand(static_cast<const GPUBackendUpdateDisplayCommand*>(cmd));
}
break;
case GPUBackendCommandType::SubmitFrame:
{
HandleSubmitFrameCommand(&static_cast<const GPUBackendSubmitFrameCommand*>(cmd)->frame);
}
break;
case GPUBackendCommandType::ClearCache:
{
ClearCache();
}
break;
case GPUBackendCommandType::BufferSwapped:
{
OnBufferSwapped();
}
break;
case GPUBackendCommandType::ReadVRAM:
{
const GPUBackendReadVRAMCommand* ccmd = static_cast<const GPUBackendReadVRAMCommand*>(cmd);
s_counters.num_reads++;
ReadVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height));
}
break;
case GPUBackendCommandType::FillVRAM:
{
const GPUBackendFillVRAMCommand* ccmd = static_cast<const GPUBackendFillVRAMCommand*>(cmd);
FillVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height),
ccmd->color, ccmd->interlaced_rendering, ccmd->active_line_lsb);
}
break;
case GPUBackendCommandType::UpdateVRAM:
{
const GPUBackendUpdateVRAMCommand* ccmd = static_cast<const GPUBackendUpdateVRAMCommand*>(cmd);
s_counters.num_writes++;
UpdateVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height),
ccmd->data, ccmd->set_mask_while_drawing, ccmd->check_mask_before_draw);
}
break;
case GPUBackendCommandType::CopyVRAM:
{
const GPUBackendCopyVRAMCommand* ccmd = static_cast<const GPUBackendCopyVRAMCommand*>(cmd);
s_counters.num_copies++;
CopyVRAM(ZeroExtend32(ccmd->src_x), ZeroExtend32(ccmd->src_y), ZeroExtend32(ccmd->dst_x),
ZeroExtend32(ccmd->dst_y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height),
ccmd->set_mask_while_drawing, ccmd->check_mask_before_draw);
}
break;
case GPUBackendCommandType::SetDrawingArea:
{
const GPUBackendSetDrawingAreaCommand* ccmd = static_cast<const GPUBackendSetDrawingAreaCommand*>(cmd);
GPU_SW_Rasterizer::g_drawing_area = ccmd->new_area;
m_clamped_drawing_area = GPU::GetClampedDrawingArea(ccmd->new_area);
DrawingAreaChanged();
}
break;
case GPUBackendCommandType::UpdateCLUT:
{
const GPUBackendUpdateCLUTCommand* ccmd = static_cast<const GPUBackendUpdateCLUTCommand*>(cmd);
GPU_SW_Rasterizer::UpdateCLUT(ccmd->reg, ccmd->clut_is_8bit);
}
break;
case GPUBackendCommandType::DrawPolygon:
{
const GPUBackendDrawPolygonCommand* ccmd = static_cast<const GPUBackendDrawPolygonCommand*>(cmd);
s_counters.num_vertices += ccmd->num_vertices;
s_counters.num_primitives++;
DrawPolygon(ccmd);
}
break;
case GPUBackendCommandType::DrawPrecisePolygon:
{
const GPUBackendDrawPolygonCommand* ccmd = static_cast<const GPUBackendDrawPolygonCommand*>(cmd);
s_counters.num_vertices += ccmd->num_vertices;
s_counters.num_primitives++;
DrawPrecisePolygon(static_cast<const GPUBackendDrawPrecisePolygonCommand*>(cmd));
}
break;
case GPUBackendCommandType::DrawRectangle:
{
const GPUBackendDrawRectangleCommand* ccmd = static_cast<const GPUBackendDrawRectangleCommand*>(cmd);
s_counters.num_vertices++;
s_counters.num_primitives++;
DrawSprite(ccmd);
}
break;
case GPUBackendCommandType::DrawLine:
{
const GPUBackendDrawLineCommand* ccmd = static_cast<const GPUBackendDrawLineCommand*>(cmd);
s_counters.num_vertices += ccmd->num_vertices;
s_counters.num_primitives += ccmd->num_vertices / 2;
DrawLine(ccmd);
}
break;
case GPUBackendCommandType::DrawPreciseLine:
{
const GPUBackendDrawPreciseLineCommand* ccmd = static_cast<const GPUBackendDrawPreciseLineCommand*>(cmd);
s_counters.num_vertices += ccmd->num_vertices;
s_counters.num_primitives += ccmd->num_vertices / 2;
DrawPreciseLine(ccmd);
}
break;
DefaultCaseIsUnreachable();
}
}
// Pushes the display geometry from `cmd` to the presenter, lets the backend update its display
// output, and submits the frame if the command asks for it.
void GPUBackend::HandleUpdateDisplayCommand(const GPUBackendUpdateDisplayCommand* cmd)
{
  // Height has to be doubled because we halved it on the GPU side.
  const auto presented_vram_height = cmd->display_vram_height << BoolToUInt32(cmd->interlaced_display_enabled);

  m_presenter.SetDisplayParameters(cmd->display_width, cmd->display_height, cmd->display_origin_left,
                                   cmd->display_origin_top, cmd->display_vram_width, presented_vram_height,
                                   cmd->display_pixel_aspect_ratio);
  UpdateDisplay(cmd);

  if (!cmd->submit_frame)
    return;

  HandleSubmitFrameCommand(&cmd->frame);
}
// Finishes a frame: notifies the host, optionally feeds media capture, optionally presents
// (releasing one queued frame afterwards), then optionally updates performance counters and
// restores the device context.
void GPUBackend::HandleSubmitFrameCommand(const GPUBackendFramePresentationParameters* cmd)
{
// For regtest.
Host::FrameDoneOnGPUThread(this, cmd->frame_number);
if (cmd->media_capture)
m_presenter.SendDisplayToMediaCapture(cmd->media_capture);
// If this returns false, our backend object is deleted and replaced with null, so bail out.
if (cmd->present_frame)
{
const bool result = m_presenter.PresentFrame(&m_presenter, this, cmd->allow_present_skip, cmd->present_time);
// The queued frame is released whether or not the present succeeded. ReleaseQueuedFrame()
// only touches file-level state, not members of this object.
ReleaseQueuedFrame();
if (!result)
return;
}
// Update perf counters *after* throttling, we want to measure from start-of-frame
// to start-of-frame, not end-of-frame to end-of-frame (will be noisy due to different
// amounts of computation happening in each frame).
if (cmd->update_performance_counters)
PerformanceCounters::Update(this, cmd->frame_number, cmd->internal_frame_number);
RestoreDeviceContext();
}
// Formats the averaged per-frame statistics into `str`. The string starts with the render API
// name and a "-MT" suffix when the threaded GPU option is enabled. The hardware layout then
// reports primitives, draw calls, barriers, render passes, downloads, copies and writes; the
// software layout reports primitives, reads, copies and writes.
void GPUBackend::GetStatsString(SmallStringBase& str) const
{
  const bool is_hardware = IsUsingHardwareBackend();
  const auto api_name = GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI());
  const char* const thread_suffix = g_gpu_settings.gpu_use_thread ? "-MT" : "";

  if (!is_hardware)
  {
    str.format("{}{} SW | {} P | {} R | {} C | {} W", api_name, thread_suffix, s_stats.num_primitives,
               s_stats.num_reads, s_stats.num_copies, s_stats.num_writes);
    return;
  }

  str.format("{}{} HW | {} P | {} DC | {} B | {} RP | {} RB | {} C | {} W", api_name, thread_suffix,
             s_stats.num_primitives, s_stats.host_num_draws, s_stats.host_num_barriers,
             s_stats.host_num_render_passes, s_stats.host_num_downloads, s_stats.num_copies, s_stats.num_writes);
}
// Formats memory-related statistics into `str`: device VRAM usage in MB and streamed buffer
// size in KB (both rounded up), followed by the averaged host copy and upload counts.
void GPUBackend::GetMemoryStatsString(SmallStringBase& str) const
{
  constexpr int BYTES_PER_MB = 1048576;
  constexpr int BYTES_PER_KB = 1024;

  // Adding (unit - 1) before dividing rounds up to the next whole unit.
  const u32 vram_mb = static_cast<u32>((g_gpu_device->GetVRAMUsage() + (BYTES_PER_MB - 1)) / BYTES_PER_MB);
  const u32 streamed_kb = static_cast<u32>((s_stats.host_buffer_streamed + (BYTES_PER_KB - 1)) / BYTES_PER_KB);

  str.format("{} MB VRAM | {} KB STR | {} TC | {} TU", vram_mb, streamed_kb, s_stats.host_num_copies,
             s_stats.host_num_uploads);
}
// Zeroes the command tallies and asks the GPU device to reset its own statistics. The averaged
// values in s_stats are left untouched.
void GPUBackend::ResetStatistics()
{
s_counters = {};
g_gpu_device->ResetStatistics();
}
// Converts the tallies accumulated since the last reset into per-frame averages stored in
// s_stats, then resets the tallies.
// `frame_count` is the number of frames the tallies cover. It is used as a divisor, so it must
// be non-zero.
void GPUBackend::UpdateStatistics(u32 frame_count)
{
const GPUDevice::Statistics& stats = g_gpu_device->GetStatistics();
// Adding (frame_count - 1) before dividing rounds each average up.
const u32 round = (frame_count - 1);
// UPDATE_COUNTER averages a field of s_counters; UPDATE_GPU_STAT averages a field of the device
// statistics into the matching host_-prefixed field.
#define UPDATE_COUNTER(x) s_stats.x = (s_counters.x + round) / frame_count
#define UPDATE_GPU_STAT(x) s_stats.host_##x = (stats.x + round) / frame_count
UPDATE_COUNTER(num_reads);
UPDATE_COUNTER(num_writes);
UPDATE_COUNTER(num_copies);
UPDATE_COUNTER(num_vertices);
UPDATE_COUNTER(num_primitives);
// UPDATE_COUNTER(num_read_texture_updates);
// UPDATE_COUNTER(num_ubo_updates);
UPDATE_GPU_STAT(buffer_streamed);
UPDATE_GPU_STAT(num_draws);
UPDATE_GPU_STAT(num_barriers);
UPDATE_GPU_STAT(num_render_passes);
UPDATE_GPU_STAT(num_copies);
UPDATE_GPU_STAT(num_downloads);
UPDATE_GPU_STAT(num_uploads);
#undef UPDATE_GPU_STAT
#undef UPDATE_COUNTER
ResetStatistics();
}
// Renders the current display into `out_image`. The work is dispatched via
// GPUThread::RunOnBackend().
//
// With `postfx` set and a main swap chain present, the image takes the swap chain's size.
// Otherwise the size is the display rectangle computed for `width` x `height`.
// Returns false, with a message in `error`, if there is no backend or rendering fails.
bool GPUBackend::RenderScreenshotToBuffer(u32 width, u32 height, bool postfx, bool apply_aspect_ratio, Image* out_image,
                                          Error* error)
{
  bool result;
  GPUThread::RunOnBackend(
    [width, height, postfx, apply_aspect_ratio, out_image, error, &result](GPUBackend* backend) {
      if (!backend)
      {
        Error::SetStringView(error, "No GPU backend.");
        result = false;
        return;
      }

      // Post-processing requires that the size match the window.
      const bool use_postfx = postfx && g_gpu_device->HasMainSwapChain();

      u32 target_width, target_height;
      if (!use_postfx)
      {
        // Crop it if border overlay isn't enabled.
        GSVector4i draw_rect, display_rect;
        backend->GetPresenter().CalculateDrawRect(static_cast<s32>(width), static_cast<s32>(height), apply_aspect_ratio,
                                                  false, &display_rect, &draw_rect);
        target_width = static_cast<u32>(display_rect.width());
        target_height = static_cast<u32>(display_rect.height());
      }
      else
      {
        target_width = g_gpu_device->GetMainSwapChain()->GetWidth();
        target_height = g_gpu_device->GetMainSwapChain()->GetHeight();
      }

      result = backend->GetPresenter().RenderScreenshotToBuffer(target_width, target_height, use_postfx,
                                                                apply_aspect_ratio, out_image, error);
      backend->RestoreDeviceContext();
    },
    true, false);
  return result;
}
// Renders a screenshot and saves it to `path`. The render is dispatched via
// GPUThread::RunOnBackend(); the conversion and encoding are then queued as an async task.
//
// `mode` selects the screenshot size and whether aspect correction is applied. `quality` is
// passed to the image encoder. When `show_osd_message` is set, progress and the final outcome
// are reported through OSD messages that share a key derived from `path`.
// Nothing happens if there is no backend or the computed screenshot size is empty.
void GPUBackend::RenderScreenshotToFile(const std::string_view path, DisplayScreenshotMode mode, u8 quality,
                                        bool show_osd_message)
{
  GPUThread::RunOnBackend(
    [path = std::string(path), mode, quality, show_osd_message](GPUBackend* backend) mutable {
      if (!backend)
        return;

      const GSVector2i size = backend->GetPresenter().CalculateScreenshotSize(mode);
      if (size.x == 0 || size.y == 0)
        return;

      // An empty key doubles as the "no OSD messages" marker for the async task below.
      std::string osd_key;
      if (show_osd_message)
        osd_key = fmt::format("ScreenshotSaver_{}", path);

      const bool internal_resolution = (mode != DisplayScreenshotMode::ScreenResolution);
      const bool apply_aspect_ratio = (mode != DisplayScreenshotMode::UncorrectedInternalResolution);

      Error error;
      Image image;
      if (!backend->m_presenter.RenderScreenshotToBuffer(size.x, size.y, !internal_resolution, apply_aspect_ratio,
                                                         &image, &error))
      {
        ERROR_LOG("Failed to render {}x{} screenshot: {}", size.x, size.y, error.GetDescription());
        if (show_osd_message)
        {
          Host::AddIconOSDWarning(
            std::move(osd_key), ICON_EMOJI_WARNING,
            fmt::format(TRANSLATE_FS("GPU", "Failed to save screenshot:\n{}"), error.GetDescription()));
        }

        backend->RestoreDeviceContext();
        return;
      }

      // no more GPU calls
      backend->RestoreDeviceContext();

      auto fp = FileSystem::OpenManagedCFile(path.c_str(), "wb", &error);
      if (!fp)
      {
        ERROR_LOG("Can't open file '{}': {}", Path::GetFileName(path), error.GetDescription());
        if (show_osd_message)
        {
          Host::AddIconOSDWarning(
            std::move(osd_key), ICON_EMOJI_WARNING,
            fmt::format(TRANSLATE_FS("GPU", "Failed to save screenshot:\n{}"), error.GetDescription()));
        }

        return;
      }

      if (show_osd_message)
      {
        // Use a 60 second timeout to give it plenty of time to actually save.
        Host::AddIconOSDMessage(osd_key, ICON_EMOJI_CAMERA_WITH_FLASH,
                                fmt::format(TRANSLATE_FS("GPU", "Saving screenshot to '{}'."), Path::GetFileName(path)),
                                60.0f);
      }

      // The file handle is released from its managed wrapper here and closed at the end of the task.
      System::QueueAsyncTask([path = std::move(path), fp = fp.release(), quality,
                              flip_y = g_gpu_device->UsesLowerLeftOrigin(), image = std::move(image),
                              osd_key = std::move(osd_key)]() mutable {
        Error error;

        if (flip_y)
          image.FlipY();

        if (image.GetFormat() != ImageFormat::RGBA8)
        {
          std::optional<Image> convert_image = image.ConvertToRGBA8(&error);
          if (!convert_image.has_value())
          {
            ERROR_LOG("Failed to convert {} screenshot to RGBA8: {}", Image::GetFormatName(image.GetFormat()),
                      error.GetDescription());
            image.Invalidate();
          }
          else
          {
            image = std::move(convert_image.value());
          }
        }

        bool result = false;
        if (image.IsValid())
        {
          image.SetAllPixelsOpaque();
          result = image.SaveToFile(path.c_str(), fp, quality, &error);
          if (!result)
            ERROR_LOG("Failed to save screenshot to '{}': '{}'", Path::GetFileName(path), error.GetDescription());
        }

        if (!osd_key.empty())
        {
          // The duration belongs to AddIconOSDMessage(). It was previously passed as a surplus
          // argument to fmt::format(), so the message fell back to the default duration.
          Host::AddIconOSDMessage(std::move(osd_key), ICON_EMOJI_CAMERA,
                                  fmt::format(result ? TRANSLATE_FS("GPU", "Saved screenshot to '{}'.") :
                                                       TRANSLATE_FS("GPU", "Failed to save screenshot to '{}'."),
                                              Path::GetFileName(path)),
                                  result ? Host::OSD_INFO_DURATION : Host::OSD_ERROR_DURATION);
        }

        std::fclose(fp);
        return result;
      });
    },
    false, false);
}
namespace {
// Backend whose overrides perform no rendering work. The definitions below are either empty,
// return a fixed value, or defer to the GPUBackend base implementation. Instances are created
// through GPUBackend::CreateNullBackend().
class GPUNullBackend final : public GPUBackend
{
public:
GPUNullBackend(GPUPresenter& presenter);
~GPUNullBackend() override;
// Lifecycle and settings: both defer to the GPUBackend base implementation.
bool Initialize(bool upload_vram, Error* error) override;
bool UpdateSettings(const GPUSettings& old_settings, Error* error) override;
// Resolution scale is fixed at 1; updating it always succeeds.
u32 GetResolutionScale() const override;
bool UpdateResolutionScale(Error* error) override;
void RestoreDeviceContext() override;
void FlushRender() override;
// VRAM access: all no-ops.
void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override;
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, bool interlaced_rendering,
u8 interlaced_display_field) override;
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, bool set_mask,
bool check_mask) override;
// Drawing: all no-ops.
void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) override;
void DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd) override;
void DrawSprite(const GPUBackendDrawRectangleCommand* cmd) override;
void DrawLine(const GPUBackendDrawLineCommand* cmd) override;
void DrawPreciseLine(const GPUBackendDrawPreciseLineCommand* cmd) override;
void DrawingAreaChanged() override;
void ClearCache() override;
void OnBufferSwapped() override;
void ClearVRAM() override;
void UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd) override;
// State handling: loading is a no-op and memory save state allocation always fails.
void LoadState(const GPUBackendLoadStateCommand* cmd) override;
bool AllocateMemorySaveState(System::MemorySaveState& mss, Error* error) override;
void DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss) override;
};
} // namespace
// Nothing to set up beyond the GPUBackend base constructor.
GPUNullBackend::GPUNullBackend(GPUPresenter& presenter) : GPUBackend(presenter)
{
}
GPUNullBackend::~GPUNullBackend() = default;
// Defers to the base class initialization.
bool GPUNullBackend::Initialize(bool upload_vram, Error* error)
{
return GPUBackend::Initialize(upload_vram, error);
}
// Defers to the base class settings handling.
bool GPUNullBackend::UpdateSettings(const GPUSettings& old_settings, Error* error)
{
return GPUBackend::UpdateSettings(old_settings, error);
}
// The null backend always reports a resolution scale of 1.
u32 GPUNullBackend::GetResolutionScale() const
{
return 1;
}
// There is no scale to change, so this always succeeds.
bool GPUNullBackend::UpdateResolutionScale(Error* error)
{
return true;
}
// The remaining overrides are intentionally empty.
void GPUNullBackend::RestoreDeviceContext()
{
}
void GPUNullBackend::FlushRender()
{
}
void GPUNullBackend::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
{
}
void GPUNullBackend::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, bool interlaced_rendering,
u8 interlaced_display_field)
{
}
void GPUNullBackend::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask)
{
}
void GPUNullBackend::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, bool set_mask,
bool check_mask)
{
}
void GPUNullBackend::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd)
{
}
void GPUNullBackend::DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd)
{
}
void GPUNullBackend::DrawSprite(const GPUBackendDrawRectangleCommand* cmd)
{
}
void GPUNullBackend::DrawLine(const GPUBackendDrawLineCommand* cmd)
{
}
void GPUNullBackend::DrawPreciseLine(const GPUBackendDrawPreciseLineCommand* cmd)
{
}
void GPUNullBackend::DrawingAreaChanged()
{
}
void GPUNullBackend::ClearCache()
{
}
void GPUNullBackend::OnBufferSwapped()
{
}
void GPUNullBackend::ClearVRAM()
{
}
void GPUNullBackend::UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd)
{
}
void GPUNullBackend::LoadState(const GPUBackendLoadStateCommand* cmd)
{
}
// Always fails, so GPUBackend::AllocateMemorySaveStates() reports failure when it is given any
// states to allocate with this backend.
bool GPUNullBackend::AllocateMemorySaveState(System::MemorySaveState& mss, Error* error)
{
return false;
}
void GPUNullBackend::DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss)
{
}
// Creates a GPUNullBackend bound to `presenter` and returns it as an owning GPUBackend pointer.
std::unique_ptr<GPUBackend> GPUBackend::CreateNullBackend(GPUPresenter& presenter)
{
return std::make_unique<GPUNullBackend>(presenter);
}