// SPDX-FileCopyrightText: 2019-2025 Connor McLaughlin // SPDX-License-Identifier: CC-BY-NC-ND-4.0 #include "gpu_backend.h" #include "gpu.h" #include "gpu_presenter.h" #include "gpu_sw_rasterizer.h" #include "gpu_thread.h" #include "host.h" #include "performance_counters.h" #include "save_state_version.h" #include "settings.h" #include "system.h" #include "system_private.h" #include "util/gpu_device.h" #include "util/imgui_manager.h" #include "util/state_wrapper.h" #include "common/error.h" #include "common/file_system.h" #include "common/log.h" #include "common/path.h" #include "common/threading.h" #include "IconsEmoji.h" #include "IconsFontAwesome5.h" #include "fmt/format.h" LOG_CHANNEL(GPU); namespace { struct Counters { u32 num_reads; u32 num_writes; u32 num_copies; u32 num_vertices; u32 num_primitives; }; struct Stats : Counters { size_t host_buffer_streamed; u32 host_num_draws; u32 host_num_barriers; u32 host_num_render_passes; u32 host_num_copies; u32 host_num_downloads; u32 host_num_uploads; }; struct ALIGN_TO_CACHE_LINE CPUThreadState { static constexpr u32 WAIT_NONE = 0; static constexpr u32 WAIT_CPU_THREAD_WAITING = 1; static constexpr u32 WAIT_GPU_THREAD_SIGNALING = 2; static constexpr u32 WAIT_GPU_THREAD_POSTED = 3; std::atomic queued_frames; std::atomic wait_state; Threading::KernelSemaphore gpu_thread_wait; }; } // namespace static Counters s_counters = {}; static Stats s_stats = {}; static CPUThreadState s_cpu_thread_state = {}; GPUBackend::GPUBackend(GPUPresenter& presenter) : m_presenter(presenter) { GPU_SW_Rasterizer::SelectImplementation(); ResetStatistics(); } GPUBackend::~GPUBackend() { m_presenter.ClearDisplayTexture(); } void GPUBackend::SetScreenQuadInputLayout(GPUPipeline::GraphicsConfig& config) { static constexpr GPUPipeline::VertexAttribute screen_vertex_attributes[] = { GPUPipeline::VertexAttribute::Make(0, GPUPipeline::VertexAttribute::Semantic::Position, 0, GPUPipeline::VertexAttribute::Type::Float, 2, OFFSETOF(ScreenVertex, x)), GPUPipeline::VertexAttribute::Make(1, GPUPipeline::VertexAttribute::Semantic::TexCoord, 0, GPUPipeline::VertexAttribute::Type::Float, 2, OFFSETOF(ScreenVertex, u)), }; // common state config.input_layout.vertex_attributes = screen_vertex_attributes; config.input_layout.vertex_stride = sizeof(ScreenVertex); config.primitive = GPUPipeline::Primitive::TriangleStrips; } GSVector4 GPUBackend::GetScreenQuadClipSpaceCoordinates(const GSVector4i bounds, const GSVector2i rt_size) { const GSVector4 fboundsxxyy = GSVector4(bounds.xzyw()); const GSVector2 fsize = GSVector2(rt_size); const GSVector2 x = ((fboundsxxyy.xy() * GSVector2::cxpr(2.0f)) / fsize.xx()) - GSVector2::cxpr(1.0f); const GSVector2 y = GSVector2::cxpr(1.0f) - (GSVector2::cxpr(2.0f) * (fboundsxxyy.zw() / fsize.yy())); return GSVector4::xyxy(x, y).xzyw(); } void GPUBackend::DrawScreenQuad(const GSVector4i bounds, const GSVector2i rt_size, const GSVector4 uv_bounds /* = GSVector4::cxpr(0.0f, 0.0f, 1.0f, 1.0f) */) { const GSVector4 xy = GetScreenQuadClipSpaceCoordinates(bounds, rt_size); ScreenVertex* vertices; u32 space; u32 base_vertex; g_gpu_device->MapVertexBuffer(sizeof(ScreenVertex), 4, reinterpret_cast(&vertices), &space, &base_vertex); vertices[0].Set(xy.xy(), uv_bounds.xy()); vertices[1].Set(xy.zyzw().xy(), uv_bounds.zyzw().xy()); vertices[2].Set(xy.xwzw().xy(), uv_bounds.xwzw().xy()); vertices[3].Set(xy.zw(), uv_bounds.zw()); g_gpu_device->UnmapVertexBuffer(sizeof(ScreenVertex), 4); g_gpu_device->Draw(4, base_vertex); } bool GPUBackend::Initialize(bool clear_vram, Error* error) { m_clamped_drawing_area = GPU::GetClampedDrawingArea(GPU_SW_Rasterizer::g_drawing_area); return true; } bool GPUBackend::UpdateSettings(const GPUSettings& old_settings, Error* error) { if (g_gpu_settings.display_show_gpu_stats != old_settings.display_show_gpu_stats) GPUBackend::ResetStatistics(); return true; } void GPUBackend::UpdatePostProcessingSettings(bool force_reload) { } GPUThreadCommand* GPUBackend::NewClearVRAMCommand() { return static_cast( GPUThread::AllocateCommand(GPUBackendCommandType::ClearVRAM, sizeof(GPUThreadCommand))); } GPUThreadCommand* GPUBackend::NewClearDisplayCommand() { return static_cast( GPUThread::AllocateCommand(GPUBackendCommandType::ClearDisplay, sizeof(GPUThreadCommand))); } GPUBackendUpdateDisplayCommand* GPUBackend::NewUpdateDisplayCommand() { return static_cast( GPUThread::AllocateCommand(GPUBackendCommandType::UpdateDisplay, sizeof(GPUBackendUpdateDisplayCommand))); } GPUBackendSubmitFrameCommand* GPUBackend::NewSubmitFrameCommand() { return static_cast( GPUThread::AllocateCommand(GPUBackendCommandType::SubmitFrame, sizeof(GPUBackendUpdateDisplayCommand))); } GPUThreadCommand* GPUBackend::NewClearCacheCommand() { return static_cast( GPUThread::AllocateCommand(GPUBackendCommandType::ClearCache, sizeof(GPUThreadCommand))); } GPUThreadCommand* GPUBackend::NewBufferSwappedCommand() { return static_cast( GPUThread::AllocateCommand(GPUBackendCommandType::BufferSwapped, sizeof(GPUThreadCommand))); } GPUBackendReadVRAMCommand* GPUBackend::NewReadVRAMCommand() { return static_cast( GPUThread::AllocateCommand(GPUBackendCommandType::ReadVRAM, sizeof(GPUBackendReadVRAMCommand))); } GPUBackendFillVRAMCommand* GPUBackend::NewFillVRAMCommand() { return static_cast( GPUThread::AllocateCommand(GPUBackendCommandType::FillVRAM, sizeof(GPUBackendFillVRAMCommand))); } GPUBackendUpdateVRAMCommand* GPUBackend::NewUpdateVRAMCommand(u32 num_words) { const u32 size = sizeof(GPUBackendUpdateVRAMCommand) + (num_words * sizeof(u16)); GPUBackendUpdateVRAMCommand* cmd = static_cast(GPUThread::AllocateCommand(GPUBackendCommandType::UpdateVRAM, size)); return cmd; } GPUBackendCopyVRAMCommand* GPUBackend::NewCopyVRAMCommand() { return static_cast( GPUThread::AllocateCommand(GPUBackendCommandType::CopyVRAM, sizeof(GPUBackendCopyVRAMCommand))); } GPUBackendSetDrawingAreaCommand* GPUBackend::NewSetDrawingAreaCommand() { return static_cast( GPUThread::AllocateCommand(GPUBackendCommandType::SetDrawingArea, sizeof(GPUBackendSetDrawingAreaCommand))); } GPUBackendUpdateCLUTCommand* GPUBackend::NewUpdateCLUTCommand() { return static_cast( GPUThread::AllocateCommand(GPUBackendCommandType::UpdateCLUT, sizeof(GPUBackendUpdateCLUTCommand))); } GPUBackendDrawPolygonCommand* GPUBackend::NewDrawPolygonCommand(u32 num_vertices) { const u32 size = sizeof(GPUBackendDrawPolygonCommand) + (num_vertices * sizeof(GPUBackendDrawPolygonCommand::Vertex)); GPUBackendDrawPolygonCommand* cmd = static_cast(GPUThread::AllocateCommand(GPUBackendCommandType::DrawPolygon, size)); cmd->num_vertices = Truncate16(num_vertices); return cmd; } GPUBackendDrawPrecisePolygonCommand* GPUBackend::NewDrawPrecisePolygonCommand(u32 num_vertices) { const u32 size = sizeof(GPUBackendDrawPrecisePolygonCommand) + (num_vertices * sizeof(GPUBackendDrawPrecisePolygonCommand::Vertex)); GPUBackendDrawPrecisePolygonCommand* cmd = static_cast( GPUThread::AllocateCommand(GPUBackendCommandType::DrawPrecisePolygon, size)); cmd->num_vertices = Truncate16(num_vertices); return cmd; } GPUBackendDrawRectangleCommand* GPUBackend::NewDrawRectangleCommand() { return static_cast( GPUThread::AllocateCommand(GPUBackendCommandType::DrawRectangle, sizeof(GPUBackendDrawRectangleCommand))); } GPUBackendDrawLineCommand* GPUBackend::NewDrawLineCommand(u32 num_vertices) { const u32 size = sizeof(GPUBackendDrawLineCommand) + (num_vertices * sizeof(GPUBackendDrawLineCommand::Vertex)); GPUBackendDrawLineCommand* cmd = static_cast(GPUThread::AllocateCommand(GPUBackendCommandType::DrawLine, size)); cmd->num_vertices = Truncate16(num_vertices); return cmd; } GPUBackendDrawPreciseLineCommand* GPUBackend::NewDrawPreciseLineCommand(u32 num_vertices) { const u32 size = sizeof(GPUBackendDrawPreciseLineCommand) + (num_vertices * sizeof(GPUBackendDrawPreciseLineCommand::Vertex)); GPUBackendDrawPreciseLineCommand* cmd = static_cast( GPUThread::AllocateCommand(GPUBackendCommandType::DrawPreciseLine, size)); cmd->num_vertices = Truncate16(num_vertices); return cmd; } void GPUBackend::PushCommand(GPUThreadCommand* cmd) { GPUThread::PushCommand(cmd); } void GPUBackend::PushCommandAndWakeThread(GPUThreadCommand* cmd) { GPUThread::PushCommandAndWakeThread(cmd); } void GPUBackend::PushCommandAndSync(GPUThreadCommand* cmd, bool spin) { GPUThread::PushCommandAndSync(cmd, spin); } void GPUBackend::SyncGPUThread(bool spin) { GPUThread::SyncGPUThread(spin); } bool GPUBackend::IsUsingHardwareBackend() { return (GPUThread::GetRequestedRenderer().value_or(GPURenderer::Software) != GPURenderer::Software); } bool GPUBackend::BeginQueueFrame() { const u32 queued_frames = s_cpu_thread_state.queued_frames.fetch_add(1, std::memory_order_acq_rel) + 1; if (queued_frames <= g_settings.gpu_max_queued_frames) return false; if (g_settings.gpu_max_queued_frames > 0) DEV_LOG("<-- {} queued frames, {} max, blocking CPU thread", queued_frames, g_settings.gpu_max_queued_frames); s_cpu_thread_state.wait_state.store(CPUThreadState::WAIT_CPU_THREAD_WAITING, std::memory_order_release); return true; } void GPUBackend::WaitForOneQueuedFrame() { // Inbetween this and the post call, we may have finished the frame. Check. if (s_cpu_thread_state.queued_frames.load(std::memory_order_acquire) <= g_settings.gpu_max_queued_frames) { // It's possible that the GPU thread has already signaled the semaphore. // If so, then we still need to drain it, otherwise waits in the future will return prematurely. u32 expected = CPUThreadState::WAIT_CPU_THREAD_WAITING; if (s_cpu_thread_state.wait_state.compare_exchange_strong(expected, CPUThreadState::WAIT_NONE, std::memory_order_acq_rel, std::memory_order_acquire)) { return; } } s_cpu_thread_state.gpu_thread_wait.Wait(); // Depending on where the GPU thread is, now we can either be in WAIT_GPU_THREAD_SIGNALING or WAIT_GPU_THREAD_POSTED // state. We want to clear the flag here regardless, so a store-release is fine. Because the GPU thread has a // compare-exchange on WAIT_GPU_THREAD_SIGNALING, it can't "overwrite" the value we store here. s_cpu_thread_state.wait_state.store(CPUThreadState::WAIT_NONE, std::memory_order_release); // Sanity check: queued frames should be in range now. If they're not, we fucked up the semaphore. if (const u32 queued_frames = s_cpu_thread_state.queued_frames.load(std::memory_order_acquire); queued_frames > g_settings.gpu_max_queued_frames) [[unlikely]] { ERROR_LOG("queued_frames {} above max queued frames {} after CPU wait", queued_frames, g_settings.gpu_max_queued_frames); } } u32 GPUBackend::GetQueuedFrameCount() { return s_cpu_thread_state.queued_frames.load(std::memory_order_acquire); } void GPUBackend::ReleaseQueuedFrame() { s_cpu_thread_state.queued_frames.fetch_sub(1, std::memory_order_acq_rel); // We need two states here in case we get preempted in between the compare_exchange_strong() and Post(). // This means that we will only release the semaphore once the CPU is guaranteed to be in a waiting state, // and ensure that we don't post twice if the CPU thread lags and we process 2 frames before it wakes up. u32 expected = CPUThreadState::WAIT_CPU_THREAD_WAITING; if (s_cpu_thread_state.wait_state.compare_exchange_strong(expected, CPUThreadState::WAIT_GPU_THREAD_SIGNALING, std::memory_order_acq_rel, std::memory_order_acquire)) { if (g_settings.gpu_max_queued_frames > 0) DEV_LOG("--> Unblocking CPU thread"); s_cpu_thread_state.gpu_thread_wait.Post(); // This needs to be a compare_exchange, because the CPU thread can clear the flag before we execute this line. expected = CPUThreadState::WAIT_GPU_THREAD_SIGNALING; s_cpu_thread_state.wait_state.compare_exchange_strong(expected, CPUThreadState::WAIT_GPU_THREAD_POSTED, std::memory_order_acq_rel, std::memory_order_acquire); } } bool GPUBackend::AllocateMemorySaveStates(std::span states, Error* error) { bool result; GPUThread::RunOnBackend( [states, error, &result](GPUBackend* backend) { // Free old textures first. for (size_t i = 0; i < states.size(); i++) g_gpu_device->RecycleTexture(std::move(states[i].vram_texture)); // Maximize potential for texture reuse by flushing the current command buffer. g_gpu_device->WaitForGPUIdle(); for (size_t i = 0; i < states.size(); i++) { if (!backend->AllocateMemorySaveState(states[i], error)) { // Try flushing the pool. WARNING_LOG("Failed to allocate memory save state texture, trying flushing pool."); g_gpu_device->PurgeTexturePool(); g_gpu_device->WaitForGPUIdle(); if (!backend->AllocateMemorySaveState(states[i], error)) { // Free anything that was allocated. for (size_t j = 0; j <= i; i++) { states[j].state_data.deallocate(); states[j].vram_texture.reset(); result = false; return; } } } } backend->RestoreDeviceContext(); result = true; }, true, false); return result; } void GPUBackend::QueueUpdateResolutionScale() { DebugAssert(!GPUThread::IsOnThread()); GPUThread::RunOnBackend( [](GPUBackend* backend) { Error error; if (!backend->UpdateResolutionScale(&error)) [[unlikely]] GPUThread::ReportFatalErrorAndShutdown( fmt::format("Failed to update resolution scale: {}", error.GetDescription())); }, false, true); } void GPUBackend::HandleCommand(const GPUThreadCommand* cmd) { switch (cmd->type) { case GPUBackendCommandType::ClearVRAM: { ClearVRAM(); } break; case GPUBackendCommandType::LoadState: { LoadState(static_cast(cmd)); } break; case GPUBackendCommandType::LoadMemoryState: { System::MemorySaveState& mss = *static_cast(cmd)->memory_save_state; StateWrapper sw(mss.gpu_state_data.span(0, mss.gpu_state_size), StateWrapper::Mode::Read, SAVE_STATE_VERSION); DoMemoryState(sw, mss); } break; case GPUBackendCommandType::SaveMemoryState: { System::MemorySaveState& mss = *static_cast(cmd)->memory_save_state; StateWrapper sw(mss.gpu_state_data.span(), StateWrapper::Mode::Write, SAVE_STATE_VERSION); DoMemoryState(sw, mss); mss.gpu_state_size = static_cast(sw.GetPosition()); } break; case GPUBackendCommandType::ClearDisplay: { m_presenter.ClearDisplay(); } break; case GPUBackendCommandType::UpdateDisplay: { HandleUpdateDisplayCommand(static_cast(cmd)); } break; case GPUBackendCommandType::SubmitFrame: { HandleSubmitFrameCommand(&static_cast(cmd)->frame); } break; case GPUBackendCommandType::ClearCache: { ClearCache(); } break; case GPUBackendCommandType::BufferSwapped: { OnBufferSwapped(); } break; case GPUBackendCommandType::ReadVRAM: { const GPUBackendReadVRAMCommand* ccmd = static_cast(cmd); s_counters.num_reads++; ReadVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height)); } break; case GPUBackendCommandType::FillVRAM: { const GPUBackendFillVRAMCommand* ccmd = static_cast(cmd); FillVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height), ccmd->color, ccmd->interlaced_rendering, ccmd->active_line_lsb); } break; case GPUBackendCommandType::UpdateVRAM: { const GPUBackendUpdateVRAMCommand* ccmd = static_cast(cmd); s_counters.num_writes++; UpdateVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height), ccmd->data, ccmd->set_mask_while_drawing, ccmd->check_mask_before_draw); } break; case GPUBackendCommandType::CopyVRAM: { const GPUBackendCopyVRAMCommand* ccmd = static_cast(cmd); s_counters.num_copies++; CopyVRAM(ZeroExtend32(ccmd->src_x), ZeroExtend32(ccmd->src_y), ZeroExtend32(ccmd->dst_x), ZeroExtend32(ccmd->dst_y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height), ccmd->set_mask_while_drawing, ccmd->check_mask_before_draw); } break; case GPUBackendCommandType::SetDrawingArea: { const GPUBackendSetDrawingAreaCommand* ccmd = static_cast(cmd); GPU_SW_Rasterizer::g_drawing_area = ccmd->new_area; m_clamped_drawing_area = GPU::GetClampedDrawingArea(ccmd->new_area); DrawingAreaChanged(); } break; case GPUBackendCommandType::UpdateCLUT: { const GPUBackendUpdateCLUTCommand* ccmd = static_cast(cmd); GPU_SW_Rasterizer::UpdateCLUT(ccmd->reg, ccmd->clut_is_8bit); } break; case GPUBackendCommandType::DrawPolygon: { const GPUBackendDrawPolygonCommand* ccmd = static_cast(cmd); s_counters.num_vertices += ccmd->num_vertices; s_counters.num_primitives++; DrawPolygon(ccmd); } break; case GPUBackendCommandType::DrawPrecisePolygon: { const GPUBackendDrawPolygonCommand* ccmd = static_cast(cmd); s_counters.num_vertices += ccmd->num_vertices; s_counters.num_primitives++; DrawPrecisePolygon(static_cast(cmd)); } break; case GPUBackendCommandType::DrawRectangle: { const GPUBackendDrawRectangleCommand* ccmd = static_cast(cmd); s_counters.num_vertices++; s_counters.num_primitives++; DrawSprite(ccmd); } break; case GPUBackendCommandType::DrawLine: { const GPUBackendDrawLineCommand* ccmd = static_cast(cmd); s_counters.num_vertices += ccmd->num_vertices; s_counters.num_primitives += ccmd->num_vertices / 2; DrawLine(ccmd); } break; case GPUBackendCommandType::DrawPreciseLine: { const GPUBackendDrawPreciseLineCommand* ccmd = static_cast(cmd); s_counters.num_vertices += ccmd->num_vertices; s_counters.num_primitives += ccmd->num_vertices / 2; DrawPreciseLine(ccmd); } break; DefaultCaseIsUnreachable(); } } void GPUBackend::HandleUpdateDisplayCommand(const GPUBackendUpdateDisplayCommand* cmd) { // Height has to be doubled because we halved it on the GPU side. m_presenter.SetDisplayParameters( cmd->display_width, cmd->display_height, cmd->display_origin_left, cmd->display_origin_top, cmd->display_vram_width, cmd->display_vram_height << BoolToUInt32(cmd->interlaced_display_enabled), cmd->display_pixel_aspect_ratio); UpdateDisplay(cmd); if (cmd->submit_frame) HandleSubmitFrameCommand(&cmd->frame); } void GPUBackend::HandleSubmitFrameCommand(const GPUBackendFramePresentationParameters* cmd) { // For regtest. Host::FrameDoneOnGPUThread(this, cmd->frame_number); if (cmd->media_capture) m_presenter.SendDisplayToMediaCapture(cmd->media_capture); // If this returns false, our backend object is deleted and replaced with null, so bail out. if (cmd->present_frame) { const bool result = m_presenter.PresentFrame(&m_presenter, this, cmd->allow_present_skip, cmd->present_time); ReleaseQueuedFrame(); if (!result) return; } // Update perf counters *after* throttling, we want to measure from start-of-frame // to start-of-frame, not end-of-frame to end-of-frame (will be noisy due to different // amounts of computation happening in each frame). if (cmd->update_performance_counters) PerformanceCounters::Update(this, cmd->frame_number, cmd->internal_frame_number); RestoreDeviceContext(); } void GPUBackend::GetStatsString(SmallStringBase& str) const { if (IsUsingHardwareBackend()) { str.format("{}{} HW | {} P | {} DC | {} B | {} RP | {} RB | {} C | {} W", GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()), g_gpu_settings.gpu_use_thread ? "-MT" : "", s_stats.num_primitives, s_stats.host_num_draws, s_stats.host_num_barriers, s_stats.host_num_render_passes, s_stats.host_num_downloads, s_stats.num_copies, s_stats.num_writes); } else { str.format("{}{} SW | {} P | {} R | {} C | {} W", GPUDevice::RenderAPIToString(g_gpu_device->GetRenderAPI()), g_gpu_settings.gpu_use_thread ? "-MT" : "", s_stats.num_primitives, s_stats.num_reads, s_stats.num_copies, s_stats.num_writes); } } void GPUBackend::GetMemoryStatsString(SmallStringBase& str) const { const u32 vram_usage_mb = static_cast((g_gpu_device->GetVRAMUsage() + (1048576 - 1)) / 1048576); const u32 stream_kb = static_cast((s_stats.host_buffer_streamed + (1024 - 1)) / 1024); str.format("{} MB VRAM | {} KB STR | {} TC | {} TU", vram_usage_mb, stream_kb, s_stats.host_num_copies, s_stats.host_num_uploads); } void GPUBackend::ResetStatistics() { s_counters = {}; g_gpu_device->ResetStatistics(); } void GPUBackend::UpdateStatistics(u32 frame_count) { const GPUDevice::Statistics& stats = g_gpu_device->GetStatistics(); const u32 round = (frame_count - 1); #define UPDATE_COUNTER(x) s_stats.x = (s_counters.x + round) / frame_count #define UPDATE_GPU_STAT(x) s_stats.host_##x = (stats.x + round) / frame_count UPDATE_COUNTER(num_reads); UPDATE_COUNTER(num_writes); UPDATE_COUNTER(num_copies); UPDATE_COUNTER(num_vertices); UPDATE_COUNTER(num_primitives); // UPDATE_COUNTER(num_read_texture_updates); // UPDATE_COUNTER(num_ubo_updates); UPDATE_GPU_STAT(buffer_streamed); UPDATE_GPU_STAT(num_draws); UPDATE_GPU_STAT(num_barriers); UPDATE_GPU_STAT(num_render_passes); UPDATE_GPU_STAT(num_copies); UPDATE_GPU_STAT(num_downloads); UPDATE_GPU_STAT(num_uploads); #undef UPDATE_GPU_STAT #undef UPDATE_COUNTER ResetStatistics(); } bool GPUBackend::RenderScreenshotToBuffer(u32 width, u32 height, bool postfx, bool apply_aspect_ratio, Image* out_image, Error* error) { bool result; GPUThread::RunOnBackend( [width, height, postfx, apply_aspect_ratio, out_image, error, &result](GPUBackend* backend) { if (!backend) { Error::SetStringView(error, "No GPU backend."); result = false; return; } // Post-processing requires that the size match the window. const bool really_postfx = postfx && g_gpu_device->HasMainSwapChain(); u32 image_width, image_height; if (really_postfx) { image_width = g_gpu_device->GetMainSwapChain()->GetWidth(); image_height = g_gpu_device->GetMainSwapChain()->GetHeight(); } else { // Crop it if border overlay isn't enabled. GSVector4i draw_rect, display_rect; backend->GetPresenter().CalculateDrawRect(static_cast(width), static_cast(height), apply_aspect_ratio, false, &display_rect, &draw_rect); image_width = static_cast(display_rect.width()); image_height = static_cast(display_rect.height()); } result = backend->GetPresenter().RenderScreenshotToBuffer(image_width, image_height, really_postfx, apply_aspect_ratio, out_image, error); backend->RestoreDeviceContext(); }, true, false); return result; } void GPUBackend::RenderScreenshotToFile(const std::string_view path, DisplayScreenshotMode mode, u8 quality, bool show_osd_message) { GPUThread::RunOnBackend( [path = std::string(path), mode, quality, show_osd_message](GPUBackend* backend) mutable { if (!backend) return; const GSVector2i size = backend->GetPresenter().CalculateScreenshotSize(mode); if (size.x == 0 || size.y == 0) return; std::string osd_key; if (show_osd_message) osd_key = fmt::format("ScreenshotSaver_{}", path); const bool internal_resolution = (mode != DisplayScreenshotMode::ScreenResolution); const bool apply_aspect_ratio = (mode != DisplayScreenshotMode::UncorrectedInternalResolution); Error error; Image image; if (!backend->m_presenter.RenderScreenshotToBuffer(size.x, size.y, !internal_resolution, apply_aspect_ratio, &image, &error)) { ERROR_LOG("Failed to render {}x{} screenshot: {}", size.x, size.y, error.GetDescription()); if (show_osd_message) { Host::AddIconOSDWarning( std::move(osd_key), ICON_EMOJI_WARNING, fmt::format(TRANSLATE_FS("GPU", "Failed to save screenshot:\n{}"), error.GetDescription())); } backend->RestoreDeviceContext(); return; } // no more GPU calls backend->RestoreDeviceContext(); auto fp = FileSystem::OpenManagedCFile(path.c_str(), "wb", &error); if (!fp) { ERROR_LOG("Can't open file '{}': {}", Path::GetFileName(path), error.GetDescription()); if (show_osd_message) { Host::AddIconOSDWarning( std::move(osd_key), ICON_EMOJI_WARNING, fmt::format(TRANSLATE_FS("GPU", "Failed to save screenshot:\n{}"), error.GetDescription())); } return; } if (show_osd_message) { // Use a 60 second timeout to give it plenty of time to actually save. Host::AddIconOSDMessage(osd_key, ICON_EMOJI_CAMERA_WITH_FLASH, fmt::format(TRANSLATE_FS("GPU", "Saving screenshot to '{}'."), Path::GetFileName(path)), 60.0f); } System::QueueAsyncTask([path = std::move(path), fp = fp.release(), quality, flip_y = g_gpu_device->UsesLowerLeftOrigin(), image = std::move(image), osd_key = std::move(osd_key)]() mutable { Error error; if (flip_y) image.FlipY(); if (image.GetFormat() != ImageFormat::RGBA8) { std::optional convert_image = image.ConvertToRGBA8(&error); if (!convert_image.has_value()) { ERROR_LOG("Failed to convert {} screenshot to RGBA8: {}", Image::GetFormatName(image.GetFormat()), error.GetDescription()); image.Invalidate(); } else { image = std::move(convert_image.value()); } } bool result = false; if (image.IsValid()) { image.SetAllPixelsOpaque(); result = image.SaveToFile(path.c_str(), fp, quality, &error); if (!result) ERROR_LOG("Failed to save screenshot to '{}': '{}'", Path::GetFileName(path), error.GetDescription()); } if (!osd_key.empty()) { Host::AddIconOSDMessage(std::move(osd_key), ICON_EMOJI_CAMERA, fmt::format(result ? TRANSLATE_FS("GPU", "Saved screenshot to '{}'.") : TRANSLATE_FS("GPU", "Failed to save screenshot to '{}'."), Path::GetFileName(path), result ? Host::OSD_INFO_DURATION : Host::OSD_ERROR_DURATION)); } std::fclose(fp); return result; }); }, false, false); } namespace { class GPUNullBackend final : public GPUBackend { public: GPUNullBackend(GPUPresenter& presenter); ~GPUNullBackend() override; bool Initialize(bool upload_vram, Error* error) override; bool UpdateSettings(const GPUSettings& old_settings, Error* error) override; u32 GetResolutionScale() const override; bool UpdateResolutionScale(Error* error) override; void RestoreDeviceContext() override; void FlushRender() override; void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, bool interlaced_rendering, u8 interlaced_display_field) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, bool set_mask, bool check_mask) override; void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) override; void DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd) override; void DrawSprite(const GPUBackendDrawRectangleCommand* cmd) override; void DrawLine(const GPUBackendDrawLineCommand* cmd) override; void DrawPreciseLine(const GPUBackendDrawPreciseLineCommand* cmd) override; void DrawingAreaChanged() override; void ClearCache() override; void OnBufferSwapped() override; void ClearVRAM() override; void UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd) override; void LoadState(const GPUBackendLoadStateCommand* cmd) override; bool AllocateMemorySaveState(System::MemorySaveState& mss, Error* error) override; void DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss) override; }; } // namespace GPUNullBackend::GPUNullBackend(GPUPresenter& presenter) : GPUBackend(presenter) { } GPUNullBackend::~GPUNullBackend() = default; bool GPUNullBackend::Initialize(bool upload_vram, Error* error) { return GPUBackend::Initialize(upload_vram, error); } bool GPUNullBackend::UpdateSettings(const GPUSettings& old_settings, Error* error) { return GPUBackend::UpdateSettings(old_settings, error); } u32 GPUNullBackend::GetResolutionScale() const { return 1; } bool GPUNullBackend::UpdateResolutionScale(Error* error) { return true; } void GPUNullBackend::RestoreDeviceContext() { } void GPUNullBackend::FlushRender() { } void GPUNullBackend::ReadVRAM(u32 x, u32 y, u32 width, u32 height) { } void GPUNullBackend::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, bool interlaced_rendering, u8 interlaced_display_field) { } void GPUNullBackend::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask) { } void GPUNullBackend::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, bool set_mask, bool check_mask) { } void GPUNullBackend::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) { } void GPUNullBackend::DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd) { } void GPUNullBackend::DrawSprite(const GPUBackendDrawRectangleCommand* cmd) { } void GPUNullBackend::DrawLine(const GPUBackendDrawLineCommand* cmd) { } void GPUNullBackend::DrawPreciseLine(const GPUBackendDrawPreciseLineCommand* cmd) { } void GPUNullBackend::DrawingAreaChanged() { } void GPUNullBackend::ClearCache() { } void GPUNullBackend::OnBufferSwapped() { } void GPUNullBackend::ClearVRAM() { } void GPUNullBackend::UpdateDisplay(const GPUBackendUpdateDisplayCommand* cmd) { } void GPUNullBackend::LoadState(const GPUBackendLoadStateCommand* cmd) { } bool GPUNullBackend::AllocateMemorySaveState(System::MemorySaveState& mss, Error* error) { return false; } void GPUNullBackend::DoMemoryState(StateWrapper& sw, System::MemorySaveState& mss) { } std::unique_ptr GPUBackend::CreateNullBackend(GPUPresenter& presenter) { return std::make_unique(presenter); }