// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin // SPDX-License-Identifier: CC-BY-NC-ND-4.0 #include "cpu_pgxp.h" #include "gpu.h" #include "gpu_backend.h" #include "gpu_dump.h" #include "gpu_thread_commands.h" #include "interrupt_controller.h" #include "system.h" #include "common/assert.h" #include "common/gsvector_formatter.h" #include "common/log.h" #include "common/string_util.h" LOG_CHANNEL(GPU); #define CHECK_COMMAND_SIZE(num_words) \ if (m_fifo.GetSize() < num_words) \ { \ m_command_total_words = num_words; \ return false; \ } static u32 s_cpu_to_vram_dump_id = 1; static u32 s_vram_to_cpu_dump_id = 1; static constexpr u32 ReplaceZero(u32 value, u32 value_for_zero) { return value == 0 ? value_for_zero : value; } void GPU::TryExecuteCommands() { while (m_pending_command_ticks <= m_max_run_ahead && !m_fifo.IsEmpty()) { switch (m_blitter_state) { case BlitterState::Idle: { const u32 command = FifoPeek(0) >> 24; if ((this->*s_GP0_command_handler_table[command])()) continue; else return; } case BlitterState::WritingVRAM: { DebugAssert(m_blit_remaining_words > 0); const u32 words_to_copy = std::min(m_blit_remaining_words, m_fifo.GetSize()); m_blit_buffer.reserve(m_blit_buffer.size() + words_to_copy); for (u32 i = 0; i < words_to_copy; i++) m_blit_buffer.push_back(FifoPop()); m_blit_remaining_words -= words_to_copy; DEBUG_LOG("VRAM write burst of {} words, {} words remaining", words_to_copy, m_blit_remaining_words); if (m_blit_remaining_words == 0) FinishVRAMWrite(); continue; } case BlitterState::ReadingVRAM: { return; } break; case BlitterState::DrawingPolyLine: { const u32 words_per_vertex = m_render_command.shading_enable ? 2 : 1; u32 terminator_index = m_render_command.shading_enable ? ((static_cast(m_polyline_buffer.size()) & 1u) ^ 1u) : 0u; for (; terminator_index < m_fifo.GetSize(); terminator_index += words_per_vertex) { // polyline must have at least two vertices, and the terminator is (word & 0xf000f000) == 0x50005000. // terminator is on the first word for the vertex if ((FifoPeek(terminator_index) & UINT32_C(0xF000F000)) == UINT32_C(0x50005000)) break; } const bool found_terminator = (terminator_index < m_fifo.GetSize()); const u32 words_to_copy = std::min(terminator_index, m_fifo.GetSize()); if (words_to_copy > 0) { m_polyline_buffer.reserve(m_polyline_buffer.size() + words_to_copy); for (u32 i = 0; i < words_to_copy; i++) m_polyline_buffer.push_back(m_fifo.Pop()); } DEBUG_LOG("Added {} words to polyline", words_to_copy); if (found_terminator) { // drop terminator m_fifo.RemoveOne(); DEBUG_LOG("Drawing poly-line with {} vertices", GetPolyLineVertexCount()); FinishPolyline(); m_polyline_buffer.clear(); EndCommand(); continue; } } break; } } } void GPU::ExecuteCommands() { const bool was_executing_from_event = std::exchange(m_executing_commands, true); TryExecuteCommands(); UpdateDMARequest(); UpdateGPUIdle(); m_executing_commands = was_executing_from_event; if (!was_executing_from_event) UpdateCommandTickEvent(); } void GPU::EndCommand() { m_blitter_state = BlitterState::Idle; m_command_total_words = 0; } GPU::GP0CommandHandlerTable GPU::GenerateGP0CommandHandlerTable() { GP0CommandHandlerTable table = {}; for (u32 i = 0; i < static_cast(table.size()); i++) table[i] = &GPU::HandleUnknownGP0Command; table[0x00] = &GPU::HandleNOPCommand; table[0x01] = &GPU::HandleClearCacheCommand; table[0x02] = &GPU::HandleFillRectangleCommand; table[0x03] = &GPU::HandleNOPCommand; for (u32 i = 0x04; i <= 0x1E; i++) table[i] = &GPU::HandleNOPCommand; table[0x1F] = &GPU::HandleInterruptRequestCommand; for (u32 i = 0x20; i <= 0x7F; i++) { const GPURenderCommand rc{i << 24}; switch (rc.primitive) { case GPUPrimitive::Polygon: table[i] = &GPU::HandleRenderPolygonCommand; break; case GPUPrimitive::Line: table[i] = rc.polyline ? &GPU::HandleRenderPolyLineCommand : &GPU::HandleRenderLineCommand; break; case GPUPrimitive::Rectangle: table[i] = &GPU::HandleRenderRectangleCommand; break; default: table[i] = &GPU::HandleUnknownGP0Command; break; } } table[0xE0] = &GPU::HandleNOPCommand; table[0xE1] = &GPU::HandleSetDrawModeCommand; table[0xE2] = &GPU::HandleSetTextureWindowCommand; table[0xE3] = &GPU::HandleSetDrawingAreaTopLeftCommand; table[0xE4] = &GPU::HandleSetDrawingAreaBottomRightCommand; table[0xE5] = &GPU::HandleSetDrawingOffsetCommand; table[0xE6] = &GPU::HandleSetMaskBitCommand; for (u32 i = 0xE7; i <= 0xEF; i++) table[i] = &GPU::HandleNOPCommand; for (u32 i = 0x80; i <= 0x9F; i++) table[i] = &GPU::HandleCopyRectangleVRAMToVRAMCommand; for (u32 i = 0xA0; i <= 0xBF; i++) table[i] = &GPU::HandleCopyRectangleCPUToVRAMCommand; for (u32 i = 0xC0; i <= 0xDF; i++) table[i] = &GPU::HandleCopyRectangleVRAMToCPUCommand; table[0xFF] = &GPU::HandleNOPCommand; return table; } bool GPU::HandleUnknownGP0Command() { const u32 command = FifoPeek() >> 24; ERROR_LOG("Unimplemented GP0 command 0x{:02X}", command); SmallString dump; for (u32 i = 0; i < m_fifo.GetSize(); i++) dump.append_format("{}{:08X}", (i > 0) ? " " : "", FifoPeek(i)); ERROR_LOG("FIFO: {}", dump); m_fifo.RemoveOne(); EndCommand(); return true; } bool GPU::HandleNOPCommand() { m_fifo.RemoveOne(); EndCommand(); return true; } bool GPU::HandleClearCacheCommand() { DEBUG_LOG("GP0 clear cache"); InvalidateCLUT(); GPUBackend::PushCommand(GPUBackend::NewClearCacheCommand()); m_fifo.RemoveOne(); AddCommandTicks(1); EndCommand(); return true; } bool GPU::HandleInterruptRequestCommand() { DEBUG_LOG("GP0 interrupt request"); m_GPUSTAT.interrupt_request = true; InterruptController::SetLineState(InterruptController::IRQ::GPU, true); m_fifo.RemoveOne(); AddCommandTicks(1); EndCommand(); return true; } bool GPU::HandleSetDrawModeCommand() { const u32 param = FifoPop() & 0x00FFFFFFu; DEBUG_LOG("Set draw mode {:08X}", param); SetDrawMode(Truncate16(param)); AddCommandTicks(1); EndCommand(); return true; } bool GPU::HandleSetTextureWindowCommand() { const u32 param = FifoPop() & 0x00FFFFFFu; SetTextureWindow(param); AddCommandTicks(1); EndCommand(); return true; } bool GPU::HandleSetDrawingAreaTopLeftCommand() { const u32 param = FifoPop() & 0x00FFFFFFu; const u32 left = param & DRAWING_AREA_COORD_MASK; const u32 top = (param >> 10) & DRAWING_AREA_COORD_MASK; DEBUG_LOG("Set drawing area top-left: ({}, {})", left, top); if (m_drawing_area.left != left || m_drawing_area.top != top) { m_drawing_area.left = left; m_drawing_area.top = top; m_drawing_area_changed = true; SetClampedDrawingArea(); } AddCommandTicks(1); EndCommand(); return true; } bool GPU::HandleSetDrawingAreaBottomRightCommand() { const u32 param = FifoPop() & 0x00FFFFFFu; const u32 right = param & DRAWING_AREA_COORD_MASK; const u32 bottom = (param >> 10) & DRAWING_AREA_COORD_MASK; DEBUG_LOG("Set drawing area bottom-right: ({}, {})", right, bottom); if (m_drawing_area.right != right || m_drawing_area.bottom != bottom) { m_drawing_area.right = right; m_drawing_area.bottom = bottom; m_drawing_area_changed = true; SetClampedDrawingArea(); } AddCommandTicks(1); EndCommand(); return true; } bool GPU::HandleSetDrawingOffsetCommand() { const u32 param = FifoPop() & 0x00FFFFFFu; const s32 x = SignExtendN<11, s32>(param & 0x7FFu); const s32 y = SignExtendN<11, s32>((param >> 11) & 0x7FFu); DEBUG_LOG("Set drawing offset ({}, {})", x, y); if (m_drawing_offset.x != x || m_drawing_offset.y != y) { m_drawing_offset.x = x; m_drawing_offset.y = y; } AddCommandTicks(1); EndCommand(); return true; } bool GPU::HandleSetMaskBitCommand() { const u32 param = FifoPop() & 0x00FFFFFFu; constexpr u32 gpustat_mask = (1 << 11) | (1 << 12); const u32 gpustat_bits = (param & 0x03) << 11; m_GPUSTAT.bits = (m_GPUSTAT.bits & ~gpustat_mask) | gpustat_bits; DEBUG_LOG("Set mask bit {} {}", BoolToUInt32(m_GPUSTAT.set_mask_while_drawing), BoolToUInt32(m_GPUSTAT.check_mask_before_draw)); AddCommandTicks(1); EndCommand(); return true; } void GPU::PrepareForDraw() { if (m_drawing_area_changed) { m_drawing_area_changed = false; GPUBackendSetDrawingAreaCommand* cmd = GPUBackend::NewSetDrawingAreaCommand(); cmd->new_area = m_drawing_area; GPUBackend::PushCommand(cmd); } } void GPU::FillDrawCommand(GPUBackendDrawCommand* RESTRICT cmd, GPURenderCommand rc) const { cmd->interlaced_rendering = IsInterlacedRenderingEnabled(); cmd->active_line_lsb = ConvertToBoolUnchecked(m_crtc_state.active_line_lsb); cmd->check_mask_before_draw = m_GPUSTAT.check_mask_before_draw; cmd->set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing; cmd->texture_enable = rc.IsTexturingEnabled(); cmd->raw_texture_enable = rc.raw_texture_enable; cmd->transparency_enable = rc.transparency_enable; cmd->shading_enable = rc.shading_enable; cmd->quad_polygon = rc.quad_polygon; cmd->dither_enable = rc.IsDitheringEnabled() && m_draw_mode.mode_reg.dither_enable; cmd->draw_mode.bits = m_draw_mode.mode_reg.bits; cmd->palette.bits = m_draw_mode.palette_reg.bits; cmd->window = m_draw_mode.texture_window; } bool GPU::HandleRenderPolygonCommand() { const GPURenderCommand rc{FifoPeek(0)}; // shaded vertices use the colour from the first word for the first vertex const u32 words_per_vertex = 1 + BoolToUInt32(rc.texture_enable) + BoolToUInt32(rc.shading_enable); const u32 num_vertices = rc.quad_polygon ? 4 : 3; const u32 total_words = words_per_vertex * num_vertices + BoolToUInt32(!rc.shading_enable); CHECK_COMMAND_SIZE(total_words); if (IsInterlacedRenderingEnabled() && IsCRTCScanlinePending()) SynchronizeCRTC(); // setup time static constexpr u16 s_setup_time[2][2][2] = {{{46, 226}, {334, 496}}, {{82, 262}, {370, 532}}}; const TickCount setup_ticks = static_cast(ZeroExtend32( s_setup_time[BoolToUInt8(rc.quad_polygon)][BoolToUInt8(rc.shading_enable)][BoolToUInt8(rc.texture_enable)])); AddCommandTicks(setup_ticks); TRACE_LOG("Render {} {} {} {} polygon ({} verts, {} words per vert), {} setup ticks", rc.quad_polygon ? "four-point" : "three-point", rc.transparency_enable ? "semi-transparent" : "opaque", rc.texture_enable ? "textured" : "non-textured", rc.shading_enable ? "shaded" : "monochrome", num_vertices, words_per_vertex, setup_ticks); // set draw state up // TODO: Get rid of SetTexturePalette() and just fill it as needed if (rc.texture_enable) { const u16 texpage_attribute = Truncate16((rc.shading_enable ? FifoPeek(5) : FifoPeek(4)) >> 16); SetDrawMode((texpage_attribute & GPUDrawModeReg::POLYGON_TEXPAGE_MASK) | (m_draw_mode.mode_reg.bits & ~GPUDrawModeReg::POLYGON_TEXPAGE_MASK)); SetTexturePalette(Truncate16(FifoPeek(2) >> 16)); UpdateCLUTIfNeeded(m_draw_mode.mode_reg.texture_mode, m_draw_mode.palette_reg); } m_render_command.bits = rc.bits; m_fifo.RemoveOne(); PrepareForDraw(); if (g_settings.gpu_pgxp_enable) { GPUBackendDrawPrecisePolygonCommand* RESTRICT cmd = GPUBackend::NewDrawPrecisePolygonCommand(num_vertices); FillDrawCommand(cmd, rc); cmd->num_vertices = Truncate16(num_vertices); const u32 first_color = rc.color_for_first_vertex; const bool shaded = rc.shading_enable; const bool textured = rc.texture_enable; bool valid_w = g_settings.gpu_pgxp_texture_correction; for (u32 i = 0; i < num_vertices; i++) { GPUBackendDrawPrecisePolygonCommand::Vertex* RESTRICT vert = &cmd->vertices[i]; vert->color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color; const u64 maddr_and_pos = m_fifo.Pop(); const GPUVertexPosition vp{Truncate32(maddr_and_pos)}; vert->native_x = m_drawing_offset.x + vp.x; vert->native_y = m_drawing_offset.y + vp.y; vert->texcoord = textured ? Truncate16(FifoPop()) : 0; valid_w &= CPU::PGXP::GetPreciseVertex(Truncate32(maddr_and_pos >> 32), vp.bits, vert->native_x, vert->native_y, m_drawing_offset.x, m_drawing_offset.y, &vert->x, &vert->y, &vert->w); } cmd->valid_w = valid_w; if (!valid_w) { if (g_settings.gpu_pgxp_disable_2d) { // NOTE: This reads uninitialized data, but it's okay, it doesn't get used. for (u32 i = 0; i < num_vertices; i++) { GPUBackendDrawPrecisePolygonCommand::Vertex& v = cmd->vertices[i]; GSVector2::store(&v.x, GSVector2(GSVector2i::load(&v.native_x))); v.w = 1.0f; } } else { for (u32 i = 0; i < num_vertices; i++) cmd->vertices[i].w = 1.0f; } } // Cull polygons which are too large. const GSVector2i v0 = GSVector2i::load(&cmd->vertices[0].native_x); const GSVector2i v1 = GSVector2i::load(&cmd->vertices[1].native_x); const GSVector2i v2 = GSVector2i::load(&cmd->vertices[2].native_x); const GSVector2i min_pos_12 = v1.min_s32(v2); const GSVector2i max_pos_12 = v1.max_s32(v2); const GSVector4i draw_rect_012 = GSVector4i::xyxy(min_pos_12.min_s32(v0), max_pos_12.max_s32(v0)).add32(GSVector4i::cxpr(0, 0, 1, 1)); const bool first_tri_culled = (draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT); if (first_tri_culled) { DEBUG_LOG("Culling too-large polygon: {},{} {},{} {},{}", cmd->vertices[0].native_x, cmd->vertices[0].native_y, cmd->vertices[1].native_x, cmd->vertices[1].native_y, cmd->vertices[2].native_x, cmd->vertices[2].native_y); if (!rc.quad_polygon) { EndCommand(); return true; } } else { AddDrawTriangleTicks(GSVector2i::load(&cmd->vertices[0].native_x), GSVector2i::load(&cmd->vertices[1].native_x), GSVector2i::load(&cmd->vertices[2].native_x), rc.shading_enable, rc.texture_enable, rc.transparency_enable); } // quads if (rc.quad_polygon) { const GSVector2i v3 = GSVector2i::load(&cmd->vertices[3].native_x); const GSVector4i draw_rect_123 = GSVector4i(min_pos_12.min_s32(v3)) .upl64(GSVector4i(max_pos_12.max_s32(v3))) .add32(GSVector4i::cxpr(0, 0, 1, 1)); // Cull polygons which are too large. const bool second_tri_culled = (draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT); if (second_tri_culled) { DEBUG_LOG("Culling too-large polygon (quad second half): {},{} {},{} {},{}", cmd->vertices[2].native_x, cmd->vertices[2].native_y, cmd->vertices[1].native_x, cmd->vertices[1].native_y, cmd->vertices[3].native_x, cmd->vertices[3].native_y); if (first_tri_culled) { EndCommand(); return true; } // Remove second part of quad. cmd->size = GPUThreadCommand::AlignCommandSize(sizeof(GPUBackendDrawPrecisePolygonCommand) + 3 * sizeof(GPUBackendDrawPrecisePolygonCommand::Vertex)); cmd->num_vertices = 3; } else { AddDrawTriangleTicks(GSVector2i::load(&cmd->vertices[2].native_x), GSVector2i::load(&cmd->vertices[1].native_x), GSVector2i::load(&cmd->vertices[3].native_x), rc.shading_enable, rc.texture_enable, rc.transparency_enable); // If first part was culled, move the second part to the first. if (first_tri_culled) { std::memcpy(&cmd->vertices[0], &cmd->vertices[2], sizeof(GPUBackendDrawPrecisePolygonCommand::Vertex)); std::memcpy(&cmd->vertices[2], &cmd->vertices[3], sizeof(GPUBackendDrawPrecisePolygonCommand::Vertex)); cmd->size = GPUThreadCommand::AlignCommandSize(sizeof(GPUBackendDrawPrecisePolygonCommand) + 3 * sizeof(GPUBackendDrawPrecisePolygonCommand::Vertex)); cmd->num_vertices = 3; } } } GPUBackend::PushCommand(cmd); } else { GPUBackendDrawPolygonCommand* RESTRICT cmd = GPUBackend::NewDrawPolygonCommand(num_vertices); FillDrawCommand(cmd, rc); cmd->num_vertices = Truncate16(num_vertices); const u32 first_color = rc.color_for_first_vertex; const bool shaded = rc.shading_enable; const bool textured = rc.texture_enable; for (u32 i = 0; i < num_vertices; i++) { GPUBackendDrawPolygonCommand::Vertex* RESTRICT vert = &cmd->vertices[i]; vert->color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color; const u64 maddr_and_pos = m_fifo.Pop(); const GPUVertexPosition vp{Truncate32(maddr_and_pos)}; vert->x = m_drawing_offset.x + vp.x; vert->y = m_drawing_offset.y + vp.y; vert->texcoord = textured ? Truncate16(FifoPop()) : 0; } // Cull polygons which are too large. const GSVector2i v0 = GSVector2i::load(&cmd->vertices[0].x); const GSVector2i v1 = GSVector2i::load(&cmd->vertices[1].x); const GSVector2i v2 = GSVector2i::load(&cmd->vertices[2].x); const GSVector2i min_pos_12 = v1.min_s32(v2); const GSVector2i max_pos_12 = v1.max_s32(v2); const GSVector4i draw_rect_012 = GSVector4i::xyxy(min_pos_12.min_s32(v0), max_pos_12.max_s32(v0)).add32(GSVector4i::cxpr(0, 0, 1, 1)); const bool first_tri_culled = (draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT); if (first_tri_culled) { DEBUG_LOG("Culling too-large polygon: {},{} {},{} {},{}", cmd->vertices[0].x, cmd->vertices[0].y, cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[2].x, cmd->vertices[2].y); if (!rc.quad_polygon) { EndCommand(); return true; } } else { AddDrawTriangleTicks(v0, v1, v2, rc.shading_enable, rc.texture_enable, rc.transparency_enable); } // quads if (rc.quad_polygon) { const GSVector2i v3 = GSVector2i::load(&cmd->vertices[3].x); const GSVector4i draw_rect_123 = GSVector4i(min_pos_12.min_s32(v3)) .upl64(GSVector4i(max_pos_12.max_s32(v3))) .add32(GSVector4i::cxpr(0, 0, 1, 1)); // Cull polygons which are too large. const bool second_tri_culled = (draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT); if (second_tri_culled) { DEBUG_LOG("Culling too-large polygon (quad second half): {},{} {},{} {},{}", cmd->vertices[2].x, cmd->vertices[2].y, cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[3].x, cmd->vertices[3].y); if (first_tri_culled) { EndCommand(); return true; } // Remove second part of quad. cmd->size = GPUThreadCommand::AlignCommandSize(sizeof(GPUBackendDrawPolygonCommand) + 3 * sizeof(GPUBackendDrawPolygonCommand::Vertex)); cmd->num_vertices = 3; } else { AddDrawTriangleTicks(v2, v1, v3, rc.shading_enable, rc.texture_enable, rc.transparency_enable); // If first part was culled, move the second part to the first. if (first_tri_culled) { std::memcpy(&cmd->vertices[0], &cmd->vertices[2], sizeof(GPUBackendDrawPolygonCommand::Vertex)); std::memcpy(&cmd->vertices[2], &cmd->vertices[3], sizeof(GPUBackendDrawPolygonCommand::Vertex)); cmd->size = GPUThreadCommand::AlignCommandSize(sizeof(GPUBackendDrawPolygonCommand) + 3 * sizeof(GPUBackendDrawPolygonCommand::Vertex)); cmd->num_vertices = 3; } } } GPUBackend::PushCommand(cmd); } EndCommand(); return true; } bool GPU::HandleRenderRectangleCommand() { const GPURenderCommand rc{FifoPeek(0)}; const u32 total_words = 2 + BoolToUInt32(rc.texture_enable) + BoolToUInt32(rc.rectangle_size == GPUDrawRectangleSize::Variable); CHECK_COMMAND_SIZE(total_words); if (IsInterlacedRenderingEnabled() && IsCRTCScanlinePending()) SynchronizeCRTC(); if (rc.texture_enable) { SetTexturePalette(Truncate16(FifoPeek(2) >> 16)); UpdateCLUTIfNeeded(m_draw_mode.mode_reg.texture_mode, m_draw_mode.palette_reg); } const TickCount setup_ticks = 16; AddCommandTicks(setup_ticks); TRACE_LOG("Render {} {} {} rectangle ({} words), {} setup ticks", rc.transparency_enable ? "semi-transparent" : "opaque", rc.texture_enable ? "textured" : "non-textured", rc.shading_enable ? "shaded" : "monochrome", total_words, setup_ticks); m_render_command.bits = rc.bits; m_fifo.RemoveOne(); PrepareForDraw(); GPUBackendDrawRectangleCommand* cmd = GPUBackend::NewDrawRectangleCommand(); FillDrawCommand(cmd, rc); cmd->color = rc.color_for_first_vertex; const GPUVertexPosition vp{FifoPop()}; cmd->x = TruncateGPUVertexPosition(m_drawing_offset.x + vp.x); cmd->y = TruncateGPUVertexPosition(m_drawing_offset.y + vp.y); if (rc.texture_enable) { const u32 texcoord_and_palette = FifoPop(); cmd->palette.bits = Truncate16(texcoord_and_palette >> 16); cmd->texcoord = Truncate16(texcoord_and_palette); } else { cmd->palette.bits = 0; cmd->texcoord = 0; } switch (rc.rectangle_size) { case GPUDrawRectangleSize::R1x1: cmd->width = 1; cmd->height = 1; break; case GPUDrawRectangleSize::R8x8: cmd->width = 8; cmd->height = 8; break; case GPUDrawRectangleSize::R16x16: cmd->width = 16; cmd->height = 16; break; default: { const u32 width_and_height = FifoPop(); cmd->width = static_cast(width_and_height & VRAM_WIDTH_MASK); cmd->height = static_cast((width_and_height >> 16) & VRAM_HEIGHT_MASK); } break; } const GSVector4i rect = GSVector4i(cmd->x, cmd->y, cmd->x + cmd->width, cmd->y + cmd->height); AddDrawRectangleTicks(rect, rc.texture_enable, rc.transparency_enable); GPUBackend::PushCommand(cmd); EndCommand(); return true; } bool GPU::HandleRenderLineCommand() { const GPURenderCommand rc{FifoPeek(0)}; const u32 total_words = rc.shading_enable ? 4 : 3; CHECK_COMMAND_SIZE(total_words); if (IsInterlacedRenderingEnabled() && IsCRTCScanlinePending()) SynchronizeCRTC(); TRACE_LOG("Render {} {} line ({} total words)", rc.transparency_enable ? "semi-transparent" : "opaque", rc.shading_enable ? "shaded" : "monochrome", total_words); m_render_command.bits = rc.bits; m_fifo.RemoveOne(); PrepareForDraw(); if (g_settings.gpu_pgxp_enable) { GPUBackendDrawPreciseLineCommand* RESTRICT cmd = GPUBackend::NewDrawPreciseLineCommand(2); FillDrawCommand(cmd, rc); cmd->palette.bits = 0; bool valid_w = g_settings.gpu_pgxp_texture_correction; for (u32 i = 0; i < 2; i++) { const u32 color = ((i != 0 && rc.shading_enable) ? FifoPop() : rc.bits) & UINT32_C(0x00FFFFFF); const u64 maddr_and_pos = m_fifo.Pop(); const GPUVertexPosition vp{Truncate32(maddr_and_pos)}; GPUBackendDrawPreciseLineCommand::Vertex* RESTRICT vert = &cmd->vertices[i]; vert->native_x = m_drawing_offset.x + vp.x; vert->native_y = m_drawing_offset.y + vp.y; vert->color = color; valid_w &= CPU::PGXP::GetPreciseVertex(Truncate32(maddr_and_pos >> 32), vp.bits, vert->native_x, vert->native_y, m_drawing_offset.x, m_drawing_offset.y, &vert->x, &vert->y, &vert->w); } if (!(cmd->valid_w = valid_w)) { for (u32 i = 0; i < 2; i++) cmd->vertices[i].w = 1.0f; } const GSVector2i v0 = GSVector2i::load(&cmd->vertices[0].native_x); const GSVector2i v1 = GSVector2i::load(&cmd->vertices[1].native_x); const GSVector4i rect = GSVector4i::xyxy(v0.min_s32(v1), v0.max_s32(v1)).add32(GSVector4i::cxpr(0, 0, 1, 1)); if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT) { DEBUG_LOG("Culling too-large line: {} - {}", v0, v1); EndCommand(); return true; } AddDrawLineTicks(rect, rc.shading_enable); GPUBackend::PushCommand(cmd); } else { GPUBackendDrawLineCommand* RESTRICT cmd = GPUBackend::NewDrawLineCommand(2); FillDrawCommand(cmd, rc); cmd->palette.bits = 0; if (rc.shading_enable) { cmd->vertices[0].color = rc.color_for_first_vertex; const GPUVertexPosition start_pos{FifoPop()}; cmd->vertices[0].x = m_drawing_offset.x + start_pos.x; cmd->vertices[0].y = m_drawing_offset.y + start_pos.y; cmd->vertices[1].color = FifoPop() & UINT32_C(0x00FFFFFF); const GPUVertexPosition end_pos{FifoPop()}; cmd->vertices[1].x = m_drawing_offset.x + end_pos.x; cmd->vertices[1].y = m_drawing_offset.y + end_pos.y; } else { cmd->vertices[0].color = rc.color_for_first_vertex; cmd->vertices[1].color = rc.color_for_first_vertex; const GPUVertexPosition start_pos{FifoPop()}; cmd->vertices[0].x = m_drawing_offset.x + start_pos.x; cmd->vertices[0].y = m_drawing_offset.y + start_pos.y; const GPUVertexPosition end_pos{FifoPop()}; cmd->vertices[1].x = m_drawing_offset.x + end_pos.x; cmd->vertices[1].y = m_drawing_offset.y + end_pos.y; } const GSVector2i v0 = GSVector2i::load(&cmd->vertices[0].x); const GSVector2i v1 = GSVector2i::load(&cmd->vertices[1].x); const GSVector4i rect = GSVector4i::xyxy(v0.min_s32(v1), v0.max_s32(v1)).add32(GSVector4i::cxpr(0, 0, 1, 1)); if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT) { DEBUG_LOG("Culling too-large line: {} - {}", v0, v1); EndCommand(); return true; } AddDrawLineTicks(rect, rc.shading_enable); GPUBackend::PushCommand(cmd); } EndCommand(); return true; } bool GPU::HandleRenderPolyLineCommand() { // always read the first two vertices, we test for the terminator after that const GPURenderCommand rc{FifoPeek(0)}; const u32 min_words = rc.shading_enable ? 3 : 4; CHECK_COMMAND_SIZE(min_words); if (IsInterlacedRenderingEnabled() && IsCRTCScanlinePending()) SynchronizeCRTC(); const TickCount setup_ticks = 16; AddCommandTicks(setup_ticks); TRACE_LOG("Render {} {} poly-line, {} setup ticks", rc.transparency_enable ? "semi-transparent" : "opaque", rc.shading_enable ? "shaded" : "monochrome", setup_ticks); m_render_command.bits = rc.bits; m_fifo.RemoveOne(); const u32 words_to_pop = min_words - 1; // m_blit_buffer.resize(words_to_pop); // FifoPopRange(m_blit_buffer.data(), words_to_pop); m_polyline_buffer.reserve(words_to_pop); for (u32 i = 0; i < words_to_pop; i++) m_polyline_buffer.push_back(m_fifo.Pop()); // polyline goes via a different path through the blit buffer m_blitter_state = BlitterState::DrawingPolyLine; m_command_total_words = 0; return true; } void GPU::FinishPolyline() { PrepareForDraw(); const u32 num_vertices = GetPolyLineVertexCount(); DebugAssert(num_vertices >= 2); if (g_settings.gpu_pgxp_enable) { GPUBackendDrawPreciseLineCommand* RESTRICT cmd = GPUBackend::NewDrawPreciseLineCommand((num_vertices - 1) * 2); FillDrawCommand(cmd, m_render_command); cmd->palette.bits = 0; u32 buffer_pos = 0; u32 out_vertex_count = 0; const bool shaded = m_render_command.shading_enable; bool valid_w = g_settings.gpu_pgxp_texture_correction; GPUBackendDrawPreciseLineCommand::Vertex start, end; const auto read_vertex = [this, &buffer_pos, &valid_w](GPUBackendDrawPreciseLineCommand::Vertex& RESTRICT dest, u32 color) { const u64 maddr_and_pos = m_polyline_buffer[buffer_pos++]; const GPUVertexPosition vp{Truncate32(maddr_and_pos)}; dest.native_x = m_drawing_offset.x + vp.x; dest.native_y = m_drawing_offset.y + vp.y; dest.color = color; valid_w &= CPU::PGXP::GetPreciseVertex(Truncate32(maddr_and_pos >> 32), vp.bits, dest.native_x, dest.native_y, m_drawing_offset.x, m_drawing_offset.y, &dest.x, &dest.y, &dest.w); }; read_vertex(start, m_render_command.color_for_first_vertex); for (u32 i = 1; i < num_vertices; i++) { const u32 color = (shaded ? Truncate32(m_polyline_buffer[buffer_pos++]) : m_render_command.bits) & UINT32_C(0x00FFFFFF); read_vertex(end, color); const GSVector2i start_pos = GSVector2i::load(&start.native_x); const GSVector2i end_pos = GSVector2i::load(&end.native_x); const GSVector4i rect = GSVector4i::xyxy(start_pos.min_s32(end_pos), start_pos.max_s32(end_pos)).add32(GSVector4i::cxpr(0, 0, 1, 1)); if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT) { DEBUG_LOG("Culling too-large line: {} - {}", start_pos, end_pos); } else { AddDrawLineTicks(rect, m_render_command.shading_enable); cmd->vertices[out_vertex_count++] = start; cmd->vertices[out_vertex_count++] = end; } start = end; } if (out_vertex_count > 0) { DebugAssert(out_vertex_count <= cmd->num_vertices); cmd->num_vertices = Truncate16(out_vertex_count); GPUBackend::PushCommand(cmd); } } else { GPUBackendDrawLineCommand* RESTRICT cmd = GPUBackend::NewDrawLineCommand((num_vertices - 1) * 2); FillDrawCommand(cmd, m_render_command); cmd->palette.bits = 0; u32 buffer_pos = 0; const GPUVertexPosition start_vp{Truncate32(m_polyline_buffer[buffer_pos++])}; const GSVector2i draw_offset = GSVector2i::load(&m_drawing_offset.x); GSVector2i start_pos = GSVector2i(start_vp.x, start_vp.y).add32(draw_offset); u32 start_color = m_render_command.color_for_first_vertex; const bool shaded = m_render_command.shading_enable; u32 out_vertex_count = 0; for (u32 i = 1; i < num_vertices; i++) { const u32 end_color = shaded ? (Truncate32(m_polyline_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF))) : m_render_command.color_for_first_vertex; const GPUVertexPosition vp{Truncate32(m_polyline_buffer[buffer_pos++])}; const GSVector2i end_pos = GSVector2i(vp.x, vp.y).add32(draw_offset); const GSVector4i rect = GSVector4i::xyxy(start_pos.min_s32(end_pos), start_pos.max_s32(end_pos)).add32(GSVector4i::cxpr(0, 0, 1, 1)); if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT) { DEBUG_LOG("Culling too-large line: {},{} - {},{}", start_pos.x, start_pos.y, end_pos.x, end_pos.y); } else { AddDrawLineTicks(rect, m_render_command.shading_enable); GPUBackendDrawLineCommand::Vertex* out_vertex = &cmd->vertices[out_vertex_count]; out_vertex_count += 2; GSVector2i::store(&out_vertex[0].x, start_pos); out_vertex[0].color = start_color; GSVector2i::store(&out_vertex[1].x, end_pos); out_vertex[1].color = end_color; } start_pos = end_pos; start_color = end_color; } if (out_vertex_count > 0) { DebugAssert(out_vertex_count <= cmd->num_vertices); cmd->num_vertices = Truncate16(out_vertex_count); GPUBackend::PushCommand(cmd); } } } bool GPU::HandleFillRectangleCommand() { CHECK_COMMAND_SIZE(3); if (IsInterlacedRenderingEnabled() && IsCRTCScanlinePending()) SynchronizeCRTC(); const u32 color = FifoPop() & 0x00FFFFFF; const u32 dst_x = FifoPeek() & 0x3F0; const u32 dst_y = (FifoPop() >> 16) & VRAM_HEIGHT_MASK; const u32 width = ((FifoPeek() & VRAM_WIDTH_MASK) + 0xF) & ~0xF; const u32 height = (FifoPop() >> 16) & VRAM_HEIGHT_MASK; DEBUG_LOG("Fill VRAM rectangle offset=({},{}), size=({},{})", dst_x, dst_y, width, height); if (width > 0 && height > 0) { GPUBackendFillVRAMCommand* cmd = GPUBackend::NewFillVRAMCommand(); cmd->x = static_cast(dst_x); cmd->y = static_cast(dst_y); cmd->width = static_cast(width); cmd->height = static_cast(height); cmd->color = color; cmd->interlaced_rendering = IsInterlacedRenderingEnabled(); cmd->active_line_lsb = m_crtc_state.active_line_lsb; GPUBackend::PushCommand(cmd); } AddCommandTicks(46 + ((width / 8) + 9) * height); EndCommand(); return true; } bool GPU::HandleCopyRectangleCPUToVRAMCommand() { CHECK_COMMAND_SIZE(3); m_fifo.RemoveOne(); const u32 coords = FifoPop(); const u32 size = FifoPop(); // Tenga Seiha does a bunch of completely-invalid VRAM writes on boot, then expects GPU idle to be set. // It's unclear what actually happens, I need to write another test, but for now, just skip these uploads. // Not setting GPU idle during the write command breaks Doom, so that's not an option. if (size == 0xFFFFFFFFu) [[unlikely]] { ERROR_LOG("Ignoring likely-invalid VRAM write to ({},{})", (coords & VRAM_WIDTH_MASK), ((coords >> 16) & VRAM_HEIGHT_MASK)); return true; } const u32 dst_x = coords & VRAM_WIDTH_MASK; const u32 dst_y = (coords >> 16) & VRAM_HEIGHT_MASK; const u32 copy_width = ReplaceZero(size & VRAM_WIDTH_MASK, 0x400); const u32 copy_height = ReplaceZero((size >> 16) & VRAM_HEIGHT_MASK, 0x200); const u32 num_pixels = copy_width * copy_height; const u32 num_words = ((num_pixels + 1) / 2); DEBUG_LOG("Copy rectangle from CPU to VRAM offset=({},{}), size=({},{})", dst_x, dst_y, copy_width, copy_height); EndCommand(); m_blitter_state = BlitterState::WritingVRAM; m_blit_buffer.reserve(num_words); m_blit_remaining_words = num_words; m_vram_transfer.x = Truncate16(dst_x); m_vram_transfer.y = Truncate16(dst_y); m_vram_transfer.width = Truncate16(copy_width); m_vram_transfer.height = Truncate16(copy_height); return true; } void GPU::FinishVRAMWrite() { if (IsInterlacedRenderingEnabled() && IsCRTCScanlinePending()) SynchronizeCRTC(); if (m_blit_remaining_words == 0) { if (g_settings.gpu_dump_cpu_to_vram_copies) { DumpVRAMToFile(TinyString::from_format("cpu_to_vram_copy_{}.png", s_cpu_to_vram_dump_id++), m_vram_transfer.width, m_vram_transfer.height, sizeof(u16) * m_vram_transfer.width, m_blit_buffer.data(), true); } UpdateVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, m_vram_transfer.height, m_blit_buffer.data(), m_GPUSTAT.set_mask_while_drawing, m_GPUSTAT.check_mask_before_draw); } else { const u32 num_pixels = ZeroExtend32(m_vram_transfer.width) * ZeroExtend32(m_vram_transfer.height); const u32 num_words = (num_pixels + 1) / 2; const u32 transferred_words = num_words - m_blit_remaining_words; const u32 transferred_pixels = transferred_words * 2; const u32 transferred_full_rows = transferred_pixels / m_vram_transfer.width; const u32 transferred_width_last_row = transferred_pixels % m_vram_transfer.width; WARNING_LOG("Partial VRAM write - transfer finished with {} of {} words remaining ({} full rows, {} last row)", m_blit_remaining_words, num_words, transferred_full_rows, transferred_width_last_row); const u8* blit_ptr = reinterpret_cast(m_blit_buffer.data()); if (transferred_full_rows > 0) { UpdateVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, static_cast(transferred_full_rows), blit_ptr, m_GPUSTAT.set_mask_while_drawing, m_GPUSTAT.check_mask_before_draw); blit_ptr += (ZeroExtend32(m_vram_transfer.width) * transferred_full_rows) * sizeof(u16); } if (transferred_width_last_row > 0) { UpdateVRAM(m_vram_transfer.x, static_cast(m_vram_transfer.y + transferred_full_rows), static_cast(transferred_width_last_row), 1, blit_ptr, m_GPUSTAT.set_mask_while_drawing, m_GPUSTAT.check_mask_before_draw); } } m_blit_buffer.clear(); m_vram_transfer = {}; m_blitter_state = BlitterState::Idle; } bool GPU::HandleCopyRectangleVRAMToCPUCommand() { CHECK_COMMAND_SIZE(3); m_fifo.RemoveOne(); m_vram_transfer.x = Truncate16(FifoPeek() & VRAM_WIDTH_MASK); m_vram_transfer.y = Truncate16((FifoPop() >> 16) & VRAM_HEIGHT_MASK); m_vram_transfer.width = ((Truncate16(FifoPeek()) - 1) & VRAM_WIDTH_MASK) + 1; m_vram_transfer.height = ((Truncate16(FifoPop() >> 16) - 1) & VRAM_HEIGHT_MASK) + 1; DEBUG_LOG("Copy rectangle from VRAM to CPU offset=({},{}), size=({},{})", m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, m_vram_transfer.height); DebugAssert(m_vram_transfer.col == 0 && m_vram_transfer.row == 0); // ensure VRAM shadow is up to date ReadVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, m_vram_transfer.height); if (g_settings.gpu_dump_vram_to_cpu_copies) { DumpVRAMToFile(TinyString::from_format("vram_to_cpu_copy_{}.png", s_vram_to_cpu_dump_id++), m_vram_transfer.width, m_vram_transfer.height, sizeof(u16) * VRAM_WIDTH, &g_vram[m_vram_transfer.y * VRAM_WIDTH + m_vram_transfer.x], true); } // switch to pixel-by-pixel read state m_blitter_state = BlitterState::ReadingVRAM; m_command_total_words = 0; // toss the entire read in the recorded trace. we might want to change this to mirroring GPUREAD in the future.. if (m_gpu_dump) [[unlikely]] m_gpu_dump->WriteDiscardVRAMRead(m_vram_transfer.width, m_vram_transfer.height); return true; } bool GPU::HandleCopyRectangleVRAMToVRAMCommand() { CHECK_COMMAND_SIZE(4); m_fifo.RemoveOne(); const u32 src_x = FifoPeek() & VRAM_WIDTH_MASK; const u32 src_y = (FifoPop() >> 16) & VRAM_HEIGHT_MASK; const u32 dst_x = FifoPeek() & VRAM_WIDTH_MASK; const u32 dst_y = (FifoPop() >> 16) & VRAM_HEIGHT_MASK; const u32 width = ReplaceZero(FifoPeek() & VRAM_WIDTH_MASK, 0x400); const u32 height = ReplaceZero((FifoPop() >> 16) & VRAM_HEIGHT_MASK, 0x200); DEBUG_LOG("Copy rectangle from VRAM to VRAM src=({},{}), dst=({},{}), size=({},{})", src_x, src_y, dst_x, dst_y, width, height); // Some VRAM copies aren't going to do anything. Most games seem to send a 2x2 VRAM copy at the end of a frame. const bool skip_copy = width == 0 || height == 0 || (src_x == dst_x && src_y == dst_y && !m_GPUSTAT.set_mask_while_drawing); if (!skip_copy) { GPUBackendCopyVRAMCommand* cmd = GPUBackend::NewCopyVRAMCommand(); cmd->src_x = static_cast(src_x); cmd->src_y = static_cast(src_y); cmd->dst_x = static_cast(dst_x); cmd->dst_y = static_cast(dst_y); cmd->width = static_cast(width); cmd->height = static_cast(height); cmd->check_mask_before_draw = m_GPUSTAT.check_mask_before_draw; cmd->set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing; GPUBackend::PushCommand(cmd); } AddCommandTicks(width * height * 2); EndCommand(); return true; }