diff --git a/src/core/gpu.h b/src/core/gpu.h index 344236c25..d177a4454 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -326,8 +326,10 @@ private: AddCommandTicks(pixels); } - ALWAYS_INLINE_RELEASE void AddDrawRectangleTicks(const GSVector4i clamped_rect, bool textured, bool semitransparent) + ALWAYS_INLINE_RELEASE void AddDrawRectangleTicks(const GSVector4i rect, bool textured, bool semitransparent) { + const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect); + u32 drawn_width = clamped_rect.width(); u32 drawn_height = clamped_rect.height(); @@ -373,8 +375,9 @@ private: AddCommandTicks(ticks_per_row * drawn_height); } - ALWAYS_INLINE_RELEASE void AddDrawLineTicks(const GSVector4i clamped_rect, bool shaded) + ALWAYS_INLINE_RELEASE void AddDrawLineTicks(const GSVector4i rect, bool shaded) { + const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); u32 drawn_width = clamped_rect.width(); u32 drawn_height = clamped_rect.height(); diff --git a/src/core/gpu_backend.cpp b/src/core/gpu_backend.cpp index cb6f6d1d9..6704646d9 100644 --- a/src/core/gpu_backend.cpp +++ b/src/core/gpu_backend.cpp @@ -97,6 +97,8 @@ GPUBackend::~GPUBackend() bool GPUBackend::Initialize(bool clear_vram, Error* error) { + m_clamped_drawing_area = GPU::GetClampedDrawingArea(GPU_SW_Rasterizer::g_drawing_area); + if (!CompileDisplayPipelines(true, true, g_gpu_settings.display_24bit_chroma_smoothing, error)) return false; @@ -451,9 +453,9 @@ void GPUBackend::HandleCommand(const GPUThreadCommand* cmd) case GPUBackendCommandType::SetDrawingArea: { - FlushRender(); const GPUBackendSetDrawingAreaCommand* ccmd = static_cast(cmd); GPU_SW_Rasterizer::g_drawing_area = ccmd->new_area; + m_clamped_drawing_area = GPU::GetClampedDrawingArea(ccmd->new_area); DrawingAreaChanged(); } break; diff --git a/src/core/gpu_backend.h b/src/core/gpu_backend.h index 3053c9490..a595fbbca 100644 --- a/src/core/gpu_backend.h +++ b/src/core/gpu_backend.h @@ -174,6 +174,8 @@ protected: void 
DestroyDeinterlaceTextures(); bool ApplyChromaSmoothing(); + GSVector4i m_clamped_drawing_area = {}; + s32 m_display_width = 0; s32 m_display_height = 0; s32 m_display_origin_left = 0; diff --git a/src/core/gpu_commands.cpp b/src/core/gpu_commands.cpp index cdcd5a4d5..09a50bbd7 100644 --- a/src/core/gpu_commands.cpp +++ b/src/core/gpu_commands.cpp @@ -428,22 +428,20 @@ bool GPU::HandleRenderPolygonCommand() } // Cull polygons which are too large. - const GSVector2 v0f = GSVector2::load(&cmd->vertices[0].x); - const GSVector2 v1f = GSVector2::load(&cmd->vertices[1].x); - const GSVector2 v2f = GSVector2::load(&cmd->vertices[2].x); - const GSVector2 min_pos_12 = v1f.min(v2f); - const GSVector2 max_pos_12 = v1f.max(v2f); - const GSVector4i draw_rect_012 = GSVector4i(GSVector4(min_pos_12.min(v0f)).upld(GSVector4(max_pos_12.max(v0f)))) - .add32(GSVector4i::cxpr(0, 0, 1, 1)); + const GSVector2i v0 = GSVector2i::load(&cmd->vertices[0].native_x); + const GSVector2i v1 = GSVector2i::load(&cmd->vertices[1].native_x); + const GSVector2i v2 = GSVector2i::load(&cmd->vertices[2].native_x); + const GSVector2i min_pos_12 = v1.min_s32(v2); + const GSVector2i max_pos_12 = v1.max_s32(v2); + const GSVector4i draw_rect_012 = + GSVector4i::xyxy(min_pos_12.min_s32(v0), max_pos_12.max_s32(v0)).add32(GSVector4i::cxpr(0, 0, 1, 1)); const bool first_tri_culled = - (draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT || - !draw_rect_012.rintersects(m_clamped_drawing_area)); + (draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT); if (first_tri_culled) { - // TODO: GPU events... somehow. 
- DEBUG_LOG("Culling off-screen/too-large polygon: {},{} {},{} {},{}", cmd->vertices[0].native_x, - cmd->vertices[0].native_y, cmd->vertices[1].native_x, cmd->vertices[1].native_y, - cmd->vertices[2].native_x, cmd->vertices[2].native_y); + DEBUG_LOG("Culling too-large polygon: {},{} {},{} {},{}", cmd->vertices[0].native_x, cmd->vertices[0].native_y, + cmd->vertices[1].native_x, cmd->vertices[1].native_y, cmd->vertices[2].native_x, + cmd->vertices[2].native_y); if (!rc.quad_polygon) { @@ -462,19 +460,19 @@ bool GPU::HandleRenderPolygonCommand() // quads if (rc.quad_polygon) { - const GSVector2 v3f = GSVector2::load(&cmd->vertices[3].x); - const GSVector4i draw_rect_123 = GSVector4i(GSVector4(min_pos_12.min(v3f)).upld(GSVector4(max_pos_12.max(v3f)))) + const GSVector2i v3 = GSVector2i::load(&cmd->vertices[3].native_x); + const GSVector4i draw_rect_123 = GSVector4i(min_pos_12.min_s32(v3)) + .upl64(GSVector4i(max_pos_12.max_s32(v3))) .add32(GSVector4i::cxpr(0, 0, 1, 1)); // Cull polygons which are too large. const bool second_tri_culled = - (draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT || - !draw_rect_123.rintersects(m_clamped_drawing_area)); + (draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT); if (second_tri_culled) { - DEBUG_LOG("Culling off-screen/too-large polygon (quad second half): {},{} {},{} {},{}", - cmd->vertices[2].native_x, cmd->vertices[2].native_y, cmd->vertices[1].native_x, - cmd->vertices[1].native_y, cmd->vertices[0].native_x, cmd->vertices[0].native_y); + DEBUG_LOG("Culling too-large polygon (quad second half): {},{} {},{} {},{}", cmd->vertices[2].native_x, + cmd->vertices[2].native_y, cmd->vertices[1].native_x, cmd->vertices[1].native_y, + cmd->vertices[3].native_x, cmd->vertices[3].native_y); if (first_tri_culled) { @@ -483,9 +481,6 @@ bool GPU::HandleRenderPolygonCommand() } // Remove second part of quad. 
- // NOTE: Culling this way results in subtle differences with UV clamping, since the fourth vertex is no - // longer considered in the range. This is mainly apparent when the UV gradient is zero. Seems like it - // generally looks better this way, so I'm keeping it. cmd->size = GPUThreadCommand::AlignCommandSize(sizeof(GPUBackendDrawPrecisePolygonCommand) + 3 * sizeof(GPUBackendDrawPrecisePolygonCommand::Vertex)); cmd->num_vertices = 3; @@ -540,11 +535,10 @@ bool GPU::HandleRenderPolygonCommand() const GSVector4i draw_rect_012 = GSVector4i::xyxy(min_pos_12.min_s32(v0), max_pos_12.max_s32(v0)).add32(GSVector4i::cxpr(0, 0, 1, 1)); const bool first_tri_culled = - (draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT || - !draw_rect_012.rintersects(m_clamped_drawing_area)); + (draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT); if (first_tri_culled) { - DEBUG_LOG("Culling off-screen/too-large polygon: {},{} {},{} {},{}", cmd->vertices[0].x, cmd->vertices[0].y, + DEBUG_LOG("Culling too-large polygon: {},{} {},{} {},{}", cmd->vertices[0].x, cmd->vertices[0].y, cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[2].x, cmd->vertices[2].y); if (!rc.quad_polygon) @@ -568,12 +562,11 @@ bool GPU::HandleRenderPolygonCommand() // Cull polygons which are too large. 
const bool second_tri_culled = - (draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT || - !draw_rect_123.rintersects(m_clamped_drawing_area)); + (draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT); if (second_tri_culled) { DEBUG_LOG("Culling too-large polygon (quad second half): {},{} {},{} {},{}", cmd->vertices[2].x, - cmd->vertices[2].y, cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[0].x, cmd->vertices[0].y); + cmd->vertices[2].y, cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[3].x, cmd->vertices[3].y); if (first_tri_culled) { @@ -681,15 +674,7 @@ bool GPU::HandleRenderRectangleCommand() } const GSVector4i rect = GSVector4i(cmd->x, cmd->y, cmd->x + cmd->width, cmd->y + cmd->height); - const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect); - if (clamped_rect.rempty()) [[unlikely]] - { - DEBUG_LOG("Culling off-screen rectangle {}", rect); - EndCommand(); - return true; - } - - AddDrawRectangleTicks(clamped_rect, rc.texture_enable, rc.transparency_enable); + AddDrawRectangleTicks(rect, rc.texture_enable, rc.transparency_enable); GPUBackend::PushCommand(cmd); EndCommand(); @@ -883,15 +868,13 @@ void GPU::FinishPolyline() const GSVector2 end_pos = GSVector2::load(&end.x); const GSVector4i rect = GSVector4i(GSVector4::xyxy(start_pos.min(end_pos), start_pos.max(end_pos))).add32(GSVector4i::cxpr(0, 0, 1, 1)); - const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); - - if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) + if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT) { DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", start_pos.x, start_pos.y, end_pos.x, end_pos.y); } else { - AddDrawLineTicks(clamped_rect, m_render_command.shading_enable); + AddDrawLineTicks(rect, m_render_command.shading_enable); cmd->vertices[out_vertex_count++] = start; 
cmd->vertices[out_vertex_count++] = end; @@ -930,15 +913,13 @@ void GPU::FinishPolyline() const GSVector4i rect = GSVector4i::xyxy(start_pos.min_s32(end_pos), start_pos.max_s32(end_pos)).add32(GSVector4i::cxpr(0, 0, 1, 1)); - const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); - - if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) + if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT) { - DEBUG_LOG("Culling too-large/off-screen line: {},{} - {},{}", start_pos.x, start_pos.y, end_pos.x, end_pos.y); + DEBUG_LOG("Culling too-large line: {},{} - {},{}", start_pos.x, start_pos.y, end_pos.x, end_pos.y); } else { - AddDrawLineTicks(clamped_rect, m_render_command.shading_enable); + AddDrawLineTicks(rect, m_render_command.shading_enable); GPUBackendDrawLineCommand::Vertex* out_vertex = &cmd->vertices[out_vertex_count]; out_vertex_count += 2; diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index d4b2054ba..2157d60a4 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -296,7 +296,7 @@ bool GPU_HW::Initialize(bool upload_vram, Error* error) if (upload_vram) UpdateVRAMOnGPU(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, VRAM_WIDTH * sizeof(u16), false, false, VRAM_SIZE_RECT); - DrawingAreaChanged(); + m_drawing_area_changed = true; return true; } @@ -2494,7 +2494,12 @@ void GPU_HW::DrawLine(const GPUBackendDrawLineCommand* cmd) const GSVector4i rect = GSVector4i::xyxy(start_pos.min_s32(end_pos), start_pos.max_s32(end_pos)).add32(GSVector4i::cxpr(0, 0, 1, 1)); const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); - DebugAssert(rect.width() <= MAX_PRIMITIVE_WIDTH && rect.height() <= MAX_PRIMITIVE_HEIGHT && !clamped_rect.rempty()); + DebugAssert(rect.width() <= MAX_PRIMITIVE_WIDTH && rect.height() <= MAX_PRIMITIVE_HEIGHT); + if (clamped_rect.rempty()) + { + GL_INS_FMT("Culling off-screen line {} => {}", start_pos, end_pos); + continue; + } 
AddDrawnRectangle(clamped_rect); DrawLine(GSVector4(bounds), start_color, end_color, depth); @@ -2533,7 +2538,11 @@ void GPU_HW::DrawPreciseLine(const GPUBackendDrawPreciseLineCommand* cmd) const GSVector4i rect = GSVector4i(GSVector4::xyxy(start_pos.min(end_pos), start_pos.max(end_pos))).add32(GSVector4i::cxpr(0, 0, 1, 1)); const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); - DebugAssert(rect.width() <= MAX_PRIMITIVE_WIDTH && rect.height() <= MAX_PRIMITIVE_HEIGHT && !clamped_rect.rempty()); + if (clamped_rect.rempty()) + { + GL_INS_FMT("Culling off-screen line {} => {}", start_pos, end_pos); + continue; + } AddDrawnRectangle(clamped_rect); DrawLine(bounds, start_color, end_color, depth); @@ -2658,6 +2667,16 @@ void GPU_HW::DrawLine(const GSVector4 bounds, u32 col0, u32 col1, float depth) void GPU_HW::DrawSprite(const GPUBackendDrawRectangleCommand* cmd) { + const GSVector2i pos = GSVector2i::load(&cmd->x); + const GSVector2i size = GSVector2i::load(&cmd->width).u16to32(); + const GSVector4i rect = GSVector4i::xyxy(pos, pos.add32(size)); + const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect); + if (clamped_rect.rempty()) + { + GL_INS_FMT("Culling off-screen sprite {}", rect); + return; + } + PrepareDraw(cmd); SetBatchDepthBuffer(cmd, false); SetBatchSpriteMode(cmd, m_allow_sprite_mode); @@ -2673,11 +2692,6 @@ void GPU_HW::DrawSprite(const GPUBackendDrawRectangleCommand* cmd) const u32 rectangle_width = cmd->width; const u32 rectangle_height = cmd->height; - const GSVector4i rect = - GSVector4i(pos_x, pos_y, pos_x + static_cast(rectangle_width), pos_y + static_cast(rectangle_height)); - const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect); - DebugAssert(!clamped_rect.rempty()); - // Split the rectangle into multiple quads if it's greater than 256x256, as the texture page should repeat. 
u32 tex_top = orig_tex_top; for (u32 y_offset = 0; y_offset < rectangle_height;) @@ -2747,15 +2761,12 @@ void GPU_HW::DrawSprite(const GPUBackendDrawRectangleCommand* cmd) void GPU_HW::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) { - PrepareDraw(cmd); - SetBatchDepthBuffer(cmd, false); - // TODO: This could write directly to the mapped GPU pointer. But watch out for the reads below. const float depth = GetCurrentNormalizedVertexDepth(); const bool raw_texture = (cmd->texture_enable && cmd->raw_texture_enable); - const u32 num_vertices = cmd->num_vertices; - const u32 texpage = m_draw_mode.bits; + const u32 texpage = ZeroExtend32(cmd->draw_mode.bits) | (ZeroExtend32(cmd->palette.bits) << 16); std::array vertices; + u32 num_vertices = cmd->num_vertices; for (u32 i = 0; i < num_vertices; i++) { const GPUBackendDrawPolygonCommand::Vertex& vert = cmd->vertices[i]; @@ -2764,7 +2775,13 @@ void GPU_HW::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) vert.texcoord, 0xFFFF0000u); } - FinishPolygonDraw(cmd, vertices, num_vertices, false, false); + GSVector4i clamped_draw_rect_012, clamped_draw_rect_123; + if (BeginPolygonDraw(cmd, vertices, num_vertices, clamped_draw_rect_012, clamped_draw_rect_123)) + { + SetBatchDepthBuffer(cmd, false); + + FinishPolygonDraw(cmd, vertices, num_vertices, false, false, clamped_draw_rect_012, clamped_draw_rect_123); + } if (ShouldDrawWithSoftwareRenderer()) { @@ -2778,14 +2795,12 @@ void GPU_HW::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) void GPU_HW::DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd) { - PrepareDraw(cmd); - // TODO: This could write directly to the mapped GPU pointer. But watch out for the reads below. 
const float depth = GetCurrentNormalizedVertexDepth(); const bool raw_texture = (cmd->texture_enable && cmd->raw_texture_enable); - const u32 num_vertices = cmd->num_vertices; - const u32 texpage = m_draw_mode.bits; + const u32 texpage = ZeroExtend32(cmd->draw_mode.bits) | (ZeroExtend32(cmd->palette.bits) << 16); std::array vertices; + u32 num_vertices = cmd->num_vertices; for (u32 i = 0; i < num_vertices; i++) { const GPUBackendDrawPrecisePolygonCommand::Vertex& vert = cmd->vertices[i]; @@ -2793,14 +2808,18 @@ void GPU_HW::DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd) vert.texcoord, 0xFFFF0000u); } - const bool use_depth = m_pgxp_depth_buffer && cmd->valid_w; - SetBatchDepthBuffer(cmd, use_depth); - if (use_depth) - CheckForDepthClear(cmd, vertices.data(), num_vertices); + GSVector4i clamped_draw_rect_012, clamped_draw_rect_123; + if (BeginPolygonDraw(cmd, vertices, num_vertices, clamped_draw_rect_012, clamped_draw_rect_123)) + { + const bool use_depth = m_pgxp_depth_buffer && cmd->valid_w; + SetBatchDepthBuffer(cmd, use_depth); + if (use_depth) + CheckForDepthClear(cmd, vertices.data(), num_vertices); - // Use PGXP to exclude primitives that are definitely 3D. - const bool is_3d = (vertices[0].w != vertices[1].w || vertices[0].w != vertices[2].w); - FinishPolygonDraw(cmd, vertices, num_vertices, true, is_3d); + // Use PGXP to exclude primitives that are definitely 3D. 
+ const bool is_3d = (vertices[0].w != vertices[1].w || vertices[0].w != vertices[2].w); + FinishPolygonDraw(cmd, vertices, num_vertices, true, is_3d, clamped_draw_rect_012, clamped_draw_rect_123); + } if (ShouldDrawWithSoftwareRenderer()) { @@ -2820,16 +2839,11 @@ void GPU_HW::DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd) } } -ALWAYS_INLINE_RELEASE void GPU_HW::FinishPolygonDraw(const GPUBackendDrawCommand* cmd, - std::array& vertices, u32 num_vertices, - bool is_precise, bool is_3d) +ALWAYS_INLINE_RELEASE bool GPU_HW::BeginPolygonDraw(const GPUBackendDrawCommand* cmd, + std::array& vertices, u32& num_vertices, + GSVector4i& clamped_draw_rect_012, + GSVector4i& clamped_draw_rect_123) { - // Use PGXP to exclude primitives that are definitely 3D. - if (m_resolution_scale > 1 && !is_3d && cmd->quad_polygon) - HandleFlippedQuadTextureCoordinates(cmd, vertices.data()); - else if (m_allow_sprite_mode) - SetBatchSpriteMode(cmd, is_precise ? !is_3d : IsPossibleSpritePolygon(vertices.data())); - const GSVector2 v0f = GSVector2::load(&vertices[0].x); const GSVector2 v1f = GSVector2::load(&vertices[1].x); const GSVector2 v2f = GSVector2::load(&vertices[2].x); @@ -2837,9 +2851,69 @@ ALWAYS_INLINE_RELEASE void GPU_HW::FinishPolygonDraw(const GPUBackendDrawCommand const GSVector2 max_pos_12 = v1f.max(v2f); const GSVector4i draw_rect_012 = GSVector4i(GSVector4(min_pos_12.min(v0f)).upld(GSVector4(max_pos_12.max(v0f)))).add32(GSVector4i::cxpr(0, 0, 1, 1)); - const GSVector4i clamped_draw_rect_012 = draw_rect_012.rintersect(m_clamped_drawing_area); - DebugAssert(draw_rect_012.width() <= MAX_PRIMITIVE_WIDTH && draw_rect_012.height() <= MAX_PRIMITIVE_HEIGHT && - !clamped_draw_rect_012.rempty()); + clamped_draw_rect_012 = draw_rect_012.rintersect(m_clamped_drawing_area); + const bool first_tri_culled = clamped_draw_rect_012.rempty(); + if (first_tri_culled) + { + GL_INS_FMT("Culling off-screen polygon: {},{} {},{} {},{}", vertices[0].x, vertices[0].y, 
vertices[1].x, + vertices[1].y, vertices[2].x, vertices[2].y); + + if (num_vertices != 4) + return false; + } + + if (num_vertices == 4) + { + const GSVector2 v3f = GSVector2::load(&vertices[3].x); + const GSVector4i draw_rect_123 = GSVector4i(GSVector4(min_pos_12.min(v3f)).upld(GSVector4(max_pos_12.max(v3f)))) + .add32(GSVector4i::cxpr(0, 0, 1, 1)); + clamped_draw_rect_123 = draw_rect_123.rintersect(m_clamped_drawing_area); + const bool second_tri_culled = clamped_draw_rect_123.rempty(); + if (second_tri_culled) + { + GL_INS_FMT("Culling off-screen polygon (quad second half): {},{} {},{} {},{}", vertices[2].x, vertices[2].y, + vertices[1].x, vertices[1].y, vertices[3].x, vertices[3].y); + + if (first_tri_culled) + { + // both parts culled + return false; + } + + // Remove second part of quad. + // NOTE: Culling this way results in subtle differences with UV clamping, since the fourth vertex is no + // longer considered in the range. This is mainly apparent when the UV gradient is zero. Seems like it + // generally looks better this way, so I'm keeping it. + num_vertices = 3; + } + else + { + // If first part was culled, move the second part to the first. + if (first_tri_culled) + { + clamped_draw_rect_012 = clamped_draw_rect_123; + std::memcpy(&vertices[0], &vertices[2], sizeof(BatchVertex)); + std::memcpy(&vertices[2], &vertices[3], sizeof(BatchVertex)); + num_vertices = 3; + } + } + } + + PrepareDraw(cmd); + return true; +} + +ALWAYS_INLINE_RELEASE void GPU_HW::FinishPolygonDraw(const GPUBackendDrawCommand* cmd, + std::array& vertices, u32 num_vertices, + bool is_precise, bool is_3d, + const GSVector4i clamped_draw_rect_012, + const GSVector4i clamped_draw_rect_123) +{ + // Use PGXP to exclude primitives that are definitely 3D. + if (m_resolution_scale > 1 && !is_3d && cmd->quad_polygon) + HandleFlippedQuadTextureCoordinates(cmd, vertices.data()); + else if (m_allow_sprite_mode) + SetBatchSpriteMode(cmd, is_precise ?
!is_3d : IsPossibleSpritePolygon(vertices.data())); if (cmd->texture_enable && m_compute_uv_range) ComputePolygonUVLimits(cmd, vertices.data(), num_vertices); @@ -2864,12 +2938,6 @@ ALWAYS_INLINE_RELEASE void GPU_HW::FinishPolygonDraw(const GPUBackendDrawCommand // quads, use num_vertices here, because the first half might be culled if (num_vertices == 4) { - const GSVector2 v3f = GSVector2::load(&vertices[3].x); - const GSVector4i draw_rect_123 = GSVector4i(GSVector4(min_pos_12.min(v3f)).upld(GSVector4(max_pos_12.max(v3f)))) - .add32(GSVector4i::cxpr(0, 0, 1, 1)); - const GSVector4i clamped_draw_rect_123 = draw_rect_123.rintersect(m_clamped_drawing_area); - DebugAssert(draw_rect_123.width() <= MAX_PRIMITIVE_WIDTH && draw_rect_123.height() <= MAX_PRIMITIVE_HEIGHT && - !clamped_draw_rect_123.rempty()); AddDrawnRectangle(clamped_draw_rect_123); DebugAssert(m_batch_index_space >= 3); @@ -3723,7 +3791,7 @@ void GPU_HW::FlushRender() void GPU_HW::DrawingAreaChanged() { - m_clamped_drawing_area = GPU::GetClampedDrawingArea(GPU_SW_Rasterizer::g_drawing_area); + FlushRender(); m_drawing_area_changed = true; } diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 7d7263ef8..c9f8d9d46 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -227,8 +227,11 @@ private: void EnsureVertexBufferSpace(u32 required_vertices, u32 required_indices); void EnsureVertexBufferSpaceForCommand(const GPUBackendDrawCommand* cmd); void PrepareDraw(const GPUBackendDrawCommand* cmd); + bool BeginPolygonDraw(const GPUBackendDrawCommand* cmd, std::array& vertices, u32& num_vertices, + GSVector4i& clamped_draw_rect_012, GSVector4i& clamped_draw_rect_123); void FinishPolygonDraw(const GPUBackendDrawCommand* cmd, std::array& vertices, u32 num_vertices, - bool is_precise, bool is_3d); + bool is_precise, bool is_3d, const GSVector4i clamped_draw_rect_012, + const GSVector4i clamped_draw_rect_123); void ResetBatchVertexDepth(); /// Returns the value to be written to the depth buffer for the current 
operation for mask bit emulation. @@ -325,7 +328,6 @@ private: BatchUBOData m_batch_ubo_data = {}; // Bounding box of VRAM area that the GPU has drawn into. - GSVector4i m_clamped_drawing_area = {}; GSVector4i m_vram_dirty_draw_rect = INVALID_RECT; GSVector4i m_vram_dirty_write_rect = INVALID_RECT; // TODO: Don't use in TC mode, should be kept at zero. GSVector4i m_current_uv_rect = INVALID_RECT; diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index ba493132e..6005bbf21 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -12,6 +12,7 @@ #include "common/align.h" #include "common/assert.h" +#include "common/gsvector_formatter.h" #include "common/intrin.h" #include "common/log.h" @@ -135,6 +136,18 @@ void GPU_SW::DrawPrecisePolygon(const GPUBackendDrawPrecisePolygonCommand* cmd) void GPU_SW::DrawSprite(const GPUBackendDrawRectangleCommand* cmd) { + // Sprites coordinates are truncated in the GPU class, so it's safe to cull them here. + // Probably wrong, but if we ever change it, this should be removed. + const GSVector2i pos = GSVector2i::load(&cmd->x); + const GSVector2i size = GSVector2i::load(&cmd->width).u16to32(); + const GSVector4i rect = GSVector4i::xyxy(pos, pos.add32(size)); + const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect); + if (clamped_rect.rempty()) + { + DEBUG_LOG("Culling off-screen sprite {}", rect); + return; + } + const GPU_SW_Rasterizer::DrawRectangleFunction DrawFunction = GPU_SW_Rasterizer::GetDrawRectangleFunction(cmd->texture_enable, cmd->raw_texture_enable, cmd->transparency_enable);