GPU: Assume vertex commands are 8-byte aligned

This commit is contained in:
Stenzek 2024-12-28 20:09:37 +10:00
parent 8c807118c0
commit 799f5bdf97
No known key found for this signature in database
2 changed files with 15 additions and 15 deletions

View File

@@ -2480,9 +2480,9 @@ void GPU_HW::DrawLine(const GPUBackendDrawLineCommand* cmd)
for (u32 i = 0; i < num_vertices; i += 2) for (u32 i = 0; i < num_vertices; i += 2)
{ {
const GSVector2i start_pos = GSVector2i::load<false>(&cmd->vertices[i].x); const GSVector2i start_pos = GSVector2i::load<true>(&cmd->vertices[i].x);
const u32 start_color = cmd->vertices[i].color; const u32 start_color = cmd->vertices[i].color;
const GSVector2i end_pos = GSVector2i::load<false>(&cmd->vertices[i + 1].x); const GSVector2i end_pos = GSVector2i::load<true>(&cmd->vertices[i + 1].x);
const u32 end_color = cmd->vertices[i + 1].color; const u32 end_color = cmd->vertices[i + 1].color;
const GSVector4i bounds = GSVector4i::xyxy(start_pos, end_pos); const GSVector4i bounds = GSVector4i::xyxy(start_pos, end_pos);
@@ -2524,9 +2524,9 @@ void GPU_HW::DrawPreciseLine(const GPUBackendDrawPreciseLineCommand* cmd)
for (u32 i = 0; i < num_vertices; i += 2) for (u32 i = 0; i < num_vertices; i += 2)
{ {
const GSVector2 start_pos = GSVector2::load<false>(&cmd->vertices[i].x); const GSVector2 start_pos = GSVector2::load<true>(&cmd->vertices[i].x);
const u32 start_color = cmd->vertices[i].color; const u32 start_color = cmd->vertices[i].color;
const GSVector2 end_pos = GSVector2::load<false>(&cmd->vertices[i + 1].x); const GSVector2 end_pos = GSVector2::load<true>(&cmd->vertices[i + 1].x);
const u32 end_color = cmd->vertices[i + 1].color; const u32 end_color = cmd->vertices[i + 1].color;
const GSVector4 bounds = GSVector4::xyxy(start_pos, end_pos); const GSVector4 bounds = GSVector4::xyxy(start_pos, end_pos);
@@ -2765,7 +2765,7 @@ void GPU_HW::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd)
for (u32 i = 0; i < num_vertices; i++) for (u32 i = 0; i < num_vertices; i++)
{ {
const GPUBackendDrawPolygonCommand::Vertex& vert = cmd->vertices[i]; const GPUBackendDrawPolygonCommand::Vertex& vert = cmd->vertices[i];
const GSVector2 vert_pos = GSVector2(GSVector2i::load<false>(&vert.x)); const GSVector2 vert_pos = GSVector2(GSVector2i::load<true>(&vert.x));
vertices[i].Set(vert_pos.x, vert_pos.y, depth, 1.0f, raw_texture ? UINT32_C(0x00808080) : vert.color, texpage, vertices[i].Set(vert_pos.x, vert_pos.y, depth, 1.0f, raw_texture ? UINT32_C(0x00808080) : vert.color, texpage,
vert.texcoord, 0xFFFF0000u); vert.texcoord, 0xFFFF0000u);
} }
@@ -2839,9 +2839,9 @@ ALWAYS_INLINE_RELEASE bool GPU_HW::BeginPolygonDraw(const GPUBackendDrawCommand*
GSVector4i& clamped_draw_rect_012, GSVector4i& clamped_draw_rect_012,
GSVector4i& clamped_draw_rect_123) GSVector4i& clamped_draw_rect_123)
{ {
GSVector2 v0f = GSVector2::load<false>(&vertices[0].x); GSVector2 v0f = GSVector2::load<true>(&vertices[0].x);
GSVector2 v1f = GSVector2::load<false>(&vertices[1].x); GSVector2 v1f = GSVector2::load<true>(&vertices[1].x);
GSVector2 v2f = GSVector2::load<false>(&vertices[2].x); GSVector2 v2f = GSVector2::load<true>(&vertices[2].x);
GSVector2 min_pos_12 = v1f.min(v2f); GSVector2 min_pos_12 = v1f.min(v2f);
GSVector2 max_pos_12 = v1f.max(v2f); GSVector2 max_pos_12 = v1f.max(v2f);
GSVector4i draw_rect_012 = GSVector4i draw_rect_012 =
@@ -2867,7 +2867,7 @@ ALWAYS_INLINE_RELEASE bool GPU_HW::BeginPolygonDraw(const GPUBackendDrawCommand*
}; };
const GSVector4 tv01f = truncate_pos(GSVector4::xyxy(v0f, v1f)); const GSVector4 tv01f = truncate_pos(GSVector4::xyxy(v0f, v1f));
const GSVector4 tv23f = truncate_pos(GSVector4::xyxy(v2f, GSVector2::load<false>(&vertices[3].x))); const GSVector4 tv23f = truncate_pos(GSVector4::xyxy(v2f, GSVector2::load<true>(&vertices[3].x)));
const GSVector2 tv0f = tv01f.xy(); const GSVector2 tv0f = tv01f.xy();
const GSVector2 tv1f = tv01f.zw(); const GSVector2 tv1f = tv01f.zw();
const GSVector2 tv2f = tv23f.xy(); const GSVector2 tv2f = tv23f.xy();
@@ -2881,11 +2881,11 @@ ALWAYS_INLINE_RELEASE bool GPU_HW::BeginPolygonDraw(const GPUBackendDrawCommand*
!tdraw_rect_012.rintersects(m_clamped_drawing_area)); !tdraw_rect_012.rintersects(m_clamped_drawing_area));
if (!first_tri_culled) if (!first_tri_culled)
{ {
GSVector4::storel<false>(&vertices[0].x, tv01f); GSVector4::storel<true>(&vertices[0].x, tv01f);
GSVector4::storeh<false>(&vertices[1].x, tv01f); GSVector4::storeh<true>(&vertices[1].x, tv01f);
GSVector4::storel<false>(&vertices[2].x, tv23f); GSVector4::storel<true>(&vertices[2].x, tv23f);
if (num_vertices == 4) if (num_vertices == 4)
GSVector4::storeh<false>(&vertices[3].x, tv23f); GSVector4::storeh<true>(&vertices[3].x, tv23f);
GL_INS_FMT("Adjusted polygon from [{} {} {}] to [{} {} {}] due to coordinate truncation", v0f, v1f, v2f, tv0f, GL_INS_FMT("Adjusted polygon from [{} {} {}] to [{} {} {}] due to coordinate truncation", v0f, v1f, v2f, tv0f,
tv1f, tv2f); tv1f, tv2f);
@@ -2908,7 +2908,7 @@ ALWAYS_INLINE_RELEASE bool GPU_HW::BeginPolygonDraw(const GPUBackendDrawCommand*
if (num_vertices == 4) if (num_vertices == 4)
{ {
const GSVector2 v3f = GSVector2::load<false>(&vertices[3].x); const GSVector2 v3f = GSVector2::load<true>(&vertices[3].x);
const GSVector4i draw_rect_123 = GSVector4i(GSVector4(min_pos_12.min(v3f)).upld(GSVector4(max_pos_12.max(v3f)))) const GSVector4i draw_rect_123 = GSVector4i(GSVector4(min_pos_12.min(v3f)).upld(GSVector4(max_pos_12.max(v3f))))
.add32(GSVector4i::cxpr(0, 0, 1, 1)); .add32(GSVector4i::cxpr(0, 0, 1, 1));
clamped_draw_rect_123 = draw_rect_123.rintersect(m_clamped_drawing_area); clamped_draw_rect_123 = draw_rect_123.rintersect(m_clamped_drawing_area);

View File

@@ -121,7 +121,7 @@ private:
static_assert(GPUDevice::MIN_TEXEL_BUFFER_ELEMENTS >= (VRAM_WIDTH * VRAM_HEIGHT)); static_assert(GPUDevice::MIN_TEXEL_BUFFER_ELEMENTS >= (VRAM_WIDTH * VRAM_HEIGHT));
struct BatchVertex struct alignas(16) BatchVertex
{ {
float x; float x;
float y; float y;