duckstation/src/core/gpu_sw_rasterizer.cpp
2024-12-19 23:32:15 +10:00

130 lines
4.4 KiB
C++

// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
#include "gpu_sw_rasterizer.h"
#include "gpu.h"
#include "cpuinfo.h"
#include "common/gsvector.h"
#include "common/log.h"
#include "common/string_util.h"
LOG_CHANNEL(GPU_SW);
namespace GPU_SW_Rasterizer {

// Dither lookup table, constant-initialized from DITHER_MATRIX. For every matrix cell and every input
// value it stores (value + matrix entry) >> 3, saturated to the range [0, 31].
constinit const DitherLUT g_dither_lut = []() constexpr {
  DitherLUT table = {};
  for (u32 row = 0; row < DITHER_MATRIX_SIZE; row++)
  {
    for (u32 col = 0; col < DITHER_MATRIX_SIZE; col++)
    {
      for (u32 value = 0; value < DITHER_LUT_SIZE; value++)
      {
        // Apply the matrix offset, then discard the low three bits.
        s32 entry = (static_cast<s32>(value) + DITHER_MATRIX[row][col]) >> 3;

        // Saturate so the stored result always lies in 0..31.
        if (entry < 0)
          entry = 0;
        else if (entry > 31)
          entry = 31;

        table[row][col][value] = static_cast<u8>(entry);
      }
    }
  }
  return table;
}();

// Primitive drawing dispatch tables. Null until SelectImplementation() assigns them.
const DrawRectangleFunctionTable* DrawRectangleFunctions = nullptr;
const DrawTriangleFunctionTable* DrawTriangleFunctions = nullptr;
const DrawLineFunctionTable* DrawLineFunctions = nullptr;

// VRAM fill/write/copy entry points. Null until SelectImplementation() assigns them.
FillVRAMFunction FillVRAM = nullptr;
WriteVRAMFunction WriteVRAM = nullptr;
CopyVRAMFunction CopyVRAM = nullptr;

// Drawing area state, value-initialized at startup.
GPUDrawingArea g_drawing_area = {};

} // namespace GPU_SW_Rasterizer
// Reloads g_gpu_clut from the VRAM location addressed by a palette register.
//   reg          - supplies the X and Y base of the palette within g_vram.
//   clut_is_8bit - false copies 16 entries; true copies 256 entries.
// In the 256-entry case, a palette that would run past the end of the VRAM row continues from the start
// of that same row.
void GPU_SW_Rasterizer::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit)
{
  const u16* const row = &g_vram[reg.GetYBase() * VRAM_WIDTH];
  const u32 base_x = reg.GetXBase();

  if (!clut_is_8bit)
  {
    // Wraparound can't happen in 4-bit mode.
    std::memcpy(g_gpu_clut, row + base_x, sizeof(u16) * 16);
    return;
  }

  if ((base_x + 256) > VRAM_WIDTH) [[unlikely]]
  {
    // Split the copy: entries up to the end of the row first, then the remainder from column zero.
    const u32 head_count = VRAM_WIDTH - base_x;
    const u32 tail_count = 256 - head_count;
    std::memcpy(g_gpu_clut, row + base_x, sizeof(u16) * head_count);
    std::memcpy(&g_gpu_clut[head_count], row, sizeof(u16) * tail_count);
    return;
  }

  // Common case: all 256 entries are contiguous within the row.
  std::memcpy(g_gpu_clut, row + base_x, sizeof(u16) * 256);
}
// Default scalar implementation definitions.
// The shared rasterizer source is textually included inside an anonymous namespace nested in
// GPU_SW_Rasterizer::Scalar. This file does not define USE_VECTOR before this include.
// SelectImplementation() expects the include to provide Scalar::DrawRectangleFunctions,
// Scalar::DrawTriangleFunctions, Scalar::DrawLineFunctions, Scalar::FillVRAMImpl, Scalar::WriteVRAMImpl
// and Scalar::CopyVRAMImpl.
namespace GPU_SW_Rasterizer::Scalar {
namespace {
#include "gpu_sw_rasterizer.inl"
} // namespace
} // namespace GPU_SW_Rasterizer::Scalar
// Default vector implementation definitions.
// Only compiled when CPU_ARCH_SSE or CPU_ARCH_NEON is defined. The same rasterizer source used for the
// scalar build is included a second time, this time inside GPU_SW_Rasterizer::SIMD with USE_VECTOR set.
#if defined(CPU_ARCH_SSE) || defined(CPU_ARCH_NEON)
namespace GPU_SW_Rasterizer::SIMD {
namespace {
// USE_VECTOR is defined only for the duration of the include and removed straight afterwards.
#define USE_VECTOR 1
#include "gpu_sw_rasterizer.inl"
#undef USE_VECTOR
} // namespace
} // namespace GPU_SW_Rasterizer::SIMD
#endif // defined(CPU_ARCH_SSE) || defined(CPU_ARCH_NEON)
// Chooses which compiled rasterizer implementation the dispatch pointers refer to.
//
// The first call assigns DrawRectangleFunctions, DrawTriangleFunctions, DrawLineFunctions, FillVRAM,
// WriteVRAM and CopyVRAM, and logs the choice once. Every later call returns immediately.
//
// When CPU_ARCH_SSE or CPU_ARCH_NEON is defined, the SIMD implementation is used unless the SW_USE_ISA
// environment variable is set and does not match "SIMD" (compared with StringUtil::Strcasecmp); in that
// case, and on builds with neither macro defined, the scalar implementation is used.
void GPU_SW_Rasterizer::SelectImplementation()
{
  static bool selected = false;
  if (selected)
    return;

  selected = true;

// Logs the chosen implementation and points every dispatch global at the definitions in namespace `isa`.
#define SELECT_IMPLEMENTATION(isa)                                                                                     \
  do                                                                                                                   \
  {                                                                                                                    \
    INFO_LOG("Using " #isa " software rasterizer implementation.");                                                    \
    DrawRectangleFunctions = &isa::DrawRectangleFunctions;                                                             \
    DrawTriangleFunctions = &isa::DrawTriangleFunctions;                                                               \
    DrawLineFunctions = &isa::DrawLineFunctions;                                                                       \
    FillVRAM = &isa::FillVRAMImpl;                                                                                     \
    WriteVRAM = &isa::WriteVRAMImpl;                                                                                   \
    CopyVRAM = &isa::CopyVRAMImpl;                                                                                     \
  } while (0)

#if defined(CPU_ARCH_SSE) || defined(CPU_ARCH_NEON)
  // Optional override; null when the variable is not set.
  const char* use_isa = std::getenv("SW_USE_ISA");

  // AVX2/256-bit path still has issues, and I need to make sure that it's not ODR'ing any shared
  // symbols on top of the base symbols.
  // The trailing "&& 0" keeps this branch compiled out.
#if defined(CPU_ARCH_SSE) && defined(_MSC_VER) && 0
  if (cpuinfo_has_x86_avx2() && (!use_isa || StringUtil::Strcasecmp(use_isa, "AVX2") == 0))
  {
    SELECT_IMPLEMENTATION(AVX2);
    return;
  }
#endif

  if (!use_isa || StringUtil::Strcasecmp(use_isa, "SIMD") == 0)
  {
    SELECT_IMPLEMENTATION(SIMD);
    return;
  }
#endif

  // Fallback. SELECT_IMPLEMENTATION already logs the choice, so no separate message is emitted here;
  // previously the scalar path reported itself twice.
  SELECT_IMPLEMENTATION(Scalar);

#undef SELECT_IMPLEMENTATION
}