From 03d6d12b8aba9b3b39e32c10d08268232de1fdfe Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 13 Apr 2025 13:09:53 +1000 Subject: [PATCH] Image: Export RGBA8 conversion functions --- src/util/image.cpp | 202 +++++++++++++++++++++++---------------------- src/util/image.h | 5 +- 2 files changed, 108 insertions(+), 99 deletions(-) diff --git a/src/util/image.cpp b/src/util/image.cpp index d645a352c..eef925d5f 100644 --- a/src/util/image.cpp +++ b/src/util/image.cpp @@ -80,8 +80,8 @@ static const FormatHandler* GetFormatHandler(std::string_view extension) return nullptr; } -static void SwapBGRAToRGBA(void* pixels_out, u32 pixels_out_pitch, const void* pixels_in, u32 pixels_in_pitch, - u32 width, u32 height); +static void SwapBGRAToRGBA(void* RESTRICT pixels_out, u32 pixels_out_pitch, const void* RESTRICT pixels_in, + u32 pixels_in_pitch, u32 width, u32 height); Image::Image() = default; @@ -500,16 +500,17 @@ void SwapBGRAToRGBA(void* pixels_out, u32 pixels_out_pitch, const void* pixels_i } template -static void DecompressBC(Image& image_out, const Image& image_in) +static void DecompressBC(void* RESTRICT pixels_out, u32 pixels_out_pitch, const void* RESTRICT pixels_in, + u32 pixels_in_pitch, u32 width, u32 height) { constexpr u32 BC_BLOCK_SIZE = 4; constexpr u32 BC_BLOCK_BYTES = 16; - const u32 blocks_wide = image_in.GetBlocksWide(); - const u32 blocks_high = image_in.GetBlocksHigh(); + const u32 blocks_wide = Common::AlignUpPow2(width, 4) / 4; + const u32 blocks_high = Common::AlignUpPow2(height, 4) / 4; for (u32 y = 0; y < blocks_high; y++) { - const u8* block_in = image_in.GetRowPixels(y); + const u8* block_in = static_cast(pixels_in) + (y * pixels_in_pitch); for (u32 x = 0; x < blocks_wide; x++, block_in += BC_BLOCK_BYTES) { // decompress block @@ -517,20 +518,20 @@ static void DecompressBC(Image& image_out, const Image& image_in) { case ImageFormat::BC1: { - DecompressBlockBC1(x * BC_BLOCK_SIZE, y * BC_BLOCK_SIZE, image_out.GetPitch(), block_in, - image_out.GetPixels()); + DecompressBlockBC1(x * BC_BLOCK_SIZE, y * BC_BLOCK_SIZE, pixels_out_pitch, block_in, + static_cast(pixels_out)); } break; case ImageFormat::BC2: { - DecompressBlockBC2(x * BC_BLOCK_SIZE, y * BC_BLOCK_SIZE, image_out.GetPitch(), block_in, - image_out.GetPixels()); + DecompressBlockBC2(x * BC_BLOCK_SIZE, y * BC_BLOCK_SIZE, pixels_out_pitch, block_in, + static_cast(pixels_out)); } break; case ImageFormat::BC3: { - DecompressBlockBC3(x * BC_BLOCK_SIZE, y * BC_BLOCK_SIZE, image_out.GetPitch(), block_in, - image_out.GetPixels()); + DecompressBlockBC3(x * BC_BLOCK_SIZE, y * BC_BLOCK_SIZE, pixels_out_pitch, block_in, + static_cast(pixels_out)); } break; @@ -540,13 +541,14 @@ static void DecompressBC(Image& image_out, const Image& image_in) bc7decomp::unpack_bc7(block_in, reinterpret_cast(block_pixels_out)); // and write it to the new image - const u32* copy_in_ptr = block_pixels_out; - u8* copy_out_ptr = image_out.GetRowPixels(y * BC_BLOCK_SIZE) + (x * BC_BLOCK_SIZE * sizeof(u32)); + const u32* RESTRICT copy_in_ptr = block_pixels_out; + u8* RESTRICT copy_out_ptr = + static_cast(pixels_out) + (y * BC_BLOCK_SIZE * pixels_out_pitch) + (x * BC_BLOCK_SIZE * sizeof(u32)); for (u32 sy = 0; sy < 4; sy++) { std::memcpy(copy_out_ptr, copy_in_ptr, sizeof(u32) * BC_BLOCK_SIZE); copy_in_ptr += BC_BLOCK_SIZE; - copy_out_ptr += image_out.GetPitch(); + copy_out_ptr += pixels_out_pitch; } } break; @@ -565,39 +567,46 @@ std::optional Image::ConvertToRGBA8(Error* error) const return ret; } - switch (m_format) + ret = Image(m_width, m_height, ImageFormat::RGBA8); + if (!ConvertToRGBA8(ret->GetPixels(), ret->GetPitch(), m_pixels.get(), m_pitch, m_width, m_height, m_format, error)) + ret.reset(); + + return ret; +} + +bool Image::ConvertToRGBA8(void* RESTRICT pixels_out, u32 pixels_out_pitch, const void* RESTRICT pixels_in, + u32 pixels_in_pitch, u32 width, u32 height, ImageFormat format, Error* error) +{ + switch (format) { case ImageFormat::BGRA8: { - ret = Image(m_width, m_height, ImageFormat::RGBA8); - SwapBGRAToRGBA(ret->GetPixels(), ret->GetPitch(), m_pixels.get(), m_pitch, m_width, m_height); + SwapBGRAToRGBA(pixels_out, pixels_out_pitch, pixels_in, pixels_in_pitch, width, height); + return true; } - break; case ImageFormat::RGBA8: { - ret = Image(m_width, m_height, m_format, m_pixels.get(), m_pitch); + StringUtil::StrideMemCpy(pixels_out, pixels_out_pitch, pixels_in, pixels_in_pitch, sizeof(u32) * width, height); + return true; } - break; case ImageFormat::RGB565: { - ret = Image(m_width, m_height, ImageFormat::RGBA8); - constexpr u32 pixels_per_vec = 8; - [[maybe_unused]] const u32 aligned_width = Common::AlignDownPow2(m_width, pixels_per_vec); + [[maybe_unused]] const u32 aligned_width = Common::AlignDownPow2(width, pixels_per_vec); - for (u32 y = 0; y < m_height; y++) + for (u32 y = 0; y < height; y++) { - const u8* pixels_in = GetRowPixels(y); - u8* pixels_out = ret->GetRowPixels(y); + const u8* RESTRICT row_pixels_in = static_cast(pixels_in) + (y * pixels_in_pitch); + u8* RESTRICT row_pixels_out = static_cast(pixels_out) + (y * pixels_out_pitch); u32 x = 0; #ifdef CPU_ARCH_SIMD for (; x < aligned_width; x += pixels_per_vec) { - GSVector4i rgb565 = GSVector4i::load(pixels_in); - pixels_in += sizeof(u16) * pixels_per_vec; + GSVector4i rgb565 = GSVector4i::load(row_pixels_in); + row_pixels_in += sizeof(u16) * pixels_per_vec; GSVector4i r = rgb565.srl16<11>(); r = r.sll16<3>() | r.sll16<13>().srl16<13>(); @@ -613,51 +622,50 @@ std::optional Image::ConvertToRGBA8(Error* error) const const GSVector4i high = r.uph64().u16to32() | g.uph64().u16to32().sll32<8>() | b.uph64().u16to32().sll32<16>() | GSVector4i::cxpr(0xFF000000); - GSVector4i::store(pixels_out, low); - pixels_out += sizeof(GSVector4i); + GSVector4i::store(row_pixels_out, low); + row_pixels_out += sizeof(GSVector4i); - GSVector4i::store(pixels_out, high); - pixels_out += sizeof(GSVector4i); + GSVector4i::store(row_pixels_out, high); + row_pixels_out += sizeof(GSVector4i); } #endif DONT_VECTORIZE_THIS_LOOP - for (; x < m_width; x++) + for (; x < width; x++) { // RGB565 -> RGBA8 u16 pixel_in; - std::memcpy(&pixel_in, pixels_in, sizeof(u16)); - pixels_in += sizeof(u16); + std::memcpy(&pixel_in, row_pixels_in, sizeof(u16)); + row_pixels_in += sizeof(u16); const u8 r5 = Truncate8(pixel_in >> 11); const u8 g6 = Truncate8((pixel_in >> 5) & 0x3F); const u8 b5 = Truncate8(pixel_in & 0x1F); const u32 rgba8 = ZeroExtend32((r5 << 3) | (r5 & 7)) | (ZeroExtend32((g6 << 2) | (g6 & 3)) << 8) | (ZeroExtend32((b5 << 3) | (b5 & 7)) << 16) | (0xFF000000u); - std::memcpy(pixels_out, &rgba8, sizeof(u32)); - pixels_out += sizeof(u32); + std::memcpy(row_pixels_out, &rgba8, sizeof(u32)); + row_pixels_out += sizeof(u32); } } + + return true; } - break; case ImageFormat::RGB5A1: { - ret = Image(m_width, m_height, ImageFormat::RGBA8); - constexpr u32 pixels_per_vec = 8; - [[maybe_unused]] const u32 aligned_width = Common::AlignDownPow2(m_width, pixels_per_vec); + [[maybe_unused]] const u32 aligned_width = Common::AlignDownPow2(width, pixels_per_vec); - for (u32 y = 0; y < m_height; y++) + for (u32 y = 0; y < height; y++) { - const u8* pixels_in = GetRowPixels(y); - u8* pixels_out = ret->GetRowPixels(y); + const u8* RESTRICT row_pixels_in = static_cast(pixels_in) + (y * pixels_in_pitch); + u8* RESTRICT row_pixels_out = static_cast(pixels_out) + (y * pixels_out_pitch); u32 x = 0; #ifdef CPU_ARCH_SIMD for (; x < aligned_width; x += pixels_per_vec) { - GSVector4i rgb5a1 = GSVector4i::load(pixels_in); - pixels_in += sizeof(u16) * pixels_per_vec; + GSVector4i rgb5a1 = GSVector4i::load(row_pixels_in); + row_pixels_in += sizeof(u16) * pixels_per_vec; GSVector4i r = rgb5a1.sll16<1>().srl16<11>(); r = r.sll16<3>() | r.sll16<13>().srl16<13>(); @@ -675,52 +683,51 @@ std::optional Image::ConvertToRGBA8(Error* error) const const GSVector4i high = r.uph64().u16to32() | g.uph64().u16to32().sll32<8>() | b.uph64().u16to32().sll32<16>() | a.uph64().u16to32().sll32<24>(); - GSVector4i::store(pixels_out, low); - pixels_out += sizeof(GSVector4i); + GSVector4i::store(row_pixels_out, low); + row_pixels_out += sizeof(GSVector4i); - GSVector4i::store(pixels_out, high); - pixels_out += sizeof(GSVector4i); + GSVector4i::store(row_pixels_out, high); + row_pixels_out += sizeof(GSVector4i); } #endif DONT_VECTORIZE_THIS_LOOP - for (; x < m_width; x++) + for (; x < width; x++) { // RGB5A1 -> RGBA8 u16 pixel_in; - std::memcpy(&pixel_in, pixels_in, sizeof(u16)); - pixels_in += sizeof(u16); + std::memcpy(&pixel_in, row_pixels_in, sizeof(u16)); + row_pixels_in += sizeof(u16); const u8 a1 = Truncate8(pixel_in >> 15); const u8 r5 = Truncate8((pixel_in >> 10) & 0x1F); const u8 g6 = Truncate8((pixel_in >> 5) & 0x1F); const u8 b5 = Truncate8(pixel_in & 0x1F); const u32 rgba8 = ZeroExtend32((r5 << 3) | (r5 & 7)) | (ZeroExtend32((g6 << 3) | (g6 & 7)) << 8) | (ZeroExtend32((b5 << 3) | (b5 & 7)) << 16) | (a1 ? 0xFF000000u : 0u); - std::memcpy(pixels_out, &rgba8, sizeof(u32)); - pixels_out += sizeof(u32); + std::memcpy(row_pixels_out, &rgba8, sizeof(u32)); + row_pixels_out += sizeof(u32); } } + + return true; } - break; case ImageFormat::A1BGR5: { - ret = Image(m_width, m_height, ImageFormat::RGBA8); - constexpr u32 pixels_per_vec = 8; - [[maybe_unused]] const u32 aligned_width = Common::AlignDownPow2(m_width, pixels_per_vec); + [[maybe_unused]] const u32 aligned_width = Common::AlignDownPow2(width, pixels_per_vec); - for (u32 y = 0; y < m_height; y++) + for (u32 y = 0; y < height; y++) { - const u8* pixels_in = GetRowPixels(y); - u8* pixels_out = ret->GetRowPixels(y); + const u8* RESTRICT row_pixels_in = static_cast(pixels_in) + (y * pixels_in_pitch); + u8* RESTRICT row_pixels_out = static_cast(pixels_out) + (y * pixels_out_pitch); u32 x = 0; #ifdef CPU_ARCH_SIMD for (; x < aligned_width; x += pixels_per_vec) { - GSVector4i a1bgr5 = GSVector4i::load(pixels_in); - pixels_in += sizeof(u16) * pixels_per_vec; + GSVector4i a1bgr5 = GSVector4i::load(row_pixels_in); + row_pixels_in += sizeof(u16) * pixels_per_vec; GSVector4i r = a1bgr5.srl16<11>(); r = r.sll16<3>() | r.sll16<13>().srl16<13>(); @@ -738,91 +745,90 @@ std::optional Image::ConvertToRGBA8(Error* error) const const GSVector4i high = r.uph64().u16to32() | g.uph64().u16to32().sll32<8>() | b.uph64().u16to32().sll32<16>() | a.uph64().u16to32().sll32<24>(); - GSVector4i::store(pixels_out, low); - pixels_out += sizeof(GSVector4i); + GSVector4i::store(row_pixels_out, low); + row_pixels_out += sizeof(GSVector4i); - GSVector4i::store(pixels_out, high); - pixels_out += sizeof(GSVector4i); + GSVector4i::store(row_pixels_out, high); + row_pixels_out += sizeof(GSVector4i); } #endif DONT_VECTORIZE_THIS_LOOP - for (; x < m_width; x++) + for (; x < width; x++) { // RGB5A1 -> RGBA8 u16 pixel_in; - std::memcpy(&pixel_in, pixels_in, sizeof(u16)); - pixels_in += sizeof(u16); + std::memcpy(&pixel_in, row_pixels_in, sizeof(u16)); + row_pixels_in += sizeof(u16); const u8 a1 = Truncate8(pixel_in & 0x01); const u8 r5 = Truncate8((pixel_in >> 11) & 0x1F); const u8 g6 = Truncate8((pixel_in >> 6) & 0x1F); const u8 b5 = Truncate8((pixel_in >> 1) & 0x1F); const u32 rgba8 = ZeroExtend32((r5 << 3) | (r5 & 7)) | (ZeroExtend32((g6 << 3) | (g6 & 7)) << 8) | (ZeroExtend32((b5 << 3) | (b5 & 7)) << 16) | (a1 ? 0xFF000000u : 0u); - std::memcpy(pixels_out, &rgba8, sizeof(u32)); - pixels_out += sizeof(u32); + std::memcpy(row_pixels_out, &rgba8, sizeof(u32)); + row_pixels_out += sizeof(u32); } } + + return true; } - break; case ImageFormat::BGR8: { - ret = Image(m_width, m_height, ImageFormat::RGBA8); - for (u32 y = 0; y < m_height; y++) + for (u32 y = 0; y < height; y++) { - const u8* pixels_in = GetRowPixels(y); - u8* pixels_out = ret->GetRowPixels(y); + const u8* RESTRICT row_pixels_in = static_cast(pixels_in) + (y * pixels_in_pitch); + u8* RESTRICT row_pixels_out = static_cast(pixels_out) + (y * pixels_out_pitch); - for (u32 x = 0; x < m_width; x++) + for (u32 x = 0; x < width; x++) { // Set alpha channel to full intensity. - const u32 rgba = (ZeroExtend32(pixels_in[0]) | (ZeroExtend32(pixels_in[2]) << 8) | - (ZeroExtend32(pixels_in[2]) << 16) | 0xFF000000u); - std::memcpy(pixels_out, &rgba, sizeof(rgba)); - pixels_in += 3; - pixels_out += sizeof(rgba); + const u32 rgba = (ZeroExtend32(row_pixels_in[0]) | (ZeroExtend32(row_pixels_in[2]) << 8) | + (ZeroExtend32(row_pixels_in[2]) << 16) | 0xFF000000u); + std::memcpy(row_pixels_out, &rgba, sizeof(rgba)); + row_pixels_in += 3; + row_pixels_out += sizeof(rgba); } } + + return true; } break; case ImageFormat::BC1: { - ret = Image(m_width, m_height, ImageFormat::RGBA8); - DecompressBC(ret.value(), *this); + DecompressBC(pixels_out, pixels_out_pitch, pixels_in, pixels_in_pitch, width, height); + return true; } break; case ImageFormat::BC2: { - ret = Image(m_width, m_height, ImageFormat::RGBA8); - DecompressBC(ret.value(), *this); + DecompressBC(pixels_out, pixels_out_pitch, pixels_in, pixels_in_pitch, width, height); + return true; } break; case ImageFormat::BC3: { - ret = Image(m_width, m_height, ImageFormat::RGBA8); - DecompressBC(ret.value(), *this); + DecompressBC(pixels_out, pixels_out_pitch, pixels_in, pixels_in_pitch, width, height); + return true; } break; case ImageFormat::BC7: { - ret = Image(m_width, m_height, ImageFormat::RGBA8); - DecompressBC(ret.value(), *this); + DecompressBC(pixels_out, pixels_out_pitch, pixels_in, pixels_in_pitch, width, height); + return true; } - break; default: { - Error::SetStringFmt(error, "Unhandled format {}", GetFormatName(m_format)); + Error::SetStringFmt(error, "Unhandled format {}", GetFormatName(format)); + return false; } - break; } - - return ret; } void Image::FlipY() diff --git a/src/util/image.h b/src/util/image.h index c68b0b49b..b2e3b0b2e 100644 --- a/src/util/image.h +++ b/src/util/image.h @@ -98,7 +98,10 @@ public: std::optional> SaveToBuffer(std::string_view filename, u8 quality = DEFAULT_SAVE_QUALITY, Error* error = nullptr) const; - std::optional ConvertToRGBA8(Error* error) const; + std::optional ConvertToRGBA8(Error* error = nullptr) const; + + static bool ConvertToRGBA8(void* RESTRICT pixels_out, u32 pixels_out_pitch, const void* RESTRICT pixels_in, + u32 pixels_in_pitch, u32 width, u32 height, ImageFormat format, Error* error = nullptr); void FlipY();