diff --git a/src/util/d3d11_device.cpp b/src/util/d3d11_device.cpp index c04a1e417..b5ea7c7a3 100644 --- a/src/util/d3d11_device.cpp +++ b/src/util/d3d11_device.cpp @@ -200,6 +200,13 @@ void D3D11Device::SetFeatures(FeatureMask disabled_features) (SUCCEEDED(m_device->CheckFeatureSupport(D3D11_FEATURE_D3D11_OPTIONS2, &data, sizeof(data))) && data.ROVsSupported); } + + m_features.dxt_textures = + (!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && + (SupportsTextureFormat(GPUTexture::Format::BC1) && SupportsTextureFormat(GPUTexture::Format::BC2) && + SupportsTextureFormat(GPUTexture::Format::BC3))); + m_features.bptc_textures = + (!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && SupportsTextureFormat(GPUTexture::Format::BC7)); } D3D11SwapChain::D3D11SwapChain(const WindowInfo& wi, GPUVSyncMode vsync_mode, bool allow_present_throttle, diff --git a/src/util/d3d11_texture.cpp b/src/util/d3d11_texture.cpp index 78b441069..cd5fd2931 100644 --- a/src/util/d3d11_texture.cpp +++ b/src/util/d3d11_texture.cpp @@ -147,23 +147,24 @@ bool D3D11Texture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, if (HasFlag(Flags::AllowMap)) { void* map; - u32 map_stride; - if (!Map(&map, &map_stride, x, y, width, height, layer, level)) + u32 map_pitch; + if (!Map(&map, &map_pitch, x, y, width, height, layer, level)) return false; - StringUtil::StrideMemCpy(map, map_stride, data, pitch, GetPixelSize() * width, height); + CopyTextureDataForUpload(width, height, m_format, map, map_pitch, data, pitch); Unmap(); return true; } - const CD3D11_BOX box(static_cast(x), static_cast(y), 0, static_cast(x + width), - static_cast(y + height), 1); + const u32 bs = GetBlockSize(); + const D3D11_BOX box = {Common::AlignDownPow2(x, bs), Common::AlignDownPow2(y, bs), 0U, + Common::AlignUpPow2(x + width, bs), Common::AlignUpPow2(y + height, bs), 1U}; const u32 srnum = D3D11CalcSubresource(level, layer, m_levels); ID3D11DeviceContext1* context = D3D11Device::GetD3DContext(); 
CommitClear(context); - GPUDevice::GetStatistics().buffer_streamed += height * pitch; + GPUDevice::GetStatistics().buffer_streamed += CalcUploadSize(height, pitch); GPUDevice::GetStatistics().num_uploads++; context->UpdateSubresource(m_texture.Get(), srnum, &box, data, pitch, 0); @@ -194,10 +195,18 @@ bool D3D11Texture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 return false; } - GPUDevice::GetStatistics().buffer_streamed += height * sr.RowPitch; + GPUDevice::GetStatistics().buffer_streamed += CalcUploadSize(height, sr.RowPitch); GPUDevice::GetStatistics().num_uploads++; - *map = static_cast(sr.pData) + (y * sr.RowPitch) + (x * GetPixelSize()); + if (IsCompressedFormat(m_format)) + { + *map = static_cast(sr.pData) + ((y / GetBlockSize()) * sr.RowPitch) + + ((x / GetBlockSize()) * GetPixelSize()); + } + else + { + *map = static_cast(sr.pData) + (y * sr.RowPitch) + (x * GetPixelSize()); + } *map_stride = sr.RowPitch; m_mapped_subresource = srnum; m_state = GPUTexture::State::Dirty; @@ -294,7 +303,7 @@ std::unique_ptr D3D11Texture::Create(ID3D11Device* device, u32 wid if (initial_data) { - GPUDevice::GetStatistics().buffer_streamed += height * initial_data_stride; + GPUDevice::GetStatistics().buffer_streamed += CalcUploadSize(format, height, initial_data_stride); GPUDevice::GetStatistics().num_uploads++; } diff --git a/src/util/d3d12_device.cpp b/src/util/d3d12_device.cpp index 964d067cc..f0e770010 100644 --- a/src/util/d3d12_device.cpp +++ b/src/util/d3d12_device.cpp @@ -1366,6 +1366,13 @@ void D3D12Device::SetFeatures(D3D_FEATURE_LEVEL feature_level, FeatureMask disab SUCCEEDED(m_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options))) && options.ROVsSupported; } + + m_features.dxt_textures = + (!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && + (SupportsTextureFormat(GPUTexture::Format::BC1) && SupportsTextureFormat(GPUTexture::Format::BC2) && + SupportsTextureFormat(GPUTexture::Format::BC3))); + 
m_features.bptc_textures = + (!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && SupportsTextureFormat(GPUTexture::Format::BC7)); } void D3D12Device::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, diff --git a/src/util/d3d12_texture.cpp b/src/util/d3d12_texture.cpp index 4305f7afd..1d7500dfd 100644 --- a/src/util/d3d12_texture.cpp +++ b/src/util/d3d12_texture.cpp @@ -340,24 +340,24 @@ ID3D12GraphicsCommandList4* D3D12Texture::GetCommandBufferForUpdate() return dev.GetInitCommandList(); } -void D3D12Texture::CopyTextureDataForUpload(void* dst, const void* src, u32 width, u32 height, u32 pitch, - u32 upload_pitch) const -{ - StringUtil::StrideMemCpy(dst, upload_pitch, src, pitch, GetPixelSize() * width, height); -} - ID3D12Resource* D3D12Texture::AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width, - u32 height) const + u32 height, u32 buffer_size) const { - const u32 size = upload_pitch * height; ComPtr resource; ComPtr allocation; const D3D12MA::ALLOCATION_DESC allocation_desc = {D3D12MA::ALLOCATION_FLAG_NONE, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE, nullptr, nullptr}; - const D3D12_RESOURCE_DESC resource_desc = { - D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1, DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, - D3D12_RESOURCE_FLAG_NONE}; + const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER, + 0, + buffer_size, + 1, + 1, + 1, + DXGI_FORMAT_UNKNOWN, + {1, 0}, + D3D12_TEXTURE_LAYOUT_ROW_MAJOR, + D3D12_RESOURCE_FLAG_NONE}; HRESULT hr = D3D12Device::GetInstance().GetAllocator()->CreateResource( &allocation_desc, &resource_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, allocation.GetAddressOf(), IID_PPV_ARGS(resource.GetAddressOf())); @@ -375,9 +375,9 @@ ID3D12Resource* D3D12Texture::AllocateUploadStagingBuffer(const void* data, u32 return nullptr; } - CopyTextureDataForUpload(map_ptr, data, width, height, pitch, upload_pitch); + 
CopyTextureDataForUpload(width, height, m_format, map_ptr, upload_pitch, data, pitch); - const D3D12_RANGE write_range = {0, size}; + const D3D12_RANGE write_range = {0, buffer_size}; resource->Unmap(0, &write_range); // Immediately queue it for freeing after the command buffer finishes, since it's only needed for the copy. @@ -395,8 +395,8 @@ bool D3D12Texture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, D3D12Device& dev = D3D12Device::GetInstance(); D3D12StreamBuffer& sbuffer = dev.GetTextureUploadBuffer(); - const u32 upload_pitch = Common::AlignUpPow2(pitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); - const u32 required_size = height * upload_pitch; + const u32 upload_pitch = Common::AlignUpPow2(CalcUploadPitch(width), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + const u32 required_size = CalcUploadSize(height, upload_pitch); D3D12_TEXTURE_COPY_LOCATION srcloc; srcloc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; @@ -410,7 +410,7 @@ bool D3D12Texture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, // Otherwise allocation will either fail, or require lots of cmdbuffer submissions. 
if (required_size > (sbuffer.GetSize() / 2)) { - srcloc.pResource = AllocateUploadStagingBuffer(data, pitch, upload_pitch, width, height); + srcloc.pResource = AllocateUploadStagingBuffer(data, pitch, upload_pitch, width, height, required_size); if (!srcloc.pResource) return false; @@ -431,7 +431,7 @@ bool D3D12Texture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, srcloc.pResource = sbuffer.GetBuffer(); srcloc.PlacedFootprint.Offset = sbuffer.GetCurrentOffset(); - CopyTextureDataForUpload(sbuffer.GetCurrentHostPointer(), data, width, height, pitch, upload_pitch); + CopyTextureDataForUpload(width, height, m_format, sbuffer.GetCurrentHostPointer(), upload_pitch, data, pitch); sbuffer.CommitMemory(required_size); } @@ -482,8 +482,8 @@ bool D3D12Texture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 CommitClear(GetCommandBufferForUpdate()); // see note in Update() for the reason why. - const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); - const u32 req_size = height * aligned_pitch; + const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(m_width), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + const u32 req_size = CalcUploadSize(m_height, aligned_pitch); D3D12StreamBuffer& buffer = dev.GetTextureUploadBuffer(); if (req_size >= (buffer.GetSize() / 2)) return false; @@ -512,8 +512,8 @@ void D3D12Texture::Unmap() { D3D12Device& dev = D3D12Device::GetInstance(); D3D12StreamBuffer& sb = dev.GetTextureUploadBuffer(); - const u32 aligned_pitch = Common::AlignUpPow2(m_map_width * GetPixelSize(), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); - const u32 req_size = m_map_height * aligned_pitch; + const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(m_width), D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + const u32 req_size = CalcUploadSize(m_map_height, aligned_pitch); const u32 offset = sb.GetCurrentOffset(); sb.CommitMemory(req_size); diff --git a/src/util/d3d12_texture.h b/src/util/d3d12_texture.h 
index 051d1ca0d..96ccd0f3d 100644 --- a/src/util/d3d12_texture.h +++ b/src/util/d3d12_texture.h @@ -80,8 +80,7 @@ private: ID3D12GraphicsCommandList4* GetCommandBufferForUpdate(); ID3D12Resource* AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width, - u32 height) const; - void CopyTextureDataForUpload(void* dst, const void* src, u32 width, u32 height, u32 pitch, u32 upload_pitch) const; + u32 height, u32 buffer_size) const; void ActuallyCommitClear(ID3D12GraphicsCommandList* cmdlist); ComPtr m_resource; diff --git a/src/util/d3d_common.cpp b/src/util/d3d_common.cpp index 7736bccca..e99d8ce46 100644 --- a/src/util/d3d_common.cpp +++ b/src/util/d3d_common.cpp @@ -650,6 +650,10 @@ static constexpr std::array(GPUTe {DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_UNKNOWN }, // RGBA16F {DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_UNKNOWN }, // RGBA32F {DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_UNKNOWN }, // RGB10A2 + {DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, // BC1 + {DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, // BC2 + {DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, // BC3 + {DXGI_FORMAT_BC7_UNORM, DXGI_FORMAT_BC7_UNORM, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN }, // BC7 // clang-format on }}; diff --git a/src/util/gpu_device.cpp b/src/util/gpu_device.cpp index 44e3faf50..bd7acad9e 100644 --- a/src/util/gpu_device.cpp +++ b/src/util/gpu_device.cpp @@ -1057,8 +1057,22 @@ std::unique_ptr GPUDevice::FetchAndUploadTextureImage(const Image& i { const Image* image_to_upload = &image; GPUTexture::Format gpu_format = GPUTexture::GetTextureFormatForImageFormat(image.GetFormat()); + bool gpu_format_supported; + + // avoid device query 
for compressed formats that we've already pretested + if (gpu_format >= GPUTexture::Format::BC1 && gpu_format <= GPUTexture::Format::BC3) + gpu_format_supported = m_features.dxt_textures; + else if (gpu_format == GPUTexture::Format::BC7) + gpu_format_supported = m_features.bptc_textures; + else if (gpu_format == GPUTexture::Format::RGBA8) // always supported + gpu_format_supported = true; + else if (gpu_format != GPUTexture::Format::Unknown) + gpu_format_supported = SupportsTextureFormat(gpu_format); + else + gpu_format_supported = false; + std::optional converted_image; - if (!SupportsTextureFormat(gpu_format)) + if (!gpu_format_supported) { converted_image = image.ConvertToRGBA8(error); if (!converted_image.has_value()) diff --git a/src/util/gpu_device.h b/src/util/gpu_device.h index 02ae401c7..cd66311a8 100644 --- a/src/util/gpu_device.h +++ b/src/util/gpu_device.h @@ -515,6 +515,7 @@ public: FEATURE_MASK_TEXTURE_COPY_TO_SELF = (1 << 6), FEATURE_MASK_MEMORY_IMPORT = (1 << 7), FEATURE_MASK_RASTER_ORDER_VIEWS = (1 << 8), + FEATURE_MASK_COMPRESSED_TEXTURES = (1 << 9), }; enum class DrawBarrier : u32 @@ -553,6 +554,8 @@ public: bool pipeline_cache : 1; bool prefer_unused_textures : 1; bool raster_order_views : 1; + bool dxt_textures : 1; + bool bptc_textures : 1; }; struct Statistics diff --git a/src/util/gpu_texture.cpp b/src/util/gpu_texture.cpp index a913d33b4..c10e6faff 100644 --- a/src/util/gpu_texture.cpp +++ b/src/util/gpu_texture.cpp @@ -25,7 +25,7 @@ GPUTexture::~GPUTexture() const char* GPUTexture::GetFormatName(Format format) { - static constexpr const char* format_names[static_cast(Format::MaxCount)] = { + static constexpr const std::array(Format::MaxCount)> format_names = {{ "Unknown", // Unknown "RGBA8", // RGBA8 "BGRA8", // BGRA8 @@ -51,43 +51,35 @@ const char* GPUTexture::GetFormatName(Format format) "RGBA16F", // RGBA16F "RGBA32F", // RGBA32F "RGB10A2", // RGB10A2 - }; + "BC1", // BC1 + "BC2", // BC2 + "BC3", // BC3 + "BC7", // BC7 + }}; return 
format_names[static_cast(format)]; } -u32 GPUTexture::GetCompressedBytesPerBlock() const +u32 GPUTexture::GetBlockSize() const { - return GetCompressedBytesPerBlock(m_format); + return GetBlockSize(m_format); } -u32 GPUTexture::GetCompressedBytesPerBlock(Format format) +u32 GPUTexture::GetBlockSize(Format format) { - // TODO: Implement me - return GetPixelSize(format); -} - -u32 GPUTexture::GetCompressedBlockSize() const -{ - return GetCompressedBlockSize(m_format); -} - -u32 GPUTexture::GetCompressedBlockSize(Format format) -{ - // TODO: Implement me - /*if (format >= Format::BC1 && format <= Format::BC7) + if (format >= Format::BC1 && format <= Format::BC7) return 4; - else*/ - return 1; + else + return 1; } u32 GPUTexture::CalcUploadPitch(Format format, u32 width) { - /* + // convert to blocks if (format >= Format::BC1 && format <= Format::BC7) width = Common::AlignUpPow2(width, 4) / 4; - */ - return width * GetCompressedBytesPerBlock(format); + + return width * GetPixelSize(format); } u32 GPUTexture::CalcUploadPitch(u32 width) const @@ -102,9 +94,11 @@ u32 GPUTexture::CalcUploadRowLengthFromPitch(u32 pitch) const u32 GPUTexture::CalcUploadRowLengthFromPitch(Format format, u32 pitch) { - const u32 block_size = GetCompressedBlockSize(format); - const u32 bytes_per_block = GetCompressedBytesPerBlock(format); - return ((pitch + (bytes_per_block - 1)) / bytes_per_block) * block_size; + const u32 pixel_size = GetPixelSize(format); + if (IsCompressedFormat(format)) + return (Common::AlignUpPow2(pitch, pixel_size) / pixel_size) * 4; + else + return pitch / pixel_size; } u32 GPUTexture::CalcUploadSize(u32 height, u32 pitch) const @@ -114,36 +108,64 @@ u32 GPUTexture::CalcUploadSize(u32 height, u32 pitch) const u32 GPUTexture::CalcUploadSize(Format format, u32 height, u32 pitch) { - const u32 block_size = GetCompressedBlockSize(format); + const u32 block_size = GetBlockSize(format); return pitch * ((static_cast(height) + (block_size - 1)) / block_size); } +bool 
GPUTexture::IsCompressedFormat(Format format) +{ + return (format >= Format::BC1); +} + +bool GPUTexture::IsCompressedFormat() const +{ + return IsCompressedFormat(m_format); +} + u32 GPUTexture::GetFullMipmapCount(u32 width, u32 height) { const u32 max_dim = Common::PreviousPow2(std::max(width, height)); return (std::countr_zero(max_dim) + 1); } +void GPUTexture::CopyTextureDataForUpload(u32 width, u32 height, Format format, void* dst, u32 dst_pitch, + const void* src, u32 src_pitch) +{ + if (IsCompressedFormat(format)) + { + const u32 blocks_wide = Common::AlignUpPow2(width, 4) / 4; + const u32 blocks_high = Common::AlignUpPow2(height, 4) / 4; + const u32 block_size = GetPixelSize(format); + StringUtil::StrideMemCpy(dst, dst_pitch, src, src_pitch, block_size * blocks_wide, blocks_high); + } + else + { + StringUtil::StrideMemCpy(dst, dst_pitch, src, src_pitch, width * GetPixelSize(format), height); + } +} + GPUTexture::Format GPUTexture::GetTextureFormatForImageFormat(ImageFormat format) { - static constexpr const std::array(ImageFormat::MaxCount)> mapping = {{ - Format::Unknown, // None - Format::RGBA8, // RGBA8 - Format::BGRA8, // BGRA8 - Format::RGB565, // RGB565 - Format::Unknown, // RGBA5551 - Format::Unknown, // BC1 - Format::Unknown, // BC2 - Format::Unknown, // BC3 - Format::Unknown, // BC7 - }}; + static constexpr const std::array mapping = { + Format::Unknown, // None + Format::RGBA8, // RGBA8 + Format::BGRA8, // BGRA8 + Format::RGB565, // RGB565 + Format::RGBA5551, // RGBA5551 + Format::Unknown, // BGR8 + Format::BC1, // BC1 + Format::BC2, // BC2 + Format::BC3, // BC3 + Format::BC7, // BC7 + }; + static_assert(mapping.size() == static_cast(ImageFormat::MaxCount)); return mapping[static_cast(format)]; } ImageFormat GPUTexture::GetImageFormatForTextureFormat(Format format) { - static constexpr const std::array(Format::MaxCount)> mapping = {{ + static constexpr const std::array mapping = { ImageFormat::None, // Unknown ImageFormat::RGBA8, // RGBA8 
ImageFormat::BGRA8, // BGRA8 @@ -169,7 +191,12 @@ ImageFormat GPUTexture::GetImageFormatForTextureFormat(Format format) ImageFormat::None, // RGBA16F ImageFormat::None, // RGBA32F ImageFormat::None, // RGB10A2 - }}; + ImageFormat::BC1, // BC1 + ImageFormat::BC2, // BC2 + ImageFormat::BC3, // BC3 + ImageFormat::BC7, // BC7 + }; + static_assert(mapping.size() == static_cast(Format::MaxCount)); return mapping[static_cast(format)]; } @@ -226,6 +253,10 @@ u32 GPUTexture::GetPixelSize(GPUTexture::Format format) 8, // RGBA16F 16, // RGBA32F 4, // RGB10A2 + 8, // BC1 - 16 pixels in 64 bits + 16, // BC2 - 16 pixels in 128 bits + 16, // BC3 - 16 pixels in 128 bits + 16, // BC7 - 16 pixels in 128 bits }}; return sizes[static_cast(format)]; @@ -241,12 +272,6 @@ bool GPUTexture::IsDepthStencilFormat(Format format) return (format == Format::D24S8 || format == Format::D32FS8); } -bool GPUTexture::IsCompressedFormat(Format format) -{ - // TODO: Implement me - return false; -} - bool GPUTexture::ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format, Flags flags, Error* error) { @@ -318,6 +343,12 @@ bool GPUTexture::ValidateConfig(u32 width, u32 height, u32 layers, u32 levels, u return false; } + if (IsCompressedFormat(format) && (type != Type::Texture || ((flags & Flags::AllowBindAsImage) != Flags::None))) + { + Error::SetStringView(error, "Compressed formats are only supported for textures."); + return false; + } + return true; } diff --git a/src/util/gpu_texture.h b/src/util/gpu_texture.h index c85113a91..700d6dca3 100644 --- a/src/util/gpu_texture.h +++ b/src/util/gpu_texture.h @@ -61,7 +61,11 @@ public: RGBA16F, RGBA32F, RGB10A2, - MaxCount + BC1, ///< BC1, aka DXT1 compressed texture + BC2, ///< BC2, aka DXT2/3 compressed texture + BC3, ///< BC3, aka DXT4/5 compressed texture + BC7, ///< BC7, aka BPTC compressed texture + MaxCount, }; enum class State : u8 @@ -95,12 +99,13 @@ public: static bool IsDepthFormat(Format format); static 
bool IsDepthStencilFormat(Format format); static bool IsCompressedFormat(Format format); - static u32 GetCompressedBytesPerBlock(Format format); - static u32 GetCompressedBlockSize(Format format); + static u32 GetBlockSize(Format format); static u32 CalcUploadPitch(Format format, u32 width); static u32 CalcUploadRowLengthFromPitch(Format format, u32 pitch); static u32 CalcUploadSize(Format format, u32 height, u32 pitch); static u32 GetFullMipmapCount(u32 width, u32 height); + static void CopyTextureDataForUpload(u32 width, u32 height, Format format, void* dst, u32 dst_pitch, const void* src, + u32 src_pitch); static Format GetTextureFormatForImageFormat(ImageFormat format); static ImageFormat GetImageFormatForTextureFormat(Format format); @@ -160,8 +165,8 @@ public: size_t GetVRAMUsage() const; - u32 GetCompressedBytesPerBlock() const; - u32 GetCompressedBlockSize() const; + bool IsCompressedFormat() const; + u32 GetBlockSize() const; u32 CalcUploadPitch(u32 width) const; u32 CalcUploadRowLengthFromPitch(u32 pitch) const; u32 CalcUploadSize(u32 height, u32 pitch) const; diff --git a/src/util/image.cpp b/src/util/image.cpp index c3f5dc212..4a53fa4df 100644 --- a/src/util/image.cpp +++ b/src/util/image.cpp @@ -46,6 +46,10 @@ static bool WebPBufferSaver(const Image& image, DynamicHeapArray* data, u8 q static bool WebPFileLoader(Image* image, std::string_view filename, std::FILE* fp, Error* error); static bool WebPFileSaver(const Image& image, std::string_view filename, std::FILE* fp, u8 quality, Error* error); +static bool DDSBufferLoader(Image* image, std::span data, Error* error); +static bool DDSFileLoader(Image* image, std::string_view filename, std::FILE* fp, Error* error); + +namespace { struct FormatHandler { const char* extension; @@ -54,12 +58,14 @@ struct FormatHandler bool (*file_loader)(Image*, std::string_view, std::FILE*, Error*); bool (*file_saver)(const Image&, std::string_view, std::FILE*, u8, Error*); }; +} // namespace static constexpr FormatHandler 
s_format_handlers[] = { {"png", PNGBufferLoader, PNGBufferSaver, PNGFileLoader, PNGFileSaver}, {"jpg", JPEGBufferLoader, JPEGBufferSaver, JPEGFileLoader, JPEGFileSaver}, {"jpeg", JPEGBufferLoader, JPEGBufferSaver, JPEGFileLoader, JPEGFileSaver}, {"webp", WebPBufferLoader, WebPBufferSaver, WebPFileLoader, WebPFileSaver}, + {"dds", DDSBufferLoader, nullptr, DDSFileLoader, nullptr}, }; static const FormatHandler* GetFormatHandler(std::string_view extension) @@ -155,17 +161,19 @@ Image& Image::operator=(Image&& move) const char* Image::GetFormatName(ImageFormat format) { - static constexpr std::array(ImageFormat::MaxCount)> names = { + static constexpr std::array names = { "None", // None "RGBA8", // RGBA8 "BGRA8", // BGRA8 "RGB565", // RGB565 "RGB5551", // RGBA5551 + "BGR8", // BGR8 "BC1", // BC1 "BC2", // BC2 "BC3", // BC3 "BC7", // BC7 }; + static_assert(names.size() == static_cast(ImageFormat::MaxCount)); return names[static_cast(format)]; } @@ -178,6 +186,7 @@ u32 Image::GetPixelSize(ImageFormat format) 4, // BGRA8 2, // RGB565 2, // RGBA5551 + 3, // BGR8 8, // BC1 - 16 pixels in 64 bits 16, // BC2 - 16 pixels in 128 bits 16, // BC3 - 16 pixels in 128 bits @@ -563,6 +572,27 @@ std::optional Image::ConvertToRGBA8(Error* error) const } } } + break; + + case ImageFormat::BGR8: + { + ret = Image(m_width, m_height, ImageFormat::RGBA8); + for (u32 y = 0; y < m_height; y++) + { + const u8* pixels_in = GetRowPixels(y); + u8* pixels_out = ret->GetRowPixels(y); + + for (u32 x = 0; x < m_width; x++) + { + // Set alpha channel to full intensity. 
+ const u32 rgba = (ZeroExtend32(pixels_in[2]) | (ZeroExtend32(pixels_in[1]) << 8) | + (ZeroExtend32(pixels_in[0]) << 16) | 0xFF000000u); + std::memcpy(pixels_out, &rgba, sizeof(rgba)); + pixels_in += 3; + pixels_out += sizeof(rgba); + } + } + } break; // TODO: Block format decompression @@ -1220,3 +1250,415 @@ bool WebPFileSaver(const Image& image, std::string_view filename, std::FILE* fp, return true; } + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// DDS Handler +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +// From https://raw.githubusercontent.com/Microsoft/DirectXTex/master/DirectXTex/DDS.h +// +// This header defines constants and structures that are useful when parsing +// DDS files. DDS files were originally designed to use several structures +// and constants that are native to DirectDraw and are defined in ddraw.h, +// such as DDSURFACEDESC2 and DDSCAPS2. This file defines similar +// (compatible) constants and structures so that one can use DDS files +// without needing to include ddraw.h. +// +// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF +// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A +// PARTICULAR PURPOSE. +// +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// +// http://go.microsoft.com/fwlink/?LinkId=248926 + +#pragma pack(push, 1) + +static constexpr uint32_t DDS_MAGIC = 0x20534444; // "DDS " + +struct DDS_PIXELFORMAT +{ + uint32_t dwSize; + uint32_t dwFlags; + uint32_t dwFourCC; + uint32_t dwRGBBitCount; + uint32_t dwRBitMask; + uint32_t dwGBitMask; + uint32_t dwBBitMask; + uint32_t dwABitMask; +}; + +#define DDS_FOURCC 0x00000004 // DDPF_FOURCC +#define DDS_RGB 0x00000040 // DDPF_RGB +#define DDS_RGBA 0x00000041 // DDPF_RGB | DDPF_ALPHAPIXELS +#define DDS_LUMINANCE 0x00020000 // DDPF_LUMINANCE +#define DDS_LUMINANCEA 0x00020001 // DDPF_LUMINANCE | DDPF_ALPHAPIXELS +#define DDS_ALPHA 0x00000002 // DDPF_ALPHA +#define DDS_PAL8 0x00000020 // DDPF_PALETTEINDEXED8 +#define DDS_PAL8A 0x00000021 // DDPF_PALETTEINDEXED8 | DDPF_ALPHAPIXELS +#define DDS_BUMPDUDV 0x00080000 // DDPF_BUMPDUDV + +#ifndef MAKEFOURCC +#define MAKEFOURCC(ch0, ch1, ch2, ch3) \ + ((uint32_t)(uint8_t)(ch0) | ((uint32_t)(uint8_t)(ch1) << 8) | ((uint32_t)(uint8_t)(ch2) << 16) | \ + ((uint32_t)(uint8_t)(ch3) << 24)) +#endif /* defined(MAKEFOURCC) */ + +#define DDS_HEADER_FLAGS_TEXTURE 0x00001007 // DDSD_CAPS | DDSD_HEIGHT | DDSD_WIDTH | DDSD_PIXELFORMAT +#define DDS_HEADER_FLAGS_MIPMAP 0x00020000 // DDSD_MIPMAPCOUNT +#define DDS_HEADER_FLAGS_VOLUME 0x00800000 // DDSD_DEPTH +#define DDS_HEADER_FLAGS_PITCH 0x00000008 // DDSD_PITCH +#define DDS_HEADER_FLAGS_LINEARSIZE 0x00080000 // DDSD_LINEARSIZE +#define DDS_MAX_TEXTURE_SIZE 32768 + +// Subset here matches D3D10_RESOURCE_DIMENSION and D3D11_RESOURCE_DIMENSION +enum DDS_RESOURCE_DIMENSION +{ + DDS_DIMENSION_TEXTURE1D = 2, + DDS_DIMENSION_TEXTURE2D = 3, + DDS_DIMENSION_TEXTURE3D = 4, +}; + +struct DDS_HEADER +{ + uint32_t dwSize; + uint32_t dwFlags; + uint32_t dwHeight; + uint32_t dwWidth; + uint32_t dwPitchOrLinearSize; + uint32_t dwDepth; // only if DDS_HEADER_FLAGS_VOLUME is set in dwFlags + uint32_t dwMipMapCount; + uint32_t dwReserved1[11]; + DDS_PIXELFORMAT ddspf; + uint32_t dwCaps; + uint32_t 
dwCaps2; + uint32_t dwCaps3; + uint32_t dwCaps4; + uint32_t dwReserved2; +}; + +struct DDS_HEADER_DXT10 +{ + uint32_t dxgiFormat; + uint32_t resourceDimension; + uint32_t miscFlag; // see DDS_RESOURCE_MISC_FLAG + uint32_t arraySize; + uint32_t miscFlags2; // see DDS_MISC_FLAGS2 +}; + +#pragma pack(pop) + +static_assert(sizeof(DDS_HEADER) == 124, "DDS Header size mismatch"); +static_assert(sizeof(DDS_HEADER_DXT10) == 20, "DDS DX10 Extended Header size mismatch"); + +constexpr DDS_PIXELFORMAT DDSPF_A8R8G8B8 = { + sizeof(DDS_PIXELFORMAT), DDS_RGBA, 0, 32, 0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000}; +constexpr DDS_PIXELFORMAT DDSPF_X8R8G8B8 = { + sizeof(DDS_PIXELFORMAT), DDS_RGB, 0, 32, 0x00ff0000, 0x0000ff00, 0x000000ff, 0x00000000}; +constexpr DDS_PIXELFORMAT DDSPF_A8B8G8R8 = { + sizeof(DDS_PIXELFORMAT), DDS_RGBA, 0, 32, 0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000}; +constexpr DDS_PIXELFORMAT DDSPF_X8B8G8R8 = { + sizeof(DDS_PIXELFORMAT), DDS_RGB, 0, 32, 0x000000ff, 0x0000ff00, 0x00ff0000, 0x00000000}; +constexpr DDS_PIXELFORMAT DDSPF_R8G8B8 = { + sizeof(DDS_PIXELFORMAT), DDS_RGB, 0, 24, 0x00ff0000, 0x0000ff00, 0x000000ff, 0x00000000}; + +// End of Microsoft code from DDS.h. 
+ +static bool DDSPixelFormatMatches(const DDS_PIXELFORMAT& pf1, const DDS_PIXELFORMAT& pf2) +{ + return std::tie(pf1.dwSize, pf1.dwFlags, pf1.dwFourCC, pf1.dwRGBBitCount, pf1.dwRBitMask, pf1.dwGBitMask, + pf1.dwGBitMask, pf1.dwBBitMask, + pf1.dwABitMask) == std::tie(pf2.dwSize, pf2.dwFlags, pf2.dwFourCC, pf2.dwRGBBitCount, pf2.dwRBitMask, + pf2.dwGBitMask, pf2.dwGBitMask, pf2.dwBBitMask, pf2.dwABitMask); +} + +struct DDSLoadInfo +{ + u32 block_size = 1; + u32 bytes_per_block = 4; + u32 width = 0; + u32 height = 0; + u32 mip_count = 0; + ImageFormat format = ImageFormat::RGBA8; + s64 base_image_offset = 0; + u32 base_image_size = 0; + u32 base_image_pitch = 0; + bool clear_alpha = false; +}; + +template <typename ReadFunction> +static bool ParseDDSHeader(const ReadFunction& RF, DDSLoadInfo* info, Error* error) +{ + u32 magic; + if (!RF(&magic, sizeof(magic), error) || magic != DDS_MAGIC) + { + Error::AddPrefix(error, "Failed to read magic: "); + return false; + } + + DDS_HEADER header; + u32 header_size = sizeof(header); + if (!RF(&header, header_size, error) || header.dwSize < header_size) + { + Error::AddPrefix(error, "Failed to read header: "); + return false; + } + + // We should check for DDS_HEADER_FLAGS_TEXTURE here, but some tools don't seem + // to set it (e.g. compressonator). But we can still validate the size. + if (header.dwWidth == 0 || header.dwWidth >= DDS_MAX_TEXTURE_SIZE || header.dwHeight == 0 || + header.dwHeight >= DDS_MAX_TEXTURE_SIZE) + { + Error::SetStringFmt(error, "Size is invalid: {}x{}", header.dwWidth, header.dwHeight); + return false; + } + + // Image should be 2D. + if (header.dwFlags & DDS_HEADER_FLAGS_VOLUME) + { + Error::SetStringView(error, "Volume textures are not supported."); + return false; + } + + // DDS_HEADER_FLAGS_TEXTURE is not checked; width/height were range-validated above instead. + info->width = header.dwWidth; + info->height = header.dwHeight; + + // Check for mip levels. 
+ if (header.dwFlags & DDS_HEADER_FLAGS_MIPMAP) + { + info->mip_count = header.dwMipMapCount; + if (header.dwMipMapCount != 0) + { + info->mip_count = header.dwMipMapCount; + } + else + { + const u32 max_dim = Common::PreviousPow2(std::max(header.dwWidth, header.dwHeight)); + info->mip_count = (std::countr_zero(max_dim) + 1); + } + } + else + { + info->mip_count = 1; + } + + // Handle fourcc formats vs uncompressed formats. + const bool has_fourcc = (header.ddspf.dwFlags & DDS_FOURCC) != 0; + if (has_fourcc) + { + // Handle DX10 extension header. + u32 dxt10_format = 0; + if (header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', '1', '0')) + { + DDS_HEADER_DXT10 dxt10_header; + if (!RF(&dxt10_header, sizeof(dxt10_header), error)) + { + Error::AddPrefix(error, "Failed to read DXT10 header: "); + return false; + } + + // Can't handle array textures here. Doesn't make sense to use them, anyway. + if (dxt10_header.resourceDimension != DDS_DIMENSION_TEXTURE2D || dxt10_header.arraySize != 1) + { + Error::SetStringView(error, "Only 2D textures are supported."); + return false; + } + + header_size += sizeof(dxt10_header); + dxt10_format = dxt10_header.dxgiFormat; + } + + if (header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', 'T', '1') || dxt10_format == 71) + { + info->format = ImageFormat::BC1; + info->block_size = 4; + info->bytes_per_block = 8; + } + else if (header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', 'T', '2') || + header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', 'T', '3') || dxt10_format == 74) + { + info->format = ImageFormat::BC2; + info->block_size = 4; + info->bytes_per_block = 16; + } + else if (header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', 'T', '4') || + header.ddspf.dwFourCC == MAKEFOURCC('D', 'X', 'T', '5') || dxt10_format == 77) + { + info->format = ImageFormat::BC3; + info->block_size = 4; + info->bytes_per_block = 16; + } + else if (dxt10_format == 98) + { + info->format = ImageFormat::BC7; + info->block_size = 4; + info->bytes_per_block = 16; + } + else + { + 
Error::SetStringFmt(error, "Unknown format with FOURCC 0x{:08X} / DXT10 format {}", header.ddspf.dwFourCC, + dxt10_format); + return false; + } + } + else + { + if (DDSPixelFormatMatches(header.ddspf, DDSPF_A8R8G8B8)) + { + info->format = ImageFormat::BGRA8; + } + else if (DDSPixelFormatMatches(header.ddspf, DDSPF_X8R8G8B8)) + { + info->format = ImageFormat::BGRA8; + info->clear_alpha = true; + } + else if (DDSPixelFormatMatches(header.ddspf, DDSPF_X8B8G8R8)) + { + info->format = ImageFormat::RGBA8; + info->clear_alpha = true; + } + else if (DDSPixelFormatMatches(header.ddspf, DDSPF_R8G8B8)) + { + info->format = ImageFormat::BGR8; + info->clear_alpha = true; + } + else if (DDSPixelFormatMatches(header.ddspf, DDSPF_A8B8G8R8)) + { + info->format = ImageFormat::RGBA8; + } + else + { + Error::SetStringFmt(error, "Unhandled format with FOURCC 0x{:08X}", header.ddspf.dwFourCC); + return false; + } + + // All these formats are RGBA, just with byte swapping. + info->block_size = 1; + info->bytes_per_block = header.ddspf.dwRGBBitCount / 8; + } + + // Mip levels smaller than the block size are padded to multiples of the block size. + const u32 blocks_wide = Common::AlignUpPow2(info->width, info->block_size) / info->block_size; + const u32 blocks_high = Common::AlignUpPow2(info->height, info->block_size) / info->block_size; + + // Pitch can be specified in the header, otherwise we can derive it from the dimensions. For + // compressed formats, both DDS_HEADER_FLAGS_LINEARSIZE and DDS_HEADER_FLAGS_PITCH should be + // set. See https://msdn.microsoft.com/en-us/library/windows/desktop/bb943982(v=vs.85).aspx + if (header.dwFlags & DDS_HEADER_FLAGS_PITCH && header.dwFlags & DDS_HEADER_FLAGS_LINEARSIZE) + { + // Convert pitch (in bytes) to texels/row length. + if (header.dwPitchOrLinearSize < info->bytes_per_block) + { + // Likely a corrupted or invalid file. 
+ Error::SetStringFmt(error, "Invalid pitch: {}", header.dwPitchOrLinearSize); + return false; + } + + info->base_image_pitch = header.dwPitchOrLinearSize; + info->base_image_size = info->base_image_pitch * blocks_high; + } + else + { + // Assume no padding between rows of blocks. + info->base_image_pitch = blocks_wide * info->bytes_per_block; + info->base_image_size = info->base_image_pitch * blocks_high; + } + + info->base_image_offset = sizeof(magic) + header_size; + +#if 0 + // D3D11 cannot handle block compressed textures where the first mip level is not a multiple of the block size. + if (mip_level == 0 && info.block_size > 1 && ((width % info.block_size) != 0 || (height % info.block_size) != 0)) + { + Error::SetStringFmt(error, + "Invalid dimensions for DDS texture. For compressed textures of this format, " + "the width/height of the first mip level must be a multiple of {}.", + info.block_size); + return false; + } +#endif + + return true; +} + +// Loads the base image of a DDS file from the stream fp into image; only the base image is read. +// Returns false and fills in error if the header cannot be parsed or the pixel data cannot be read. +bool DDSFileLoader(Image* image, std::string_view path, std::FILE* fp, Error* error) +{ + const auto header_reader = [fp](void* buffer, size_t size, Error* error) { + if (std::fread(buffer, size, 1, fp) == 1) + return true; + + Error::SetErrno(error, "fread() failed: ", errno); + return false; + }; + + DDSLoadInfo info; + if (!ParseDDSHeader(header_reader, &info, error)) + return false; + + // always load the base image + if (!FileSystem::FSeek64(fp, info.base_image_offset, SEEK_SET, error)) + return false; + + image->Resize(info.width, info.height, info.format, false); + const u32 blocks = image->GetBlockYCount(); + if (image->GetPitch() != info.base_image_pitch) + { + // The pitch stored in the file comes from the (untrusted) header and may be larger than a row of the image. + // Never read more than one destination row, and skip whatever per-row padding remains in the file. + const u32 row_copy_size = (info.base_image_pitch < image->GetPitch()) ? info.base_image_pitch : image->GetPitch(); + const u32 row_padding = info.base_image_pitch - row_copy_size; + for (u32 y = 0; y < blocks; y++) + { + if (std::fread(image->GetRowPixels(y), row_copy_size, 1, fp) != 1) + { + Error::SetErrno(error, "fread() failed: ", errno); + return false; + } + + if (row_padding > 0 && !FileSystem::FSeek64(fp, row_padding, SEEK_CUR, error)) + return false; + } + } + else + { + if (std::fread(image->GetPixels(), info.base_image_pitch * blocks, 1, fp) != 1) + { + Error::SetErrno(error, "fread() failed: ", errno); + 
return false; + } + } + + if (info.clear_alpha) + image->SetAllPixelsOpaque(); + + return true; +} + +bool DDSBufferLoader(Image* image, std::span data, Error* error) +{ + size_t data_pos = 0; + const auto header_reader = [&data, &data_pos](void* buffer, size_t size, Error* error) { + if ((data_pos + size) > data.size()) + { + Error::SetStringView(error, "Buffer does not contain sufficient data."); + return false; + } + + std::memcpy(buffer, &data[data_pos], size); + data_pos += size; + return true; + }; + + DDSLoadInfo info; + if (!ParseDDSHeader(header_reader, &info, error)) + return false; + + if ((static_cast(info.base_image_offset) + info.base_image_size) > data.size()) + { + Error::SetStringFmt(error, "Buffer does not contain complete base image."); + return false; + } + + image->SetPixels(info.width, info.height, info.format, &data[static_cast(info.base_image_offset)], + info.base_image_pitch); + + if (info.clear_alpha) + image->SetAllPixelsOpaque(); + + return true; +} diff --git a/src/util/image.h b/src/util/image.h index 49a7334a1..6f30c0064 100644 --- a/src/util/image.h +++ b/src/util/image.h @@ -21,6 +21,7 @@ enum class ImageFormat : u8 BGRA8, RGB565, RGBA5551, + BGR8, BC1, BC2, BC3, diff --git a/src/util/metal_device.mm b/src/util/metal_device.mm index 251f49904..14584bbea 100644 --- a/src/util/metal_device.mm +++ b/src/util/metal_device.mm @@ -71,6 +71,11 @@ static constexpr std::array(GPUTexture::Format: MTLPixelFormatRGBA16Float, // RGBA16F MTLPixelFormatRGBA32Float, // RGBA32F MTLPixelFormatBGR10A2Unorm, // RGB10A2 + MTLPixelFormatBC1_RGBA, // BC1 + MTLPixelFormatBC2_RGBA, // BC2 + MTLPixelFormatBC3_RGBA, // BC3 + MTLPixelFormatBC7_RGBAUnorm, // BC7 + }; static void LogNSError(NSError* error, std::string_view message) @@ -385,6 +390,10 @@ void MetalDevice::SetFeatures(FeatureMask disabled_features) m_features.pipeline_cache = true; m_features.prefer_unused_textures = true; + // Same feature bit for both. 
+ m_features.dxt_textures = m_features.bptc_textures = + !(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && m_device.supportsBCTextureCompression; + // Disable pipeline cache on Intel, apparently it's buggy. if ([[m_device name] containsString:@"Intel"]) { @@ -995,8 +1004,8 @@ MetalTexture::~MetalTexture() bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer /*= 0*/, u32 level /*= 0*/) { - const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); - const u32 req_size = height * aligned_pitch; + const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(width), TEXTURE_UPLOAD_PITCH_ALIGNMENT); + const u32 req_size = CalcUploadSize(height, aligned_pitch); GPUDevice::GetStatistics().buffer_streamed += req_size; GPUDevice::GetStatistics().num_uploads++; @@ -1013,7 +1022,7 @@ bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, actual_buffer = [dev.GetMTLDevice() newBufferWithBytes:data length:upload_size options:options]; actual_offset = 0; actual_pitch = pitch; - if (actual_buffer == nil) + if (actual_buffer == nil) [[unlikely]] { Panic("Failed to allocate temporary buffer."); return false; @@ -1026,7 +1035,7 @@ bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT)) { dev.SubmitCommandBuffer(); - if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT)) + if (!sb.ReserveMemory(req_size, TEXTURE_UPLOAD_ALIGNMENT)) [[unlikely]] { Panic("Failed to reserve texture upload space."); return false; @@ -1034,7 +1043,7 @@ bool MetalTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, } actual_offset = sb.GetCurrentOffset(); - StringUtil::StrideMemCpy(sb.GetCurrentHostPointer(), aligned_pitch, data, pitch, width * GetPixelSize(), height); + CopyTextureDataForUpload(width, height, m_format, sb.GetCurrentHostPointer(), aligned_pitch, data, pitch); 
sb.CommitMemory(req_size); actual_buffer = sb.GetBuffer(); actual_pitch = aligned_pitch; @@ -1065,8 +1074,8 @@ bool MetalTexture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 if ((x + width) > GetMipWidth(level) || (y + height) > GetMipHeight(level) || layer > m_layers || level > m_levels) return false; - const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); - const u32 req_size = height * aligned_pitch; + const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(width), TEXTURE_UPLOAD_PITCH_ALIGNMENT); + const u32 req_size = CalcUploadSize(height, aligned_pitch); MetalDevice& dev = MetalDevice::GetInstance(); if (m_state == GPUTexture::State::Cleared && (x != 0 || y != 0 || width != m_width || height != m_height)) @@ -1097,8 +1106,8 @@ bool MetalTexture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 void MetalTexture::Unmap() { - const u32 aligned_pitch = Common::AlignUpPow2(m_map_width * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); - const u32 req_size = m_map_height * aligned_pitch; + const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(m_map_width), TEXTURE_UPLOAD_PITCH_ALIGNMENT); + const u32 req_size = CalcUploadSize(m_map_height, aligned_pitch); GPUDevice::GetStatistics().buffer_streamed += req_size; GPUDevice::GetStatistics().num_uploads++; @@ -1488,6 +1497,11 @@ bool MetalDevice::SupportsTextureFormat(GPUTexture::Format format) const if (![m_device supportsFamily:MTLGPUFamilyApple2]) return false; } + else if (format >= GPUTexture::Format::BC1 && format <= GPUTexture::Format::BC7) + { + if (!m_device.supportsBCTextureCompression) + return false; + } return (s_pixel_format_mapping[static_cast(format)] != MTLPixelFormatInvalid); } diff --git a/src/util/opengl_device.cpp b/src/util/opengl_device.cpp index 97d169b15..79a1474d5 100644 --- a/src/util/opengl_device.cpp +++ b/src/util/opengl_device.cpp @@ -506,6 +506,12 @@ bool OpenGLDevice::CheckFeatures(FeatureMask 
disabled_features) m_features.shader_cache = false; + m_features.dxt_textures = + (!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && GLAD_GL_EXT_texture_compression_s3tc); + m_features.bptc_textures = + (!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && + (GLAD_GL_VERSION_4_2 || GLAD_GL_ARB_texture_compression_bptc || GLAD_GL_EXT_texture_compression_bptc)); + m_features.pipeline_cache = m_gl_context->IsGLES() || GLAD_GL_ARB_get_program_binary; if (m_features.pipeline_cache) { diff --git a/src/util/opengl_texture.cpp b/src/util/opengl_texture.cpp index 65e1d3ba9..f26b68c39 100644 --- a/src/util/opengl_texture.cpp +++ b/src/util/opengl_texture.cpp @@ -33,61 +33,69 @@ const std::tuple& OpenGLTexture::GetPixelFormatMapping(G { static constexpr std::array, static_cast(GPUTexture::Format::MaxCount)> mapping = {{ - {}, // Unknown - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // RGBA8 - {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8 - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565 - {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // RGBA5551 - {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8 - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_SHORT}, // D16 - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT}, // D24S8 - {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32F - {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT}, // D32FS8 - {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16 - {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16I - {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16U - {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16F - {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32I - {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32U - {GL_R32F, GL_RED, GL_FLOAT}, // R32F - {GL_RG8, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // RG8 - {GL_RG16F, GL_RG, GL_UNSIGNED_SHORT}, // RG16 - {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // RG16F - {GL_RG32F, GL_RG, GL_FLOAT}, // RG32F - {GL_RGBA16, GL_RGBA, GL_UNSIGNED_BYTE}, // RGBA16 - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // 
RGBA16F - {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // RGBA32F - {GL_RGB10_A2, GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV}, // RGB10A2 + {}, // Unknown + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // RGBA8 + {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8 + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565 + {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // RGBA5551 + {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8 + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_SHORT}, // D16 + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT}, // D24S8 + {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32F + {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT}, // D32FS8 + {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16 + {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16I + {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16U + {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16F + {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32I + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32U + {GL_R32F, GL_RED, GL_FLOAT}, // R32F + {GL_RG8, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // RG8 + {GL_RG16F, GL_RG, GL_UNSIGNED_SHORT}, // RG16 + {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // RG16F + {GL_RG32F, GL_RG, GL_FLOAT}, // RG32F + {GL_RGBA16, GL_RGBA, GL_UNSIGNED_BYTE}, // RGBA16 + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16F + {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // RGBA32F + {GL_RGB10_A2, GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV}, // RGB10A2 + {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_UNSIGNED_BYTE}, // BC1 + {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_UNSIGNED_BYTE}, // BC2 + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_UNSIGNED_BYTE}, // BC3 + {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_UNSIGNED_BYTE}, // BC7 }}; // GLES doesn't have the non-normalized 16-bit formats.. use float and hope for the best, lol. 
static constexpr std::array, static_cast(GPUTexture::Format::MaxCount)> mapping_gles = {{ - {}, // Unknown - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // RGBA8 - {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8 - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565 - {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // RGBA5551 - {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8 - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_SHORT}, // D16 - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT}, // D24S8 - {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32F - {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT}, // D32FS8 - {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16 - {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16I - {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16U - {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16F - {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32I - {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32U - {GL_R32F, GL_RED, GL_FLOAT}, // R32F - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // RG8 - {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // RG16 - {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // RG16F - {GL_RG32F, GL_RG, GL_FLOAT}, // RG32F - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16 - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16F - {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // RGBA32F - {GL_RGB10_A2, GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV}, // RGB10A2 + {}, // Unknown + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // RGBA8 + {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8 + {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565 + {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // RGBA5551 + {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8 + {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_SHORT}, // D16 + {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT}, // D24S8 + {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32F + {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT}, // D32FS8 + {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16 + {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16I + 
{GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16U + {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16F + {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32I + {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32U + {GL_R32F, GL_RED, GL_FLOAT}, // R32F + {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // RG8 + {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // RG16 + {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // RG16F + {GL_RG32F, GL_RG, GL_FLOAT}, // RG32F + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16 + {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16F + {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // RGBA32F + {GL_RGB10_A2, GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV}, // RGB10A2 + {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_UNSIGNED_BYTE}, // BC1 + {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_UNSIGNED_BYTE}, // BC2 + {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_UNSIGNED_BYTE}, // BC3 + {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_UNSIGNED_BYTE}, // BC7 }}; return gles ? 
mapping_gles[static_cast(format)] : mapping[static_cast(format)]; @@ -169,6 +177,7 @@ std::unique_ptr OpenGLTexture::Create(u32 width, u32 height, u32 else { const bool use_texture_storage = UseTextureStorage(false); + const bool is_compressed = IsCompressedFormat(format); if (use_texture_storage) { if (layers > 1) @@ -183,10 +192,10 @@ std::unique_ptr OpenGLTexture::Create(u32 width, u32 height, u32 const u32 alignment = GetUploadAlignment(data_pitch); if (data) { - GPUDevice::GetStatistics().buffer_streamed += data_pitch * height; + GPUDevice::GetStatistics().buffer_streamed += CalcUploadSize(format, height, data_pitch); GPUDevice::GetStatistics().num_uploads++; - glPixelStorei(GL_UNPACK_ROW_LENGTH, data_pitch / pixel_size); + glPixelStorei(GL_UNPACK_ROW_LENGTH, CalcUploadRowLengthFromPitch(format, data_pitch)); if (alignment != DEFAULT_UPLOAD_ALIGNMENT) glPixelStorei(GL_UNPACK_ALIGNMENT, alignment); } @@ -198,18 +207,55 @@ std::unique_ptr OpenGLTexture::Create(u32 width, u32 height, u32 { if (use_texture_storage) { - if (layers > 1) - glTexSubImage3D(target, i, 0, 0, 0, current_width, current_height, layers, gl_format, gl_type, data_ptr); + if (is_compressed) + { + const u32 size = CalcUploadSize(format, current_height, data_pitch); + if (layers > 1) + { + glCompressedTexSubImage3D(target, i, 0, 0, 0, current_width, current_height, layers, gl_format, size, + data_ptr); + } + else + { + glCompressedTexSubImage2D(target, i, 0, 0, current_width, current_height, gl_format, size, data_ptr); + } + } else - glTexSubImage2D(target, i, 0, 0, current_width, current_height, gl_format, gl_type, data_ptr); + { + if (layers > 1) + glTexSubImage3D(target, i, 0, 0, 0, current_width, current_height, layers, gl_format, gl_type, data_ptr); + else + glTexSubImage2D(target, i, 0, 0, current_width, current_height, gl_format, gl_type, data_ptr); + } } else { - if (layers > 1) - glTexImage3D(target, i, gl_internal_format, current_width, current_height, layers, 0, gl_format, gl_type, - 
data_ptr); + if (is_compressed) + { + const u32 size = CalcUploadSize(format, current_height, data_pitch); + if (layers > 1) + { + glCompressedTexImage3D(target, i, gl_internal_format, current_width, current_height, layers, 0, size, + data_ptr); + } + else + { + glCompressedTexImage2D(target, i, gl_internal_format, current_width, current_height, 0, size, data_ptr); + } + } else - glTexImage2D(target, i, gl_internal_format, current_width, current_height, 0, gl_format, gl_type, data_ptr); + { + if (layers > 1) + { + glTexImage3D(target, i, gl_internal_format, current_width, current_height, layers, 0, gl_format, gl_type, + data_ptr); + } + else + { + glTexImage2D(target, i, gl_internal_format, current_width, current_height, 0, gl_format, gl_type, + data_ptr); + } + } } if (data_ptr) @@ -257,14 +303,11 @@ void OpenGLTexture::CommitClear() bool OpenGLTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer /*= 0*/, u32 level /*= 0*/) { - // TODO: perf counters - // Worth using the PBO? Driver probably knows better... 
const GLenum target = GetGLTarget(); const auto [gl_internal_format, gl_format, gl_type] = GetPixelFormatMapping(m_format, OpenGLDevice::IsGLES()); - const u32 pixel_size = GetPixelSize(); - const u32 preferred_pitch = Common::AlignUpPow2(static_cast(width) * pixel_size, TEXTURE_UPLOAD_PITCH_ALIGNMENT); - const u32 map_size = preferred_pitch * static_cast(height); + const u32 preferred_pitch = Common::AlignUpPow2(CalcUploadPitch(width), TEXTURE_UPLOAD_PITCH_ALIGNMENT); + const u32 map_size = CalcUploadSize(height, preferred_pitch); OpenGLStreamBuffer* sb = OpenGLDevice::GetTextureStreamBuffer(); CommitClear(); @@ -283,8 +326,22 @@ bool OpenGLTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data if (alignment != DEFAULT_UPLOAD_ALIGNMENT) glPixelStorei(GL_UNPACK_ALIGNMENT, alignment); - glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / pixel_size); - glTexSubImage2D(target, layer, x, y, width, height, gl_format, gl_type, data); + glPixelStorei(GL_UNPACK_ROW_LENGTH, CalcUploadRowLengthFromPitch(pitch)); + if (IsCompressedFormat()) + { + const u32 size = CalcUploadSize(height, pitch); + if (IsTextureArray()) + glCompressedTexSubImage3D(target, level, x, y, layer, width, height, 1, gl_format, size, data); + else + glCompressedTexSubImage2D(target, level, x, y, width, height, gl_format, size, data); + } + else + { + if (IsTextureArray()) + glTexSubImage3D(target, level, x, y, layer, width, height, 1, gl_format, gl_type, data); + else + glTexSubImage2D(target, level, x, y, width, height, gl_format, gl_type, data); + } glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); if (alignment != DEFAULT_UPLOAD_ALIGNMENT) @@ -293,13 +350,39 @@ bool OpenGLTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data else { const auto map = sb->Map(TEXTURE_UPLOAD_ALIGNMENT, map_size); - StringUtil::StrideMemCpy(map.pointer, preferred_pitch, data, pitch, width * pixel_size, height); + CopyTextureDataForUpload(width, height, m_format, map.pointer, preferred_pitch, data, pitch); 
sb->Unmap(map_size); sb->Bind(); - glPixelStorei(GL_UNPACK_ROW_LENGTH, preferred_pitch / pixel_size); - glTexSubImage2D(GL_TEXTURE_2D, layer, x, y, width, height, gl_format, gl_type, - reinterpret_cast(static_cast(map.buffer_offset))); + glPixelStorei(GL_UNPACK_ROW_LENGTH, CalcUploadRowLengthFromPitch(preferred_pitch)); + if (IsCompressedFormat()) + { + const u32 size = CalcUploadSize(height, pitch); + if (IsTextureArray()) + { + glCompressedTexSubImage3D(target, level, x, y, layer, width, height, 1, gl_format, size, + reinterpret_cast(static_cast(map.buffer_offset))); + } + else + { + glCompressedTexSubImage2D(target, level, x, y, width, height, gl_format, size, + reinterpret_cast(static_cast(map.buffer_offset))); + } + } + else + { + if (IsTextureArray()) + { + glTexSubImage3D(target, level, x, y, layer, width, height, 1, gl_format, gl_type, + reinterpret_cast(static_cast(map.buffer_offset))); + } + else + { + glTexSubImage2D(target, level, x, y, width, height, gl_format, gl_type, + reinterpret_cast(static_cast(map.buffer_offset))); + } + } + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); sb->Unbind(); @@ -315,8 +398,8 @@ bool OpenGLTexture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u3 if ((x + width) > GetMipWidth(level) || (y + height) > GetMipHeight(level) || layer > m_layers || level > m_levels) return false; - const u32 pitch = Common::AlignUpPow2(static_cast(width) * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); - const u32 upload_size = pitch * static_cast(height); + const u32 pitch = Common::AlignUpPow2(CalcUploadPitch(width), TEXTURE_UPLOAD_PITCH_ALIGNMENT); + const u32 upload_size = CalcUploadSize(height, pitch); OpenGLStreamBuffer* sb = OpenGLDevice::GetTextureStreamBuffer(); if (!sb || upload_size > sb->GetSize()) return false; @@ -339,8 +422,8 @@ void OpenGLTexture::Unmap() { CommitClear(); - const u32 pitch = Common::AlignUpPow2(static_cast(m_map_width) * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); - const u32 upload_size = pitch * 
static_cast(m_map_height); + const u32 pitch = Common::AlignUpPow2(CalcUploadPitch(m_map_width), TEXTURE_UPLOAD_PITCH_ALIGNMENT); + const u32 upload_size = CalcUploadSize(m_map_height, pitch); GPUDevice::GetStatistics().buffer_streamed += upload_size; GPUDevice::GetStatistics().num_uploads++; @@ -354,18 +437,35 @@ void OpenGLTexture::Unmap() const GLenum target = GetGLTarget(); glBindTexture(target, m_id); - glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / GetPixelSize()); + glPixelStorei(GL_UNPACK_ROW_LENGTH, CalcUploadRowLengthFromPitch(pitch)); const auto [gl_internal_format, gl_format, gl_type] = GetPixelFormatMapping(m_format, OpenGLDevice::IsGLES()); - if (IsTextureArray()) + if (IsCompressedFormat()) { - glTexSubImage3D(target, m_map_level, m_map_x, m_map_y, m_map_layer, m_map_width, m_map_height, 1, gl_format, - gl_type, reinterpret_cast(static_cast(m_map_offset))); + const u32 size = CalcUploadSize(m_map_height, pitch); + if (IsTextureArray()) + { + glCompressedTexSubImage3D(target, m_map_level, m_map_x, m_map_y, m_map_layer, m_map_width, m_map_height, 1, + gl_format, size, reinterpret_cast(static_cast(m_map_offset))); + } + else + { + glCompressedTexSubImage2D(target, m_map_level, m_map_x, m_map_y, m_map_width, m_map_height, gl_format, size, + reinterpret_cast(static_cast(m_map_offset))); + } } else { - glTexSubImage2D(target, m_map_level, m_map_x, m_map_y, m_map_width, m_map_height, gl_format, gl_type, - reinterpret_cast(static_cast(m_map_offset))); + if (IsTextureArray()) + { + glTexSubImage3D(target, m_map_level, m_map_x, m_map_y, m_map_layer, m_map_width, m_map_height, 1, gl_format, + gl_type, reinterpret_cast(static_cast(m_map_offset))); + } + else + { + glTexSubImage2D(target, m_map_level, m_map_x, m_map_y, m_map_width, m_map_height, gl_format, gl_type, + reinterpret_cast(static_cast(m_map_offset))); + } } glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); diff --git a/src/util/vulkan_device.cpp b/src/util/vulkan_device.cpp index 11039075d..b6061b8a6 100644 --- 
a/src/util/vulkan_device.cpp +++ b/src/util/vulkan_device.cpp @@ -96,6 +96,10 @@ const std::array(GPUTexture::Format::MaxCount)> Vulka VK_FORMAT_R16G16B16A16_SFLOAT, // RGBA16F VK_FORMAT_R32G32B32A32_SFLOAT, // RGBA32F VK_FORMAT_A2R10G10B10_UNORM_PACK32, // RGB10A2 + VK_FORMAT_BC1_RGBA_UNORM_BLOCK, // BC1 + VK_FORMAT_BC2_UNORM_BLOCK, // BC2 + VK_FORMAT_BC3_UNORM_BLOCK, // BC3 + VK_FORMAT_BC7_UNORM_BLOCK, // BC7 }; // Handles are always 64-bit, even on 32-bit platforms. @@ -640,6 +644,7 @@ bool VulkanDevice::CreateDevice(VkSurfaceKHR surface, bool enable_validation_lay enabled_features.sampleRateShading = available_features.sampleRateShading; enabled_features.geometryShader = available_features.geometryShader; enabled_features.fragmentStoresAndAtomics = available_features.fragmentStoresAndAtomics; + enabled_features.textureCompressionBC = available_features.textureCompressionBC; device_info.pEnabledFeatures = &enabled_features; VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesEXT rasterization_order_access_feature = { @@ -2456,6 +2461,10 @@ void VulkanDevice::SetFeatures(FeatureMask disabled_features, const VkPhysicalDe m_features.raster_order_views = (!(disabled_features & FEATURE_MASK_RASTER_ORDER_VIEWS) && vk_features.fragmentStoresAndAtomics && m_optional_extensions.vk_ext_fragment_shader_interlock); + + // Same feature bit for both. 
+ m_features.dxt_textures = m_features.bptc_textures = + (!(disabled_features & FEATURE_MASK_COMPRESSED_TEXTURES) && vk_features.textureCompressionBC); } void VulkanDevice::CopyTextureRegion(GPUTexture* dst, u32 dst_x, u32 dst_y, u32 dst_layer, u32 dst_level, diff --git a/src/util/vulkan_texture.cpp b/src/util/vulkan_texture.cpp index 12c833635..8d7061806 100644 --- a/src/util/vulkan_texture.cpp +++ b/src/util/vulkan_texture.cpp @@ -230,20 +230,13 @@ VkCommandBuffer VulkanTexture::GetCommandBufferForUpdate() return dev.GetCurrentInitCommandBuffer(); } -void VulkanTexture::CopyTextureDataForUpload(void* dst, const void* src, u32 width, u32 height, u32 pitch, - u32 upload_pitch) const -{ - StringUtil::StrideMemCpy(dst, upload_pitch, src, pitch, GetPixelSize() * width, height); -} - VkBuffer VulkanTexture::AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width, - u32 height) const + u32 height, u32 buffer_size) const { - const u32 size = upload_pitch * height; const VkBufferCreateInfo bci = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, nullptr, 0, - static_cast(size), + static_cast(buffer_size), VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_SHARING_MODE_EXCLUSIVE, 0, @@ -270,8 +263,8 @@ VkBuffer VulkanTexture::AllocateUploadStagingBuffer(const void* data, u32 pitch, VulkanDevice::GetInstance().DeferBufferDestruction(buffer, allocation); // And write the data. 
- CopyTextureDataForUpload(ai.pMappedData, data, width, height, pitch, upload_pitch); - vmaFlushAllocation(VulkanDevice::GetInstance().GetAllocator(), allocation, 0, size); + CopyTextureDataForUpload(width, height, m_format, ai.pMappedData, upload_pitch, data, pitch); + vmaFlushAllocation(VulkanDevice::GetInstance().GetAllocator(), allocation, 0, buffer_size); return buffer; } @@ -282,7 +275,7 @@ void VulkanTexture::UpdateFromBuffer(VkCommandBuffer cmdbuf, u32 x, u32 y, u32 w if (old_layout != Layout::TransferDst) TransitionSubresourcesToLayout(cmdbuf, layer, 1, level, 1, old_layout, Layout::TransferDst); - const u32 row_length = pitch / GetPixelSize(); + const u32 row_length = CalcUploadRowLengthFromPitch(pitch); const VkBufferImageCopy bic = {static_cast(buffer_offset), row_length, @@ -302,8 +295,9 @@ bool VulkanTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data DebugAssert(layer < m_layers && level < m_levels); DebugAssert((x + width) <= GetMipWidth(level) && (y + height) <= GetMipHeight(level)); - const u32 upload_pitch = Common::AlignUpPow2(pitch, VulkanDevice::GetInstance().GetBufferCopyRowPitchAlignment()); - const u32 required_size = height * upload_pitch; + const u32 upload_pitch = + Common::AlignUpPow2(CalcUploadPitch(width), VulkanDevice::GetInstance().GetBufferCopyRowPitchAlignment()); + const u32 required_size = CalcUploadSize(height, upload_pitch); VulkanDevice& dev = VulkanDevice::GetInstance(); VulkanStreamBuffer& sbuffer = dev.GetTextureUploadBuffer(); @@ -314,7 +308,7 @@ bool VulkanTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data if (required_size > (sbuffer.GetCurrentSize() / 2)) { buffer_offset = 0; - buffer = AllocateUploadStagingBuffer(data, pitch, upload_pitch, width, height); + buffer = AllocateUploadStagingBuffer(data, pitch, upload_pitch, width, height, required_size); if (buffer == VK_NULL_HANDLE) return false; } @@ -332,7 +326,7 @@ bool VulkanTexture::Update(u32 x, u32 y, u32 width, u32 height, 
const void* data buffer = sbuffer.GetBuffer(); buffer_offset = sbuffer.GetCurrentOffset(); - CopyTextureDataForUpload(sbuffer.GetCurrentHostPointer(), data, width, height, pitch, upload_pitch); + CopyTextureDataForUpload(width, height, m_format, sbuffer.GetCurrentHostPointer(), upload_pitch, data, pitch); sbuffer.CommitMemory(required_size); } @@ -372,8 +366,8 @@ bool VulkanTexture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u3 CommitClear(GetCommandBufferForUpdate()); // see note in Update() for the reason why. - const u32 aligned_pitch = Common::AlignUpPow2(width * GetPixelSize(), dev.GetBufferCopyRowPitchAlignment()); - const u32 req_size = height * aligned_pitch; + const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(width), dev.GetBufferCopyRowPitchAlignment()); + const u32 req_size = CalcUploadSize(height, aligned_pitch); VulkanStreamBuffer& buffer = dev.GetTextureUploadBuffer(); if (req_size >= (buffer.GetCurrentSize() / 2)) return false; @@ -402,8 +396,9 @@ void VulkanTexture::Unmap() { VulkanDevice& dev = VulkanDevice::GetInstance(); VulkanStreamBuffer& sb = dev.GetTextureUploadBuffer(); - const u32 aligned_pitch = Common::AlignUpPow2(m_map_width * GetPixelSize(), dev.GetBufferCopyRowPitchAlignment()); - const u32 req_size = m_map_height * aligned_pitch; + // Must reproduce the pitch Map() computed, which is derived from the mapped width and not the texture width. + const u32 aligned_pitch = Common::AlignUpPow2(CalcUploadPitch(m_map_width), dev.GetBufferCopyRowPitchAlignment()); + const u32 req_size = CalcUploadSize(m_map_height, aligned_pitch); const u32 offset = sb.GetCurrentOffset(); sb.CommitMemory(req_size); diff --git a/src/util/vulkan_texture.h b/src/util/vulkan_texture.h index f7dd8b601..b30ba7dfb 100644 --- a/src/util/vulkan_texture.h +++ b/src/util/vulkan_texture.h @@ -85,8 +85,8 @@ private: VkImage image, VmaAllocation allocation, VkImageView view, VkFormat vk_format); VkCommandBuffer GetCommandBufferForUpdate(); - void CopyTextureDataForUpload(void* dst, const void* src, u32 width, u32 height, u32 pitch, u32 upload_pitch) const; - 
VkBuffer AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width, u32 height) const; + VkBuffer AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 width, u32 height, + u32 buffer_size) const; void UpdateFromBuffer(VkCommandBuffer cmdbuf, u32 x, u32 y, u32 width, u32 height, u32 layer, u32 level, u32 pitch, VkBuffer buffer, u32 buffer_offset);