From 70c45f7cf89777be142e9e361f28830981fcb1e1 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Tue, 28 Jan 2025 17:34:04 +1000 Subject: [PATCH] GPU/TextureCache: Handle replacements larger than 256x256 I left this unfinished because it was a pain... --- src/core/gpu_hw_shadergen.cpp | 8 +++--- src/core/gpu_hw_texture_cache.cpp | 44 +++++++++++++++++++++---------- 2 files changed, 35 insertions(+), 17 deletions(-) diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index a010ed953..7cd35a21b 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -1842,16 +1842,18 @@ std::string GPU_HW_ShaderGen::GenerateReplacementMergeFragmentShader(bool replac DefineMacro(ss, "REPLACEMENT", replacement); DefineMacro(ss, "SEMITRANSPARENT", semitransparent); DefineMacro(ss, "BILINEAR_FILTER", bilinear_filter); - DeclareUniformBuffer(ss, {"float4 u_texture_size"}, true); + DeclareUniformBuffer(ss, {"float4 u_src_rect", "float4 u_texture_size"}, true); DeclareTexture(ss, "samp0", 0); DeclareFragmentEntryPoint(ss, 0, 1); ss << R"( { + float2 start_coords = u_src_rect.xy + v_tex0 * u_src_rect.zw; + #if BILINEAR_FILTER // Compute the coordinates of the four texels we will be interpolating between. // Clamp this to the triangle texture coordinates. - float2 coords = v_tex0 * u_texture_size.xy; + float2 coords = start_coords * u_texture_size.xy; float2 texel_top_left = frac(coords) - float2(0.5, 0.5); float2 texel_offset = sign(texel_top_left); float4 fcoords = max(coords.xyxy + float4(0.0, 0.0, texel_offset.x, texel_offset.y), @@ -1883,7 +1885,7 @@ std::string GPU_HW_ShaderGen::GenerateReplacementMergeFragmentShader(bool replac color.a = (color.a >= 0.5) ? 1.0 : 0.0; #endif #else - float4 color = SAMPLE_TEXTURE_LEVEL(samp0, v_tex0, 0.0); + float4 color = SAMPLE_TEXTURE_LEVEL(samp0, start_coords, 0.0); float orig_alpha = color.a; #endif o_col0.rgb = color.rgb; diff --git a/src/core/gpu_hw_texture_cache.cpp b/src/core/gpu_hw_texture_cache.cpp index 7d6339422..bf20ba1aa 100644 --- a/src/core/gpu_hw_texture_cache.cpp +++ b/src/core/gpu_hw_texture_cache.cpp @@ -2924,18 +2924,28 @@ void GPUTextureCache::GetVRAMWriteTextureReplacements(std::vector(TEXTURE_PAGE_WIDTH)); - DebugAssert(rect_in_page_space.height() <= static_cast(TEXTURE_PAGE_HEIGHT)); - GPUTexture* texture = GetTextureReplacementGPUImage(it->second.second); if (!texture) continue; + // Especially for C16 textures, the write may span multiple pages. In this case, we need to offset + // the start of the page into the replacement texture. + const GSVector2i rect_in_page_space_start = rect_in_page_space.xy(); + const GSVector2i src_offset = + GSVector2i::zero().sub32(rect_in_page_space_start) & rect_in_page_space_start.lt32(GSVector2i::zero()); + const GSVector4i clamped_rect_in_page_space = + rect_in_page_space.add32(GSVector4i::xyxy(src_offset, GSVector2i::zero())) + .rintersect(GSVector4i::cxpr(0, 0, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_HEIGHT)); + + // TODO: This fails in Wild Arms 2, writes that are wider than a page. + DebugAssert(rect_in_page_space.width() == name.width && rect_in_page_space.height() == name.height); + DebugAssert(clamped_rect_in_page_space.width() <= static_cast(TEXTURE_PAGE_WIDTH)); + DebugAssert(clamped_rect_in_page_space.height() <= static_cast(TEXTURE_PAGE_HEIGHT)); + const GSVector2 scale = GSVector2(texture->GetSizeVec()) / GSVector2(name.GetSizeVec()); - replacements.push_back(TextureReplacementSubImage{rect_in_page_space, GSVector4i::zero(), texture, scale.x, scale.y, - name.IsSemitransparent()}); + replacements.push_back(TextureReplacementSubImage{ + clamped_rect_in_page_space, GSVector4i::xyxy(src_offset, src_offset.add32(clamped_rect_in_page_space.rsize())), + texture, scale.x, scale.y, name.IsSemitransparent()}); } } @@ -2994,8 +3004,8 @@ void GPUTextureCache::GetTexturePageTextureReplacements(std::vectorGetSizeVec()) / GSVector2(name.GetSizeVec()); - replacements.push_back(TextureReplacementSubImage{rect_in_page_space, GSVector4i::zero(), texture, scale.x, scale.y, - name.IsSemitransparent()}); + replacements.push_back(TextureReplacementSubImage{rect_in_page_space, GSVector4i::loadh(texture->GetSizeVec()), + texture, scale.x, scale.y, name.IsSemitransparent()}); } } @@ -3662,10 +3672,11 @@ void GPUTextureCache::ApplyTextureReplacements(SourceKey key, HashType tex_hash, g_gpu_device->SetRenderTarget(s_state.replacement_texture_render_target.get()); GL_INS("Upscale Texture Page"); - alignas(VECTOR_ALIGNMENT) float uniforms[4]; + alignas(VECTOR_ALIGNMENT) float uniforms[8]; GSVector2 texture_size = GSVector2(GSVector2i(entry->texture->GetWidth(), entry->texture->GetHeight())); - GSVector2::store(&uniforms[0], texture_size); - GSVector2::store(&uniforms[2], GSVector2::cxpr(1.0f) / texture_size); + GSVector4::store(&uniforms[0], GSVector4::cxpr(0.0f, 0.0f, 1.0f, 1.0f)); + GSVector2::store(&uniforms[4], texture_size); + GSVector2::store(&uniforms[6], GSVector2::cxpr(1.0f) / texture_size); g_gpu_device->SetViewportAndScissor(0, 0, new_width, new_height); g_gpu_device->SetPipeline(s_state.replacement_upscale_pipeline.get()); g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); @@ -3677,16 +3688,21 @@ void GPUTextureCache::ApplyTextureReplacements(SourceKey key, HashType tex_hash, GL_INS_FMT("Blit {}x{} replacement from {} to {}", si.texture->GetWidth(), si.texture->GetHeight(), si.src_rect, si.dst_rect); + const GSVector4 src_rect = (GSVector4(GSVector4i::xyxy(si.src_rect.xy(), si.src_rect.rsize())) * + GSVector4::xyxy(GSVector2(si.scale_x, si.scale_y))) / + GSVector4(GSVector4i::xyxy(si.texture->GetSizeVec())); const GSVector4i dst_rect = GSVector4i(GSVector4(si.dst_rect) * max_scale_v); texture_size = GSVector2(si.texture->GetSizeVec()); - GSVector2::store(&uniforms[0], texture_size); - GSVector2::store(&uniforms[2], GSVector2::cxpr(1.0f) / texture_size); + GSVector4::store(&uniforms[0], src_rect); + GSVector2::store(&uniforms[4], texture_size); + GSVector2::store(&uniforms[6], GSVector2::cxpr(1.0f) / texture_size); g_gpu_device->SetViewportAndScissor(dst_rect); g_gpu_device->SetTextureSampler(0, si.texture, s_state.config.replacement_scale_linear_filter ? g_gpu_device->GetLinearSampler() : g_gpu_device->GetNearestSampler()); g_gpu_device->SetPipeline(si.invert_alpha ? s_state.replacement_semitransparent_draw_pipeline.get() : s_state.replacement_draw_pipeline.get()); + g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); g_gpu_device->Draw(3, 0); }