Skip to content

Commit

Permalink
rsx/vk/gl: Enforce format matching for render target resources. Fall …
Browse files Browse the repository at this point in the history
…back to raw data copy if match fails

- Forces Bitcast of texture data if input format cannot possibly be the
  same as the existing texture format

- rsx: Other minor improvements to the texture cache:
  - remove obsolete blit engine incompatibility warning. The texture will be re-uploaded if it is indeed incompatible
  - Implement warn_once and err_once to avoid spamming the log with systemic errors
  - Track mispredicted flushes
  - Reswizzle bitcasted texture data to native layout
    TODO: Also needs reshuffle according to input remap vector
  • Loading branch information
kd-11 committed Mar 13, 2018
1 parent 68b3229 commit 20d4c09
Show file tree
Hide file tree
Showing 5 changed files with 141 additions and 31 deletions.
60 changes: 49 additions & 11 deletions rpcs3/Emu/RSX/Common/texture_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -353,8 +353,8 @@ namespace rsx

std::unordered_map<u32, framebuffer_memory_characteristics> m_cache_miss_statistics_table;

//Set when a hw blit engine incompatibility is detected
bool blit_engine_incompatibility_warning_raised = false;
//Map of messages to only emit once
std::unordered_map<std::string, bool> m_once_only_messages_map;

//Set when a shader read-only texture data suddenly becomes contested, usually by fbo memory
bool read_only_tex_invalidate = false;
Expand All @@ -371,6 +371,7 @@ namespace rsx
//Other statistics
std::atomic<u32> m_num_flush_requests = { 0 };
std::atomic<u32> m_num_cache_misses = { 0 };
std::atomic<u32> m_num_cache_mispredictions = { 0 };

/* Helpers */
virtual void free_texture_section(section_storage_type&) = 0;
Expand All @@ -386,6 +387,7 @@ namespace rsx
virtual image_view_type generate_cubemap_from_images(commandbuffer_type&, u32 gcm_format, u16 size, const std::array<image_resource_type, 6>& sources) = 0;
virtual image_view_type generate_atlas_from_images(commandbuffer_type&, u32 gcm_format, u16 width, u16 height, const std::vector<copy_region_descriptor>& sections_to_copy) = 0;
virtual void update_image_contents(commandbuffer_type&, image_view_type dst, image_resource_type src, u16 width, u16 height) = 0;
virtual bool render_target_format_is_compatible(image_storage_type* tex, u32 gcm_format) = 0;

//Returns the fixed block size constant, 0x1000000 (1 << 24)
constexpr u32 get_block_size() const
{
	return 1u << 24;
}
//Clears the low 24 bits of the address, i.e. rounds it down to a multiple of get_block_size()
inline u32 get_block_address(u32 address) const
{
	const u32 offset_mask = get_block_size() - 1;
	return address & ~offset_mask;
}
Expand All @@ -395,6 +397,33 @@ namespace rsx
m_cache_update_tag++;
}

template <typename ...Args>
void emit_once(bool error, const char* fmt, Args&&... params)
{
const std::string message = fmt::format(fmt, std::forward<Args>(params)...);
if (m_once_only_messages_map.find(message) != m_once_only_messages_map.end())
return;

if (error)
logs::RSX.error(message.c_str());
else
logs::RSX.warning(message.c_str());

m_once_only_messages_map[message] = true;
}

//Forwards the message to emit_once with the error flag set
template <typename ...Args>
void err_once(const char* fmt, Args&&... params)
{
	constexpr bool as_error = true;
	emit_once(as_error, fmt, std::forward<Args>(params)...);
}

//Forwards the message to emit_once with the error flag cleared (warning path)
template <typename ...Args>
void warn_once(const char* fmt, Args&&... params)
{
	constexpr bool as_error = false;
	emit_once(as_error, fmt, std::forward<Args>(params)...);
}

private:
//Internal implementation methods and helpers

Expand Down Expand Up @@ -1431,6 +1460,12 @@ namespace rsx
}
}

if (!requires_processing)
{
//Check if we need to do anything about the formats
requires_processing = !render_target_format_is_compatible(texptr, format);
}

if (requires_processing)
{
const auto w = rsx::apply_resolution_scale(internal_width, true);
Expand Down Expand Up @@ -1610,7 +1645,7 @@ namespace rsx
}
}

if ((!blit_engine_incompatibility_warning_raised && g_cfg.video.use_gpu_texture_scaling) || is_hw_blit_engine_compatible(format))
if (is_hw_blit_engine_compatible(format))
{
//Find based on range instead
auto overlapping_surfaces = find_texture_from_range(texaddr, tex_size);
Expand Down Expand Up @@ -1641,14 +1676,6 @@ namespace rsx
break;
}

if (!blit_engine_incompatibility_warning_raised && !is_hw_blit_engine_compatible(format))
{
LOG_ERROR(RSX, "Format 0x%X is not compatible with the hardware blit acceleration."
" Consider turning off GPU texture scaling in the options to partially handle textures on your CPU.", format);
blit_engine_incompatibility_warning_raised = true;
break;
}

if (surface->get_sampler_status() != rsx::texture_sampler_status::status_ready)
set_up_remap_vector(*surface, tex.decoded_remap());

Expand Down Expand Up @@ -2041,6 +2068,11 @@ namespace rsx
cached_dest->reprotect(utils::protection::no);
m_cache[get_block_address(cached_dest->get_section_base())].notify();
}
else if (cached_dest->is_synchronized())
{
//Prematurely read back
m_num_cache_mispredictions++;
}

cached_dest->touch();
}
Expand Down Expand Up @@ -2100,6 +2132,7 @@ namespace rsx
{
m_num_flush_requests.store(0u);
m_num_cache_misses.store(0u);
m_num_cache_mispredictions.store(0u);
}

virtual const u32 get_unreleased_textures_count() const
Expand All @@ -2117,6 +2150,11 @@ namespace rsx
return m_num_flush_requests;
}

//Returns the current value of the misprediction counter
//(incremented when a destination section turns out to be already synchronized; cleared by the statistics reset)
virtual u32 get_num_cache_mispredictions() const
{
	const u32 mispredicted = m_num_cache_mispredictions.load();
	return mispredicted;
}

virtual f32 get_cache_miss_ratio() const
{
const auto num_flushes = m_num_flush_requests.load();
Expand Down
11 changes: 6 additions & 5 deletions rpcs3/Emu/RSX/GL/GLGSRender.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1376,13 +1376,14 @@ void GLGSRender::flip(int buffer)
m_text_printer.print_text(0, 54, m_frame->client_width(), m_frame->client_height(), "textures upload time: " + std::to_string(m_textures_upload_time) + "us");
m_text_printer.print_text(0, 72, m_frame->client_width(), m_frame->client_height(), "draw call execution: " + std::to_string(m_draw_time) + "us");

auto num_dirty_textures = m_gl_texture_cache.get_unreleased_textures_count();
auto texture_memory_size = m_gl_texture_cache.get_texture_memory_in_use() / (1024 * 1024);
auto num_flushes = m_gl_texture_cache.get_num_flush_requests();
auto cache_miss_ratio = (u32)ceil(m_gl_texture_cache.get_cache_miss_ratio() * 100);
const auto num_dirty_textures = m_gl_texture_cache.get_unreleased_textures_count();
const auto texture_memory_size = m_gl_texture_cache.get_texture_memory_in_use() / (1024 * 1024);
const auto num_flushes = m_gl_texture_cache.get_num_flush_requests();
const auto num_mispredict = m_gl_texture_cache.get_num_cache_mispredictions();
const auto cache_miss_ratio = (u32)ceil(m_gl_texture_cache.get_cache_miss_ratio() * 100);
m_text_printer.print_text(0, 108, m_frame->client_width(), m_frame->client_height(), "Unreleased textures: " + std::to_string(num_dirty_textures));
m_text_printer.print_text(0, 126, m_frame->client_width(), m_frame->client_height(), "Texture memory: " + std::to_string(texture_memory_size) + "M");
m_text_printer.print_text(0, 144, m_frame->client_width(), m_frame->client_height(), "Flush requests: " + std::to_string(num_flushes) + " (" + std::to_string(cache_miss_ratio) + "% hard faults)");
//Flush statistics overlay line: total flush requests, hard fault percentage and misprediction count
m_text_printer.print_text(0, 144, m_frame->client_width(), m_frame->client_height(), fmt::format("Flush requests: %d (%d%% hard faults, %d mispredictions)", num_flushes, cache_miss_ratio, num_mispredict));
}

m_frame->flip(m_context);
Expand Down
52 changes: 44 additions & 8 deletions rpcs3/Emu/RSX/GL/GLTextureCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -654,10 +654,13 @@ namespace gl
m_temporary_surfaces.resize(0);
}

u32 create_temporary_subresource_impl(u32 src_id, GLenum sized_internal_fmt, GLenum dst_type, u16 x, u16 y, u16 width, u16 height, bool copy = true)
u32 create_temporary_subresource_impl(u32 src_id, GLenum sized_internal_fmt, GLenum dst_type, u32 gcm_format, u16 x, u16 y, u16 width, u16 height, bool copy = true)
{
u32 dst_id = 0;

if (sized_internal_fmt == GL_NONE)
sized_internal_fmt = gl::get_sized_internal_format(gcm_format);

GLenum ifmt;
glBindTexture(GL_TEXTURE_2D, src_id);
glGetTexLevelParameteriv(GL_TEXTURE_2D, 0, GL_TEXTURE_INTERNAL_FORMAT, (GLint*)&ifmt);
Expand Down Expand Up @@ -702,6 +705,13 @@ namespace gl
}
}

if (ifmt != sized_internal_fmt)
{
err_once("GL format mismatch (data cast?). Sized ifmt=0x%X vs Src ifmt=0x%X", sized_internal_fmt, ifmt);
//Apply base component map onto the new texture if a data cast has been done
apply_component_mapping_flags(dst_type, gcm_format, rsx::texture_create_flags::default_component_order);
}

return dst_id;
}

Expand Down Expand Up @@ -764,20 +774,18 @@ namespace gl

//Creates a temporary 2D texture covering the requested region of a raw GL texture handle.
//  src        - pointer to the GL name of the source texture
//  gcm_format - GCM texture format requested for the result
//  x, y, w, h - region of the source to use
//GL_NONE is passed as the sized internal format so that create_temporary_subresource_impl derives it from gcm_format
u32 create_temporary_subresource_view(void*&, u32* src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h) override
{
	return create_temporary_subresource_impl(*src, GL_NONE, GL_TEXTURE_2D, gcm_format, x, y, w, h);
}

//Creates a temporary 2D texture covering the requested region of a gl::texture.
//  src        - source texture; render targets supply their own compatible internal format
//  gcm_format - GCM texture format requested for the result
//  x, y, w, h - region of the source to use
u32 create_temporary_subresource_view(void*&, gl::texture* src, u32 gcm_format, u16 x, u16 y, u16 w, u16 h) override
{
	//GL_NONE lets create_temporary_subresource_impl derive the sized format from gcm_format
	GLenum sized_format = GL_NONE;

	if (auto as_rtt = dynamic_cast<gl::render_target*>(src))
	{
		sized_format = (GLenum)as_rtt->get_compatible_internal_format();
	}

	return create_temporary_subresource_impl(src->id(), sized_format, GL_TEXTURE_2D, gcm_format, x, y, w, h);
}

Expand Down Expand Up @@ -820,8 +828,7 @@ namespace gl

u32 generate_atlas_from_images(void*&, u32 gcm_format, u16 width, u16 height, const std::vector<copy_region_descriptor>& sections_to_copy) override
{
const GLenum ifmt = gl::get_sized_internal_format(gcm_format);
auto result = create_temporary_subresource_impl(sections_to_copy.front().src, ifmt, GL_TEXTURE_2D, 0, 0, width, height, false);
auto result = create_temporary_subresource_impl(sections_to_copy.front().src, GL_NONE, GL_TEXTURE_2D, gcm_format, 0, 0, width, height, false);

for (const auto &region : sections_to_copy)
{
Expand Down Expand Up @@ -970,6 +977,35 @@ namespace gl
glTextureBarrierNV();
}

//Checks whether a render target's compatible internal format matches the requested GCM texture format.
//  tex        - texture to inspect; must be a gl::render_target, anything else raises an exception
//  gcm_format - GCM texture format the caller wants to sample the surface as
//Returns true on a match. GCM formats without an explicit rule are reported once through err_once
//and treated as incompatible.
bool render_target_format_is_compatible(gl::texture* tex, u32 gcm_format) override
{
	const auto rtt = dynamic_cast<gl::render_target*>(tex);
	if (!rtt)
	{
		fmt::throw_exception("Format comparison for non-rendertargets is not implemented" HERE);
	}

	using format_t = gl::texture::internal_format;
	const auto ifmt = rtt->get_compatible_internal_format();

	//Both depth-stencil layouts are accepted wherever a depth surface is allowed
	const bool is_depth_stencil = (ifmt == format_t::depth24_stencil8 || ifmt == format_t::depth32f_stencil8);

	switch (gcm_format)
	{
	case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
		return (ifmt == format_t::rgba16f);
	case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
		return (ifmt == format_t::rgba32f);
	case CELL_GCM_TEXTURE_X32_FLOAT:
		return (ifmt == format_t::r32f);
	case CELL_GCM_TEXTURE_R5G6B5:
		return (ifmt == format_t::r5g6b5);
	case CELL_GCM_TEXTURE_DEPTH24_D8:
		return is_depth_stencil;
	case CELL_GCM_TEXTURE_A8R8G8B8:
		return (ifmt == format_t::rgba8 || is_depth_stencil);
	default:
		//TODO
		err_once("Format incompatibility detected, reporting failure to force data copy (GL_INTERNAL_FORMAT=0x%X, GCM_FORMAT=0x%X)", (u32)ifmt, gcm_format);
		return false;
	}
}

public:

texture_cache() {}
Expand Down
13 changes: 7 additions & 6 deletions rpcs3/Emu/RSX/VK/VKGSRender.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3113,15 +3113,16 @@ void VKGSRender::flip(int buffer)
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 72, direct_fbo->width(), direct_fbo->height(), "draw call execution: " + std::to_string(m_draw_time) + "us");
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 90, direct_fbo->width(), direct_fbo->height(), "submit and flip: " + std::to_string(m_flip_time) + "us");

auto num_dirty_textures = m_texture_cache.get_unreleased_textures_count();
auto texture_memory_size = m_texture_cache.get_texture_memory_in_use() / (1024 * 1024);
auto tmp_texture_memory_size = m_texture_cache.get_temporary_memory_in_use() / (1024 * 1024);
auto num_flushes = m_texture_cache.get_num_flush_requests();
auto cache_miss_ratio = (u32)ceil(m_texture_cache.get_cache_miss_ratio() * 100);
const auto num_dirty_textures = m_texture_cache.get_unreleased_textures_count();
const auto texture_memory_size = m_texture_cache.get_texture_memory_in_use() / (1024 * 1024);
const auto tmp_texture_memory_size = m_texture_cache.get_temporary_memory_in_use() / (1024 * 1024);
const auto num_flushes = m_texture_cache.get_num_flush_requests();
const auto num_mispredict = m_texture_cache.get_num_cache_mispredictions();
const auto cache_miss_ratio = (u32)ceil(m_texture_cache.get_cache_miss_ratio() * 100);
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 126, direct_fbo->width(), direct_fbo->height(), "Unreleased textures: " + std::to_string(num_dirty_textures));
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 144, direct_fbo->width(), direct_fbo->height(), "Texture cache memory: " + std::to_string(texture_memory_size) + "M");
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 162, direct_fbo->width(), direct_fbo->height(), "Temporary texture memory: " + std::to_string(tmp_texture_memory_size) + "M");
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 180, direct_fbo->width(), direct_fbo->height(), "Flush requests: " + std::to_string(num_flushes) + " (" + std::to_string(cache_miss_ratio) + "% hard faults)");
//Flush statistics overlay line: total flush requests, hard fault percentage and misprediction count
m_text_writer->print_text(*m_current_command_buffer, *direct_fbo, 0, 180, direct_fbo->width(), direct_fbo->height(), fmt::format("Flush requests: %d (%d%% hard faults, %d mispredictions)", num_flushes, cache_miss_ratio, num_mispredict));
}

vk::change_image_layout(*m_current_command_buffer, target_image, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, present_layout, subres);
Expand Down
36 changes: 35 additions & 1 deletion rpcs3/Emu/RSX/VK/VKTextureCache.h
Original file line number Diff line number Diff line change
Expand Up @@ -597,8 +597,18 @@ namespace vk
w, h, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, source->info.flags));

VkComponentMapping view_swizzle = source->native_component_map;
if (dst_format != source->info.format)
{
//This is a data cast operation
//Use native mapping for the new type
//TODO: Also reapply the view swizzle
const auto remap = get_component_mapping(gcm_format);
view_swizzle = { remap[1], remap[2], remap[3], remap[0] };
}

VkImageSubresourceRange view_range = { aspect & ~(VK_IMAGE_ASPECT_STENCIL_BIT), 0, 1, 0, 1 };
view.reset(new vk::image_view(*vk::get_current_renderer(), image->value, view_type, dst_format, source->native_component_map, view_range));
view.reset(new vk::image_view(*vk::get_current_renderer(), image->value, view_type, dst_format, view_swizzle, view_range));

if (copy)
{
Expand Down Expand Up @@ -983,6 +993,30 @@ namespace vk
vk::insert_texture_barrier(cmd, tex);
}

//Checks whether the Vulkan format of an image matches the requested GCM texture format.
//  tex        - image whose info.format is inspected
//  gcm_format - GCM texture format the caller wants to sample the surface as
//Returns true on a match. GCM formats without an explicit rule are reported once through err_once
//and treated as incompatible.
bool render_target_format_is_compatible(vk::image* tex, u32 gcm_format) override
{
	const auto vk_format = tex->info.format;

	//Both depth-stencil layouts are accepted wherever a depth surface is allowed
	const bool is_depth_stencil = (vk_format == VK_FORMAT_D24_UNORM_S8_UINT || vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT);

	switch (gcm_format)
	{
	case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
		return (vk_format == VK_FORMAT_R16G16B16A16_SFLOAT);
	case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
		return (vk_format == VK_FORMAT_R32G32B32A32_SFLOAT);
	case CELL_GCM_TEXTURE_X32_FLOAT:
		return (vk_format == VK_FORMAT_R32_SFLOAT);
	case CELL_GCM_TEXTURE_R5G6B5:
		return (vk_format == VK_FORMAT_R5G6B5_UNORM_PACK16);
	case CELL_GCM_TEXTURE_DEPTH24_D8:
		return is_depth_stencil;
	case CELL_GCM_TEXTURE_A8R8G8B8:
		return (vk_format == VK_FORMAT_B8G8R8A8_UNORM || is_depth_stencil);
	default:
		//TODO
		err_once("Format incompatibility detected, reporting failure to force data copy (VK_FORMAT=0x%X, GCM_FORMAT=0x%X)", (u32)vk_format, gcm_format);
		return false;
	}
}

public:

struct vk_blit_op_result : public blit_op_result
Expand Down

0 comments on commit 20d4c09

Please sign in to comment.