diff options
Diffstat (limited to '')
21 files changed, 668 insertions, 660 deletions
diff --git a/src/core/hle/service/time/time_zone_content_manager.cpp b/src/core/hle/service/time/time_zone_content_manager.cpp index 57b1a2bca..78d4acd95 100644 --- a/src/core/hle/service/time/time_zone_content_manager.cpp +++ b/src/core/hle/service/time/time_zone_content_manager.cpp @@ -53,7 +53,7 @@ static std::vector<std::string> BuildLocationNameCache(Core::System& system) { return {}; } - std::vector<char> raw_data(binary_list->GetSize()); + std::vector<char> raw_data(binary_list->GetSize() + 1); binary_list->ReadBytes<char>(raw_data.data(), binary_list->GetSize()); std::stringstream data_stream{raw_data.data()}; diff --git a/src/core/settings.h b/src/core/settings.h index cb5979e6f..12e2cc9e7 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -387,6 +387,7 @@ struct Values { s32 current_user; s32 language_index; + s32 sound_index; // Controls std::array<PlayerInput, 10> players; diff --git a/src/input_common/udp/udp.cpp b/src/input_common/udp/udp.cpp index ca99cc22f..8c6ef1394 100644 --- a/src/input_common/udp/udp.cpp +++ b/src/input_common/udp/udp.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include <mutex> +#include <optional> #include <tuple> #include "common/param_package.h" @@ -44,7 +45,7 @@ public: std::unique_ptr<Input::TouchDevice> Create(const Common::ParamPackage& params) override { { std::lock_guard guard(status->update_mutex); - status->touch_calibration.emplace(); + status->touch_calibration = DeviceStatus::CalibrationData{}; // These default values work well for DS4 but probably not other touch inputs status->touch_calibration->min_x = params.Get("min_x", 100); status->touch_calibration->min_y = params.Get("min_y", 50); diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp index 4429f3405..e16075993 100644 --- a/src/video_core/dirty_flags.cpp +++ b/src/video_core/dirty_flags.cpp @@ -15,14 +15,6 @@ namespace VideoCommon::Dirty { using Tegra::Engines::Maxwell3D; -void SetupCommonOnWriteStores(Tegra::Engines::Maxwell3D::DirtyState::Flags& store) { - store[RenderTargets] = true; - store[ZetaBuffer] = true; - for (std::size_t i = 0; i < Maxwell3D::Regs::NumRenderTargets; ++i) { - store[ColorBuffer0 + i] = true; - } -} - void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) { static constexpr std::size_t num_per_rt = NUM(rt[0]); static constexpr std::size_t begin = OFF(rt); diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h index 0dbafd3ef..3f6c1d83a 100644 --- a/src/video_core/dirty_flags.h +++ b/src/video_core/dirty_flags.h @@ -44,8 +44,6 @@ void FillBlock(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables, std::size_ FillBlock(tables[1], begin, num, index_b); } -void SetupCommonOnWriteStores(Tegra::Engines::Maxwell3D::DirtyState::Flags& store); - void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables); } // namespace VideoCommon::Dirty diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 1ecd65925..368c75a66 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -119,14 +119,6 @@ Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const { Texture::TICEntry tic_entry; memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); - const auto r_type{tic_entry.r_type.Value()}; - const auto g_type{tic_entry.g_type.Value()}; - const auto b_type{tic_entry.b_type.Value()}; - const auto a_type{tic_entry.a_type.Value()}; - - // TODO(Subv): Different data types for separate components are not supported - DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type); - return tic_entry; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 1af4268a4..063f41327 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -93,10 +93,6 @@ void oglEnable(GLenum cap, bool state) { (state ? glEnable : glDisable)(cap); } -void oglEnablei(GLenum cap, bool state, GLuint index) { - (state ? glEnablei : glDisablei)(cap, index); -} - } // Anonymous namespace RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, @@ -478,7 +474,6 @@ void RasterizerOpenGL::Clear() { void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { MICROPROFILE_SCOPE(OpenGL_Drawing); auto& gpu = system.GPU().Maxwell3D(); - const auto& regs = gpu.regs; query_cache.UpdateCounters(); @@ -529,7 +524,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { // Upload vertex and index data. SetupVertexBuffer(); SetupVertexInstances(); - GLintptr index_buffer_offset; + GLintptr index_buffer_offset = 0; if (is_indexed) { index_buffer_offset = SetupIndexBuffer(); } @@ -555,7 +550,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { ConfigureFramebuffers(); // Signal the buffer cache that we are not going to upload more things. - const bool invalidate = buffer_cache.Unmap(); + buffer_cache.Unmap(); // Now that we are no longer uploading data, we can safely bind the buffers to OpenGL. vertex_array_pushbuffer.Bind(); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 3adf7f0cb..2c38f57fd 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -393,10 +393,6 @@ std::string FlowStackTopName(MetaStackClass stack) { return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); } -[[deprecated]] constexpr bool IsVertexShader(ShaderType stage) { - return stage == ShaderType::Vertex; -} - struct GenericVaryingDescription { std::string name; u8 first_element = 0; @@ -529,8 +525,9 @@ private: } void DeclareVertex() { - if (!IsVertexShader(stage)) + if (stage != ShaderType::Vertex) { return; + } DeclareVertexRedeclarations(); } @@ -602,14 +599,14 @@ private: break; } } - if (!IsVertexShader(stage) || device.HasVertexViewportLayer()) { + if (stage != ShaderType::Vertex || device.HasVertexViewportLayer()) { if (ir.UsesLayer()) { code.AddLine("int gl_Layer;"); } if (ir.UsesViewportIndex()) { code.AddLine("int gl_ViewportIndex;"); } - } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && IsVertexShader(stage) && + } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { LOG_ERROR( Render_OpenGL, @@ -1147,7 +1144,7 @@ private: // TODO(Subv): Find out what the values are for the first two elements when inside a // vertex shader, and what's the value of the fourth element when inside a Tess Eval // shader. - ASSERT(IsVertexShader(stage)); + ASSERT(stage == ShaderType::Vertex); switch (element) { case 2: // Config pack's first value is instance_id. @@ -1218,12 +1215,12 @@ private: UNIMPLEMENTED(); return {}; case 1: - if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { + if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { return {}; } return {{"gl_Layer", Type::Int}}; case 2: - if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { + if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { return {}; } return {{"gl_ViewportIndex", Type::Int}}; @@ -2009,16 +2006,19 @@ private: expr += GetSampler(meta->sampler); expr += ", "; - expr += constructors.at(operation.GetOperandsCount() - 1); + expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 1 : 0) - 1); expr += '('; for (std::size_t i = 0; i < count; ++i) { - expr += VisitOperand(operation, i).AsInt(); - const std::size_t next = i + 1; - if (next == count) - expr += ')'; - else if (next < count) + if (i > 0) { expr += ", "; + } + expr += VisitOperand(operation, i).AsInt(); } + if (meta->array) { + expr += ", "; + expr += Visit(meta->array).AsInt(); + } + expr += ')'; if (meta->lod && !meta->sampler.IsBuffer()) { expr += ", "; @@ -2529,7 +2529,7 @@ private: } u32 GetNumPhysicalInputAttributes() const { - return IsVertexShader(stage) ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); + return stage == ShaderType::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); } u32 GetNumPhysicalAttributes() const { diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp index 3f3bdf812..255ac3147 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.cpp +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp @@ -238,7 +238,6 @@ void StateTracker::Initialize() { SetupDirtyMisc(tables); auto& store = dirty.on_write_stores; - SetupCommonOnWriteStores(store); store[VertexBuffers] = true; for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) { store[VertexBuffer0 + i] = true; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 59e963263..f93447610 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -257,6 +257,8 @@ vk::ShaderStageFlagBits ShaderStage(Tegra::Engines::ShaderType stage) { return vk::ShaderStageFlagBits::eGeometry; case Tegra::Engines::ShaderType::Fragment: return vk::ShaderStageFlagBits::eFragment; + case Tegra::Engines::ShaderType::Compute: + return vk::ShaderStageFlagBits::eCompute; } UNIMPLEMENTED_MSG("Unimplemented shader stage={}", static_cast<u32>(stage)); return {}; @@ -367,6 +369,10 @@ vk::Format VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttr return vk::Format::eR8G8B8A8Uint; case Maxwell::VertexAttribute::Size::Size_32: return vk::Format::eR32Uint; + case Maxwell::VertexAttribute::Size::Size_32_32: + return vk::Format::eR32G32Uint; + case Maxwell::VertexAttribute::Size::Size_32_32_32: + return vk::Format::eR32G32B32Uint; case Maxwell::VertexAttribute::Size::Size_32_32_32_32: return vk::Format::eR32G32B32A32Uint; default: diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 056ef495c..557b9d662 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -179,10 +179,11 @@ Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine( VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, const VKDevice& device, VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue) + VKUpdateDescriptorQueue& update_descriptor_queue, + VKRenderPassCache& renderpass_cache) : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler}, descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue}, - renderpass_cache(device) {} + renderpass_cache{renderpass_cache} {} VKPipelineCache::~VKPipelineCache() = default; @@ -191,7 +192,6 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { std::array<Shader, Maxwell::MaxShaderProgram> shaders; for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - const auto& shader_config = gpu.regs.shader_config[index]; const auto program{static_cast<Maxwell::ShaderProgram>(index)}; // Skip stages that are not enabled diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 21340c9a4..c4c112290 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -161,7 +161,8 @@ public: explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, const VKDevice& device, VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue); + VKUpdateDescriptorQueue& update_descriptor_queue, + VKRenderPassCache& renderpass_cache); ~VKPipelineCache(); std::array<Shader, Maxwell::MaxShaderProgram> GetShaders(); @@ -184,8 +185,7 @@ private: VKScheduler& scheduler; VKDescriptorPool& descriptor_pool; VKUpdateDescriptorQueue& update_descriptor_queue; - - VKRenderPassCache renderpass_cache; + VKRenderPassCache& renderpass_cache; std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index c9886cc16..58c69b786 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -287,12 +287,13 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind screen_info{screen_info}, device{device}, resource_manager{resource_manager}, memory_manager{memory_manager}, state_tracker{state_tracker}, scheduler{scheduler}, staging_pool(device, memory_manager, scheduler), descriptor_pool(device), - update_descriptor_queue(device, scheduler), + update_descriptor_queue(device, scheduler), renderpass_cache(device), quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), texture_cache(system, *this, device, resource_manager, memory_manager, scheduler, staging_pool), - pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue), + pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue, + renderpass_cache), buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), sampler_cache(device), query_cache(system, *this, device, scheduler) { scheduler.SetQueryCache(query_cache); @@ -365,13 +366,16 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { void RasterizerVulkan::Clear() { MICROPROFILE_SCOPE(Vulkan_Clearing); - query_cache.UpdateCounters(); - const auto& gpu = system.GPU().Maxwell3D(); if (!system.GPU().Maxwell3D().ShouldExecute()) { return; } + sampled_views.clear(); + image_views.clear(); + + query_cache.UpdateCounters(); + const auto& regs = gpu.regs; const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || regs.clear_buffers.A; @@ -380,52 +384,54 @@ void RasterizerVulkan::Clear() { if (!use_color && !use_depth && !use_stencil) { return; } - // Clearing images requires to be out of a renderpass - scheduler.RequestOutsideRenderPassOperationContext(); - // TODO(Rodrigo): Implement clears rendering a quad or using beginning a renderpass. + [[maybe_unused]] const auto texceptions = UpdateAttachments(); + DEBUG_ASSERT(texceptions.none()); + SetupImageTransitions(0, color_attachments, zeta_attachment); - if (use_color) { - View color_view; - { - MICROPROFILE_SCOPE(Vulkan_RenderTargets); - color_view = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT.Value(), false); - } + const vk::RenderPass renderpass = renderpass_cache.GetRenderPass(GetRenderPassParams(0)); + const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); + scheduler.RequestRenderpass({renderpass, framebuffer, {{0, 0}, render_area}, 0, nullptr}); + + const auto& scissor = regs.scissor_test[0]; + const vk::Offset2D scissor_offset(scissor.min_x, scissor.min_y); + vk::Extent2D scissor_extent{scissor.max_x - scissor.min_x, scissor.max_y - scissor.min_y}; + scissor_extent.width = std::min(scissor_extent.width, render_area.width); + scissor_extent.height = std::min(scissor_extent.height, render_area.height); - color_view->Transition(vk::ImageLayout::eTransferDstOptimal, - vk::PipelineStageFlagBits::eTransfer, - vk::AccessFlagBits::eTransferWrite); + const u32 layer = regs.clear_buffers.layer; + const vk::ClearRect clear_rect({scissor_offset, scissor_extent}, layer, 1); + if (use_color) { const std::array clear_color = {regs.clear_color[0], regs.clear_color[1], regs.clear_color[2], regs.clear_color[3]}; - const vk::ClearColorValue clear(clear_color); - scheduler.Record([image = color_view->GetImage(), - subresource = color_view->GetImageSubresourceRange(), - clear](auto cmdbuf, auto& dld) { - cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, clear, subresource, - dld); + const vk::ClearValue clear_value{clear_color}; + const u32 color_attachment = regs.clear_buffers.RT; + scheduler.Record([color_attachment, clear_value, clear_rect](auto cmdbuf, auto& dld) { + const vk::ClearAttachment attachment(vk::ImageAspectFlagBits::eColor, color_attachment, + clear_value); + cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld); }); } - if (use_depth || use_stencil) { - View zeta_surface; - { - MICROPROFILE_SCOPE(Vulkan_RenderTargets); - zeta_surface = texture_cache.GetDepthBufferSurface(false); - } - zeta_surface->Transition(vk::ImageLayout::eTransferDstOptimal, - vk::PipelineStageFlagBits::eTransfer, - vk::AccessFlagBits::eTransferWrite); - - const vk::ClearDepthStencilValue clear(regs.clear_depth, - static_cast<u32>(regs.clear_stencil)); - scheduler.Record([image = zeta_surface->GetImage(), - subresource = zeta_surface->GetImageSubresourceRange(), - clear](auto cmdbuf, auto& dld) { - cmdbuf.clearDepthStencilImage(image, vk::ImageLayout::eTransferDstOptimal, clear, - subresource, dld); - }); + if (!use_depth && !use_stencil) { + return; + } + vk::ImageAspectFlags aspect_flags; + if (use_depth) { + aspect_flags |= vk::ImageAspectFlagBits::eDepth; + } + if (use_stencil) { + aspect_flags |= vk::ImageAspectFlagBits::eStencil; } + + scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, + clear_rect, aspect_flags](auto cmdbuf, auto& dld) { + const vk::ClearDepthStencilValue clear_zeta(clear_depth, clear_stencil); + const vk::ClearValue clear_value{clear_zeta}; + const vk::ClearAttachment attachment(aspect_flags, 0, clear_value); + cmdbuf.clearAttachments(1, &attachment, 1, &clear_rect, dld); + }); } void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { @@ -542,8 +548,6 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, // Verify that the cached surface is the same size and format as the requested framebuffer const auto& params{surface->GetSurfaceParams()}; - const auto& pixel_format{ - VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index b2e73d98d..3185868e9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -253,6 +253,7 @@ private: VKStagingBufferPool staging_pool; VKDescriptorPool descriptor_pool; VKUpdateDescriptorQueue update_descriptor_queue; + VKRenderPassCache renderpass_cache; QuadArrayPass quad_array_pass; Uint8Pass uint8_pass; diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index d9ea3cc21..374959f82 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -100,7 +100,6 @@ void VKStagingBufferPool::ReleaseCache(bool host_visible) { } u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t log2) { - static constexpr u64 epochs_to_destroy = 180; static constexpr std::size_t deletions_per_tick = 16; auto& staging = cache[log2]; @@ -108,6 +107,7 @@ u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t lo const std::size_t old_size = entries.size(); const auto is_deleteable = [this](const auto& entry) { + static constexpr u64 epochs_to_destroy = 180; return entry.last_epoch + epochs_to_destroy < epoch && !entry.watch.IsUsed(); }; const std::size_t begin_offset = staging.delete_index; diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index d74e68b63..94a89e388 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -90,8 +90,6 @@ void StateTracker::Initialize() { SetupDirtyBlendConstants(tables); SetupDirtyDepthBounds(tables); SetupDirtyStencilProperties(tables); - - SetupCommonOnWriteStores(dirty.on_write_stores); } void StateTracker::InvalidateCommandBufferState() { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 73d92a5ae..26175921b 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -52,6 +52,9 @@ vk::ImageType SurfaceTargetToImage(SurfaceTarget target) { return vk::ImageType::e2D; case SurfaceTarget::Texture3D: return vk::ImageType::e3D; + case SurfaceTarget::TextureBuffer: + UNREACHABLE(); + return {}; } UNREACHABLE_MSG("Unknown texture target={}", static_cast<u32>(target)); return {}; @@ -273,7 +276,6 @@ void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) { for (u32 level = 0; level < params.num_levels; ++level) { vk::BufferImageCopy copy = GetBufferImageCopy(level); - const auto& dld = device.GetDispatchLoader(); if (image->GetAspectMask() == (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) { vk::BufferImageCopy depth = copy; @@ -422,7 +424,6 @@ void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface, dst_base_layer, num_layers, copy_params.dest_level, 1, vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eTransferDstOptimal); - const auto& dld{device.GetDispatchLoader()}; const vk::ImageSubresourceLayers src_subresource( src_surface->GetAspectMask(), copy_params.source_level, copy_params.source_z, num_layers); const vk::ImageSubresourceLayers dst_subresource( @@ -458,7 +459,6 @@ void VKTextureCache::ImageBlit(View& src_view, View& dst_view, dst_view->GetImageSubresourceLayers(), {dst_top_left, dst_bot_right}); const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; - const auto& dld{device.GetDispatchLoader()}; scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit, is_linear](auto cmdbuf, auto& dld) { cmdbuf.blitImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image, diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp index 206961909..fbd7e9a17 100644 --- a/src/video_core/shader/decode/xmad.cpp +++ b/src/video_core/shader/decode/xmad.cpp @@ -12,6 +12,7 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; +using Tegra::Shader::PredCondition; u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; @@ -63,15 +64,18 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { } }(); - op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16); + op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a), + instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16)); const Node original_b = op_b; - op_b = BitfieldExtract(op_b, is_high_b ? 16 : 0, 16); + op_b = SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b), + is_high_b ? Immediate(16) : Immediate(0), Immediate(16)); - // TODO(Rodrigo): Use an appropiate sign for this operation - Node product = Operation(OperationCode::IMul, NO_PRECISE, op_a, op_b); + // we already check sign_a and sign_b is difference or not before so just use one in here. + Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b); if (is_psl) { - product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16)); + product = + SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_a, product, Immediate(16)); } SetTemporary(bb, 0, product); product = GetTemporary(0); @@ -88,12 +92,40 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { return BitfieldExtract(original_c, 16, 16); case Tegra::Shader::XmadMode::CBcc: { const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, - NO_PRECISE, original_b, Immediate(16)); - return SignedOperation(OperationCode::IAdd, is_signed_c, NO_PRECISE, original_c, - shifted_b); + original_b, Immediate(16)); + return SignedOperation(OperationCode::IAdd, is_signed_c, original_c, shifted_b); + } + case Tegra::Shader::XmadMode::CSfu: { + const Node comp_a = GetPredicateComparisonInteger(PredCondition::Equal, is_signed_a, + op_a, Immediate(0)); + const Node comp_b = GetPredicateComparisonInteger(PredCondition::Equal, is_signed_b, + op_b, Immediate(0)); + const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b); + + const Node comp_minus_a = GetPredicateComparisonInteger( + PredCondition::NotEqual, is_signed_a, + SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a, + Immediate(0x80000000)), + Immediate(0)); + const Node comp_minus_b = GetPredicateComparisonInteger( + PredCondition::NotEqual, is_signed_b, + SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b, + Immediate(0x80000000)), + Immediate(0)); + + Node new_c = Operation( + OperationCode::Select, comp_minus_a, + SignedOperation(OperationCode::IAdd, is_signed_c, original_c, Immediate(-65536)), + original_c); + new_c = Operation( + OperationCode::Select, comp_minus_b, + SignedOperation(OperationCode::IAdd, is_signed_c, new_c, Immediate(-65536)), + std::move(new_c)); + + return Operation(OperationCode::Select, comp, original_c, std::move(new_c)); } default: - UNIMPLEMENTED_MSG("Unhandled XMAD mode: {}", static_cast<u32>(instr.xmad.mode.Value())); + UNREACHABLE(); return Immediate(0); } }(); @@ -102,18 +134,19 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { op_c = GetTemporary(1); // TODO(Rodrigo): Use an appropiate sign for this operation - Node sum = Operation(OperationCode::IAdd, product, op_c); + Node sum = SignedOperation(OperationCode::IAdd, is_signed_a, product, std::move(op_c)); SetTemporary(bb, 2, sum); sum = GetTemporary(2); if (is_merge) { - const Node a = BitfieldExtract(sum, 0, 16); - const Node b = - Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, original_b, Immediate(16)); - sum = Operation(OperationCode::IBitwiseOr, NO_PRECISE, a, b); + const Node a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(sum), + Immediate(0), Immediate(16)); + const Node b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, original_b, + Immediate(16)); + sum = SignedOperation(OperationCode::IBitwiseOr, is_signed_a, a, b); } SetInternalFlagsFromInteger(bb, sum, instr.generates_cc); - SetRegister(bb, instr.gpr0, sum); + SetRegister(bb, instr.gpr0, std::move(sum)); return pc; } diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index 33bd31865..062b4f252 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -17,26 +17,37 @@ #include <algorithm> #include <cassert> -#include <cstdint> #include <cstring> #include <vector> +#include "common/common_types.h" + #include "video_core/textures/astc.h" +namespace { + +/// Count the number of bits set in a number. +constexpr u32 Popcnt(u32 n) { + u32 c = 0; + for (; n; c++) { + n &= n - 1; + } + return c; +} + +} // Anonymous namespace + class InputBitStream { public: - explicit InputBitStream(const unsigned char* ptr, int start_offset = 0) + explicit InputBitStream(const u8* ptr, std::size_t start_offset = 0) : m_CurByte(ptr), m_NextBit(start_offset % 8) {} - ~InputBitStream() = default; - - int GetBitsRead() const { + std::size_t GetBitsRead() const { return m_BitsRead; } - int ReadBit() { - - int bit = *m_CurByte >> m_NextBit++; + u32 ReadBit() { + u32 bit = *m_CurByte >> m_NextBit++; while (m_NextBit >= 8) { m_NextBit -= 8; m_CurByte++; @@ -46,57 +57,66 @@ public: return bit & 1; } - unsigned int ReadBits(unsigned int nBits) { - unsigned int ret = 0; - for (unsigned int i = 0; i < nBits; i++) { + u32 ReadBits(std::size_t nBits) { + u32 ret = 0; + for (std::size_t i = 0; i < nBits; ++i) { + ret |= (ReadBit() & 1) << i; + } + return ret; + } + + template <std::size_t nBits> + u32 ReadBits() { + u32 ret = 0; + for (std::size_t i = 0; i < nBits; ++i) { ret |= (ReadBit() & 1) << i; } return ret; } private: - const unsigned char* m_CurByte; - int m_NextBit = 0; - int m_BitsRead = 0; + const u8* m_CurByte; + std::size_t m_NextBit = 0; + std::size_t m_BitsRead = 0; }; class OutputBitStream { public: - explicit OutputBitStream(unsigned char* ptr, int nBits = 0, int start_offset = 0) + explicit OutputBitStream(u8* ptr, s32 nBits = 0, s32 start_offset = 0) : m_NumBits(nBits), m_CurByte(ptr), m_NextBit(start_offset % 8) {} ~OutputBitStream() = default; - int GetBitsWritten() const { + s32 GetBitsWritten() const { return m_BitsWritten; } - void WriteBitsR(unsigned int val, unsigned int nBits) { - for (unsigned int i = 0; i < nBits; i++) { + void WriteBitsR(u32 val, u32 nBits) { + for (u32 i = 0; i < nBits; i++) { WriteBit((val >> (nBits - i - 1)) & 1); } } - void WriteBits(unsigned int val, unsigned int nBits) { - for (unsigned int i = 0; i < nBits; i++) { + void WriteBits(u32 val, u32 nBits) { + for (u32 i = 0; i < nBits; i++) { WriteBit((val >> i) & 1); } } private: - void WriteBit(int b) { + void WriteBit(s32 b) { if (done) return; - const unsigned int mask = 1 << m_NextBit++; + const u32 mask = 1 << m_NextBit++; // clear the bit - *m_CurByte &= static_cast<unsigned char>(~mask); + *m_CurByte &= static_cast<u8>(~mask); // Write the bit, if necessary if (b) - *m_CurByte |= static_cast<unsigned char>(mask); + *m_CurByte |= static_cast<u8>(mask); // Next byte? if (m_NextBit >= 8) { @@ -107,10 +127,10 @@ private: done = done || ++m_BitsWritten >= m_NumBits; } - int m_BitsWritten = 0; - const int m_NumBits; - unsigned char* m_CurByte; - int m_NextBit = 0; + s32 m_BitsWritten = 0; + const s32 m_NumBits; + u8* m_CurByte; + s32 m_NextBit = 0; bool done = false; }; @@ -123,20 +143,20 @@ public: Bits(const Bits&) = delete; Bits& operator=(const Bits&) = delete; - uint8_t operator[](uint32_t bitPos) const { - return static_cast<uint8_t>((m_Bits >> bitPos) & 1); + u8 operator[](u32 bitPos) const { + return static_cast<u8>((m_Bits >> bitPos) & 1); } - IntType operator()(uint32_t start, uint32_t end) const { + IntType operator()(u32 start, u32 end) const { if (start == end) { return (*this)[start]; } else if (start > end) { - uint32_t t = start; + u32 t = start; start = end; end = t; } - uint64_t mask = (1 << (end - start + 1)) - 1; + u64 mask = (1 << (end - start + 1)) - 1; return (m_Bits >> start) & static_cast<IntType>(mask); } @@ -144,273 +164,236 @@ private: const IntType& m_Bits; }; -enum EIntegerEncoding { eIntegerEncoding_JustBits, eIntegerEncoding_Quint, eIntegerEncoding_Trit }; - -class IntegerEncodedValue { -private: - const EIntegerEncoding m_Encoding; - const uint32_t m_NumBits; - uint32_t m_BitValue; - union { - uint32_t m_QuintValue; - uint32_t m_TritValue; - }; +enum class IntegerEncoding { JustBits, Qus32, Trit }; -public: - // Jank, but we're not doing any heavy lifting in this class, so it's - // probably OK. It allows us to use these in std::vectors... - IntegerEncodedValue& operator=(const IntegerEncodedValue& other) { - new (this) IntegerEncodedValue(other); - return *this; - } +struct IntegerEncodedValue { + constexpr IntegerEncodedValue() = default; - IntegerEncodedValue(EIntegerEncoding encoding, uint32_t numBits) - : m_Encoding(encoding), m_NumBits(numBits) {} + constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_) + : encoding{encoding_}, num_bits{num_bits_} {} - EIntegerEncoding GetEncoding() const { - return m_Encoding; - } - uint32_t BaseBitLength() const { - return m_NumBits; - } - - uint32_t GetBitValue() const { - return m_BitValue; - } - void SetBitValue(uint32_t val) { - m_BitValue = val; - } - - uint32_t GetTritValue() const { - return m_TritValue; - } - void SetTritValue(uint32_t val) { - m_TritValue = val; - } - - uint32_t GetQuintValue() const { - return m_QuintValue; - } - void SetQuintValue(uint32_t val) { - m_QuintValue = val; - } - - bool MatchesEncoding(const IntegerEncodedValue& other) const { - return m_Encoding == other.m_Encoding && m_NumBits == other.m_NumBits; + constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const { + return encoding == other.encoding && num_bits == other.num_bits; } // Returns the number of bits required to encode nVals values. - uint32_t GetBitLength(uint32_t nVals) const { - uint32_t totalBits = m_NumBits * nVals; - if (m_Encoding == eIntegerEncoding_Trit) { + u32 GetBitLength(u32 nVals) const { + u32 totalBits = num_bits * nVals; + if (encoding == IntegerEncoding::Trit) { totalBits += (nVals * 8 + 4) / 5; - } else if (m_Encoding == eIntegerEncoding_Quint) { + } else if (encoding == IntegerEncoding::Qus32) { totalBits += (nVals * 7 + 2) / 3; } return totalBits; } - // Count the number of bits set in a number. - static inline uint32_t Popcnt(uint32_t n) { - uint32_t c; - for (c = 0; n; c++) { - n &= n - 1; + IntegerEncoding encoding{}; + u32 num_bits = 0; + u32 bit_value = 0; + union { + u32 qus32_value = 0; + u32 trit_value; + }; +}; + +static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, + u32 nBitsPerValue) { + // Implement the algorithm in section C.2.12 + u32 m[5]; + u32 t[5]; + u32 T; + + // Read the trit encoded block according to + // table C.2.14 + m[0] = bits.ReadBits(nBitsPerValue); + T = bits.ReadBits<2>(); + m[1] = bits.ReadBits(nBitsPerValue); + T |= bits.ReadBits<2>() << 2; + m[2] = bits.ReadBits(nBitsPerValue); + T |= bits.ReadBit() << 4; + m[3] = bits.ReadBits(nBitsPerValue); + T |= bits.ReadBits<2>() << 5; + m[4] = bits.ReadBits(nBitsPerValue); + T |= bits.ReadBit() << 7; + + u32 C = 0; + + Bits<u32> Tb(T); + if (Tb(2, 4) == 7) { + C = (Tb(5, 7) << 2) | Tb(0, 1); + t[4] = t[3] = 2; + } else { + C = Tb(0, 4); + if (Tb(5, 6) == 3) { + t[4] = 2; + t[3] = Tb[7]; + } else { + t[4] = Tb[7]; + t[3] = Tb(5, 6); } - return c; } - // Returns a new instance of this struct that corresponds to the - // can take no more than maxval values - static IntegerEncodedValue CreateEncoding(uint32_t maxVal) { - while (maxVal > 0) { - uint32_t check = maxVal + 1; - - // Is maxVal a power of two? - if (!(check & (check - 1))) { - return IntegerEncodedValue(eIntegerEncoding_JustBits, Popcnt(maxVal)); - } - - // Is maxVal of the type 3*2^n - 1? - if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) { - return IntegerEncodedValue(eIntegerEncoding_Trit, Popcnt(check / 3 - 1)); - } + Bits<u32> Cb(C); + if (Cb(0, 1) == 3) { + t[2] = 2; + t[1] = Cb[4]; + t[0] = (Cb[3] << 1) | (Cb[2] & ~Cb[3]); + } else if (Cb(2, 3) == 3) { + t[2] = 2; + t[1] = 2; + t[0] = Cb(0, 1); + } else { + t[2] = Cb[4]; + t[1] = Cb(2, 3); + t[0] = (Cb[1] << 1) | (Cb[0] & ~Cb[1]); + } - // Is maxVal of the type 5*2^n - 1? - if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) { - return IntegerEncodedValue(eIntegerEncoding_Quint, Popcnt(check / 5 - 1)); - } + for (std::size_t i = 0; i < 5; ++i) { + IntegerEncodedValue& val = result.emplace_back(IntegerEncoding::Trit, nBitsPerValue); + val.bit_value = m[i]; + val.trit_value = t[i]; + } +} - // Apparently it can't be represented with a bounded integer sequence... - // just iterate. - maxVal--; +static void DecodeQus32Block(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, + u32 nBitsPerValue) { + // Implement the algorithm in section C.2.12 + u32 m[3]; + u32 q[3]; + u32 Q; + + // Read the trit encoded block according to + // table C.2.15 + m[0] = bits.ReadBits(nBitsPerValue); + Q = bits.ReadBits<3>(); + m[1] = bits.ReadBits(nBitsPerValue); + Q |= bits.ReadBits<2>() << 3; + m[2] = bits.ReadBits(nBitsPerValue); + Q |= bits.ReadBits<2>() << 5; + + Bits<u32> Qb(Q); + if (Qb(1, 2) == 3 && Qb(5, 6) == 0) { + q[0] = q[1] = 4; + q[2] = (Qb[0] << 2) | ((Qb[4] & ~Qb[0]) << 1) | (Qb[3] & ~Qb[0]); + } else { + u32 C = 0; + if (Qb(1, 2) == 3) { + q[2] = 4; + C = (Qb(3, 4) << 3) | ((~Qb(5, 6) & 3) << 1) | Qb[0]; + } else { + q[2] = Qb(5, 6); + C = Qb(0, 4); } - return IntegerEncodedValue(eIntegerEncoding_JustBits, 0); - } - - // Fills result with the values that are encoded in the given - // bitstream. We must know beforehand what the maximum possible - // value is, and how many values we're decoding. - static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, - InputBitStream& bits, uint32_t maxRange, uint32_t nValues) { - // Determine encoding parameters - IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(maxRange); - - // Start decoding - uint32_t nValsDecoded = 0; - while (nValsDecoded < nValues) { - switch (val.GetEncoding()) { - case eIntegerEncoding_Quint: - DecodeQuintBlock(bits, result, val.BaseBitLength()); - nValsDecoded += 3; - break; - case eIntegerEncoding_Trit: - DecodeTritBlock(bits, result, val.BaseBitLength()); - nValsDecoded += 5; - break; - - case eIntegerEncoding_JustBits: - val.SetBitValue(bits.ReadBits(val.BaseBitLength())); - result.push_back(val); - nValsDecoded++; - break; - } + Bits<u32> Cb(C); + if (Cb(0, 2) == 5) { + q[1] = 4; + q[0] = Cb(3, 4); + } else { + q[1] = Cb(3, 4); + q[0] = Cb(0, 2); } } -private: - static void DecodeTritBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, - uint32_t nBitsPerValue) { - // Implement the algorithm in section C.2.12 - uint32_t m[5]; - uint32_t t[5]; - uint32_t T; - - // Read the trit encoded block according to - // table C.2.14 - m[0] = bits.ReadBits(nBitsPerValue); - T = bits.ReadBits(2); - m[1] = bits.ReadBits(nBitsPerValue); - T |= bits.ReadBits(2) << 2; - m[2] = bits.ReadBits(nBitsPerValue); - T |= bits.ReadBit() << 4; - m[3] = bits.ReadBits(nBitsPerValue); - T |= bits.ReadBits(2) << 5; - m[4] = bits.ReadBits(nBitsPerValue); - T |= bits.ReadBit() << 7; - - uint32_t C = 0; - - Bits<uint32_t> Tb(T); - if (Tb(2, 4) == 7) { - C = (Tb(5, 7) << 2) | Tb(0, 1); - t[4] = t[3] = 2; - } else { - C = Tb(0, 4); - if (Tb(5, 6) == 3) { - t[4] = 2; - t[3] = Tb[7]; - } else { - t[4] = Tb[7]; - t[3] = Tb(5, 6); - } + for (std::size_t i = 0; i < 3; ++i) { + IntegerEncodedValue& val = result.emplace_back(IntegerEncoding::Qus32, nBitsPerValue); + val.bit_value = m[i]; + val.qus32_value = q[i]; + } +} + +// Returns a new instance of this struct that corresponds to the +// can take no more than maxval values +static constexpr IntegerEncodedValue CreateEncoding(u32 maxVal) { + while (maxVal > 0) { + u32 check = maxVal + 1; + + // Is maxVal a power of two? + if (!(check & (check - 1))) { + return IntegerEncodedValue(IntegerEncoding::JustBits, Popcnt(maxVal)); } - Bits<uint32_t> Cb(C); - if (Cb(0, 1) == 3) { - t[2] = 2; - t[1] = Cb[4]; - t[0] = (Cb[3] << 1) | (Cb[2] & ~Cb[3]); - } else if (Cb(2, 3) == 3) { - t[2] = 2; - t[1] = 2; - t[0] = Cb(0, 1); - } else { - t[2] = Cb[4]; - t[1] = Cb(2, 3); - t[0] = (Cb[1] << 1) | (Cb[0] & ~Cb[1]); + // Is maxVal of the type 3*2^n - 1? + if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) { + return IntegerEncodedValue(IntegerEncoding::Trit, Popcnt(check / 3 - 1)); } - for (uint32_t i = 0; i < 5; i++) { - IntegerEncodedValue val(eIntegerEncoding_Trit, nBitsPerValue); - val.SetBitValue(m[i]); - val.SetTritValue(t[i]); - result.push_back(val); + // Is maxVal of the type 5*2^n - 1? + if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) { + return IntegerEncodedValue(IntegerEncoding::Qus32, Popcnt(check / 5 - 1)); } + + // Apparently it can't be represented with a bounded integer sequence... + // just iterate. + maxVal--; } + return IntegerEncodedValue(IntegerEncoding::JustBits, 0); +} - static void DecodeQuintBlock(InputBitStream& bits, std::vector<IntegerEncodedValue>& result, - uint32_t nBitsPerValue) { - // Implement the algorithm in section C.2.12 - uint32_t m[3]; - uint32_t q[3]; - uint32_t Q; - - // Read the trit encoded block according to - // table C.2.15 - m[0] = bits.ReadBits(nBitsPerValue); - Q = bits.ReadBits(3); - m[1] = bits.ReadBits(nBitsPerValue); - Q |= bits.ReadBits(2) << 3; - m[2] = bits.ReadBits(nBitsPerValue); - Q |= bits.ReadBits(2) << 5; - - Bits<uint32_t> Qb(Q); - if (Qb(1, 2) == 3 && Qb(5, 6) == 0) { - q[0] = q[1] = 4; - q[2] = (Qb[0] << 2) | ((Qb[4] & ~Qb[0]) << 1) | (Qb[3] & ~Qb[0]); - } else { - uint32_t C = 0; - if (Qb(1, 2) == 3) { - q[2] = 4; - C = (Qb(3, 4) << 3) | ((~Qb(5, 6) & 3) << 1) | Qb[0]; - } else { - q[2] = Qb(5, 6); - C = Qb(0, 4); - } +static constexpr std::array<IntegerEncodedValue, 256> MakeEncodedValues() { + std::array<IntegerEncodedValue, 256> encodings{}; + for (std::size_t i = 0; i < encodings.size(); ++i) { + encodings[i] = CreateEncoding(static_cast<u32>(i)); + } + return encodings; +} - Bits<uint32_t> Cb(C); - if (Cb(0, 2) == 5) { - q[1] = 4; - q[0] = Cb(3, 4); - } else { - q[1] = Cb(3, 4); - q[0] = Cb(0, 2); - } - } +static constexpr std::array EncodingsValues = MakeEncodedValues(); + +// Fills result with the values that are encoded in the given +// bitstream. We must know beforehand what the maximum possible +// value is, and how many values we're decoding. +static void DecodeIntegerSequence(std::vector<IntegerEncodedValue>& result, InputBitStream& bits, + u32 maxRange, u32 nValues) { + // Determine encoding parameters + IntegerEncodedValue val = EncodingsValues[maxRange]; + + // Start decoding + u32 nValsDecoded = 0; + while (nValsDecoded < nValues) { + switch (val.encoding) { + case IntegerEncoding::Qus32: + DecodeQus32Block(bits, result, val.num_bits); + nValsDecoded += 3; + break; + + case IntegerEncoding::Trit: + DecodeTritBlock(bits, result, val.num_bits); + nValsDecoded += 5; + break; - for (uint32_t i = 0; i < 3; i++) { - IntegerEncodedValue val(eIntegerEncoding_Quint, nBitsPerValue); - val.m_BitValue = m[i]; - val.m_QuintValue = q[i]; + case IntegerEncoding::JustBits: + val.bit_value = bits.ReadBits(val.num_bits); result.push_back(val); + nValsDecoded++; + break; } } -}; +} namespace ASTCC { struct TexelWeightParams { - uint32_t m_Width = 0; - uint32_t m_Height = 0; + u32 m_Width = 0; + u32 m_Height = 0; bool m_bDualPlane = false; - uint32_t m_MaxWeight = 0; + u32 m_MaxWeight = 0; bool m_bError = false; bool m_bVoidExtentLDR = false; bool m_bVoidExtentHDR = false; - uint32_t GetPackedBitSize() const { + u32 GetPackedBitSize() const { // How many indices do we have? - uint32_t nIdxs = m_Height * m_Width; + u32 nIdxs = m_Height * m_Width; if (m_bDualPlane) { nIdxs *= 2; } - return IntegerEncodedValue::CreateEncoding(m_MaxWeight).GetBitLength(nIdxs); + return EncodingsValues[m_MaxWeight].GetBitLength(nIdxs); } - uint32_t GetNumWeightValues() const { - uint32_t ret = m_Width * m_Height; + u32 GetNumWeightValues() const { + u32 ret = m_Width * m_Height; if (m_bDualPlane) { ret *= 2; } @@ -422,7 +405,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { TexelWeightParams params; // Read the entire block mode all at once - uint16_t modeBits = static_cast<uint16_t>(strm.ReadBits(11)); + u16 modeBits = static_cast<u16>(strm.ReadBits<11>()); // Does this match the void extent block mode? if ((modeBits & 0x01FF) == 0x1FC) { @@ -457,7 +440,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { // of the block mode. Layout is determined by a number // between 0 and 9 corresponding to table C.2.8 of the // ASTC spec. - uint32_t layout = 0; + u32 layout = 0; if ((modeBits & 0x1) || (modeBits & 0x2)) { // layout is in [0-4] @@ -509,7 +492,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { assert(layout < 10); // Determine R - uint32_t R = !!(modeBits & 0x10); + u32 R = !!(modeBits & 0x10); if (layout < 5) { R |= (modeBits & 0x3) << 1; } else { @@ -520,54 +503,54 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { // Determine width & height switch (layout) { case 0: { - uint32_t A = (modeBits >> 5) & 0x3; - uint32_t B = (modeBits >> 7) & 0x3; + u32 A = (modeBits >> 5) & 0x3; + u32 B = (modeBits >> 7) & 0x3; params.m_Width = B + 4; params.m_Height = A + 2; break; } case 1: { - uint32_t A = (modeBits >> 5) & 0x3; - uint32_t B = (modeBits >> 7) & 0x3; + u32 A = (modeBits >> 5) & 0x3; + u32 B = (modeBits >> 7) & 0x3; params.m_Width = B + 8; params.m_Height = A + 2; break; } case 2: { - uint32_t A = (modeBits >> 5) & 0x3; - uint32_t B = (modeBits >> 7) & 0x3; + u32 A = (modeBits >> 5) & 0x3; + u32 B = (modeBits >> 7) & 0x3; params.m_Width = A + 2; params.m_Height = B + 8; break; } case 3: { - uint32_t A = (modeBits >> 5) & 0x3; - uint32_t B = (modeBits >> 7) & 0x1; + u32 A = (modeBits >> 5) & 0x3; + u32 B = (modeBits >> 7) & 0x1; params.m_Width = A + 2; params.m_Height = B + 6; break; } case 4: { - uint32_t A = (modeBits >> 5) & 0x3; - uint32_t B = (modeBits >> 7) & 0x1; + u32 A = (modeBits >> 5) & 0x3; + u32 B = (modeBits >> 7) & 0x1; params.m_Width = B + 2; params.m_Height = A + 2; break; } case 5: { - uint32_t A = (modeBits >> 5) & 0x3; + u32 A = (modeBits >> 5) & 0x3; params.m_Width = 12; params.m_Height = A + 2; break; } case 6: { - uint32_t A = (modeBits >> 5) & 0x3; + u32 A = (modeBits >> 5) & 0x3; params.m_Width = A + 2; params.m_Height = 12; break; @@ -586,15 +569,15 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { } case 9: { - uint32_t A = (modeBits >> 5) & 0x3; - uint32_t B = (modeBits >> 9) & 0x3; + u32 A = (modeBits >> 5) & 0x3; + u32 B = (modeBits >> 9) & 0x3; params.m_Width = A + 6; params.m_Height = B + 6; break; } default: - assert(!"Don't know this layout..."); + assert(false && "Don't know this layout..."); params.m_bError = true; break; } @@ -605,10 +588,10 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { bool H = (layout != 9) && (modeBits & 0x200); if (H) { - const uint32_t maxWeights[6] = {9, 11, 15, 19, 23, 31}; + const u32 maxWeights[6] = {9, 11, 15, 19, 23, 31}; params.m_MaxWeight = maxWeights[R - 2]; } else { - const uint32_t maxWeights[6] = {1, 2, 3, 4, 5, 7}; + const u32 maxWeights[6] = {1, 2, 3, 4, 5, 7}; params.m_MaxWeight = maxWeights[R - 2]; } @@ -617,32 +600,32 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) { return params; } -static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint32_t blockWidth, - uint32_t blockHeight) { +static void FillVoidExtentLDR(InputBitStream& strm, u32* const outBuf, u32 blockWidth, + u32 blockHeight) { // Don't actually care about the void extent, just read the bits... - for (int i = 0; i < 4; ++i) { - strm.ReadBits(13); + for (s32 i = 0; i < 4; ++i) { + strm.ReadBits<13>(); } // Decode the RGBA components and renormalize them to the range [0, 255] - uint16_t r = static_cast<uint16_t>(strm.ReadBits(16)); - uint16_t g = static_cast<uint16_t>(strm.ReadBits(16)); - uint16_t b = static_cast<uint16_t>(strm.ReadBits(16)); - uint16_t a = static_cast<uint16_t>(strm.ReadBits(16)); + u16 r = static_cast<u16>(strm.ReadBits<16>()); + u16 g = static_cast<u16>(strm.ReadBits<16>()); + u16 b = static_cast<u16>(strm.ReadBits<16>()); + u16 a = static_cast<u16>(strm.ReadBits<16>()); - uint32_t rgba = (r >> 8) | (g & 0xFF00) | (static_cast<uint32_t>(b) & 0xFF00) << 8 | - (static_cast<uint32_t>(a) & 0xFF00) << 16; + u32 rgba = (r >> 8) | (g & 0xFF00) | (static_cast<u32>(b) & 0xFF00) << 8 | + (static_cast<u32>(a) & 0xFF00) << 16; - for (uint32_t j = 0; j < blockHeight; j++) { - for (uint32_t i = 0; i < blockWidth; i++) { + for (u32 j = 0; j < blockHeight; j++) { + for (u32 i = 0; i < blockWidth; i++) { outBuf[j * blockWidth + i] = rgba; } } } -static void FillError(uint32_t* outBuf, uint32_t blockWidth, uint32_t blockHeight) { - for (uint32_t j = 0; j < blockHeight; j++) { - for (uint32_t i = 0; i < blockWidth; i++) { +static void FillError(u32* outBuf, u32 blockWidth, u32 blockHeight) { + for (u32 j = 0; j < blockHeight; j++) { + for (u32 i = 0; i < blockWidth; i++) { outBuf[j * blockWidth + i] = 0xFFFF00FF; } } @@ -651,18 +634,18 @@ static void FillError(uint32_t* outBuf, uint32_t blockWidth, uint32_t blockHeigh // Replicates low numBits such that [(toBit - 1):(toBit - 1 - fromBit)] // is the same as [(numBits - 1):0] and repeats all the way down. template <typename IntType> -static IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) { +static IntType Replicate(IntType val, u32 numBits, u32 toBit) { if (numBits == 0) return 0; if (toBit == 0) return 0; IntType v = val & static_cast<IntType>((1 << numBits) - 1); IntType res = v; - uint32_t reslen = numBits; + u32 reslen = numBits; while (reslen < toBit) { - uint32_t comp = 0; + u32 comp = 0; if (numBits > toBit - reslen) { - uint32_t newshift = toBit - reslen; + u32 newshift = toBit - reslen; comp = numBits - newshift; numBits = newshift; } @@ -675,14 +658,14 @@ static IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) { class Pixel { protected: - using ChannelType = int16_t; - uint8_t m_BitDepth[4] = {8, 8, 8, 8}; - int16_t color[4] = {}; + using ChannelType = s16; + u8 m_BitDepth[4] = {8, 8, 8, 8}; + s16 color[4] = {}; public: Pixel() = default; - Pixel(uint32_t a, uint32_t r, uint32_t g, uint32_t b, unsigned bitDepth = 8) - : m_BitDepth{uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth)}, + Pixel(u32 a, u32 r, u32 g, u32 b, u32 bitDepth = 8) + : m_BitDepth{u8(bitDepth), u8(bitDepth), u8(bitDepth), u8(bitDepth)}, color{static_cast<ChannelType>(a), static_cast<ChannelType>(r), static_cast<ChannelType>(g), static_cast<ChannelType>(b)} {} @@ -691,22 +674,22 @@ public: // significant bits when going from larger to smaller bit depth // or by repeating the most significant bits when going from // smaller to larger bit depths. - void ChangeBitDepth(const uint8_t (&depth)[4]) { - for (uint32_t i = 0; i < 4; i++) { + void ChangeBitDepth(const u8 (&depth)[4]) { + for (u32 i = 0; i < 4; i++) { Component(i) = ChangeBitDepth(Component(i), m_BitDepth[i], depth[i]); m_BitDepth[i] = depth[i]; } } template <typename IntType> - static float ConvertChannelToFloat(IntType channel, uint8_t bitDepth) { + static float ConvertChannelToFloat(IntType channel, u8 bitDepth) { float denominator = static_cast<float>((1 << bitDepth) - 1); return static_cast<float>(channel) / denominator; } // Changes the bit depth of a single component. See the comment // above for how we do this. - static ChannelType ChangeBitDepth(Pixel::ChannelType val, uint8_t oldDepth, uint8_t newDepth) { + static ChannelType ChangeBitDepth(Pixel::ChannelType val, u8 oldDepth, u8 newDepth) { assert(newDepth <= 8); assert(oldDepth <= 8); @@ -722,16 +705,15 @@ public: if (newDepth == 0) { return 0xFF; } else { - uint8_t bitsWasted = static_cast<uint8_t>(oldDepth - newDepth); - uint16_t v = static_cast<uint16_t>(val); - v = static_cast<uint16_t>((v + (1 << (bitsWasted - 1))) >> bitsWasted); - v = ::std::min<uint16_t>(::std::max<uint16_t>(0, v), - static_cast<uint16_t>((1 << newDepth) - 1)); - return static_cast<uint8_t>(v); + u8 bitsWasted = static_cast<u8>(oldDepth - newDepth); + u16 v = static_cast<u16>(val); + v = static_cast<u16>((v + (1 << (bitsWasted - 1))) >> bitsWasted); + v = ::std::min<u16>(::std::max<u16>(0, v), static_cast<u16>((1 << newDepth) - 1)); + return static_cast<u8>(v); } } - assert(!"We shouldn't get here."); + assert(false && "We shouldn't get here."); return 0; } @@ -759,15 +741,15 @@ public: ChannelType& B() { return color[3]; } - const ChannelType& Component(uint32_t idx) const { + const ChannelType& Component(u32 idx) const { return color[idx]; } - ChannelType& Component(uint32_t idx) { + ChannelType& Component(u32 idx) { return color[idx]; } - void GetBitDepth(uint8_t (&outDepth)[4]) const { - for (int i = 0; i < 4; i++) { + void GetBitDepth(u8 (&outDepth)[4]) const { + for (s32 i = 0; i < 4; i++) { outDepth[i] = m_BitDepth[i]; } } @@ -776,12 +758,12 @@ public: // and then pack each channel into an R8G8B8A8 32-bit integer. We assume // that the architecture is little-endian, so the alpha channel will end // up in the most-significant byte. - uint32_t Pack() const { + u32 Pack() const { Pixel eightBit(*this); - const uint8_t eightBitDepth[4] = {8, 8, 8, 8}; + const u8 eightBitDepth[4] = {8, 8, 8, 8}; eightBit.ChangeBitDepth(eightBitDepth); - uint32_t r = 0; + u32 r = 0; r |= eightBit.A(); r <<= 8; r |= eightBit.B(); @@ -794,7 +776,7 @@ public: // Clamps the pixel to the range [0,255] void ClampByte() { - for (uint32_t i = 0; i < 4; i++) { + for (u32 i = 0; i < 4; i++) { color[i] = (color[i] < 0) ? 0 : ((color[i] > 255) ? 255 : color[i]); } } @@ -804,24 +786,24 @@ public: } }; -static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* modes, - const uint32_t nPartitions, const uint32_t nBitsForColorData) { +static void DecodeColorValues(u32* out, u8* data, const u32* modes, const u32 nPartitions, + const u32 nBitsForColorData) { // First figure out how many color values we have - uint32_t nValues = 0; - for (uint32_t i = 0; i < nPartitions; i++) { + u32 nValues = 0; + for (u32 i = 0; i < nPartitions; i++) { nValues += ((modes[i] >> 2) + 1) << 1; } // Then based on the number of values and the remaining number of bits, // figure out the max value for each of them... - uint32_t range = 256; + u32 range = 256; while (--range > 0) { - IntegerEncodedValue val = IntegerEncodedValue::CreateEncoding(range); - uint32_t bitLength = val.GetBitLength(nValues); + IntegerEncodedValue val = EncodingsValues[range]; + u32 bitLength = val.GetBitLength(nValues); if (bitLength <= nBitsForColorData) { // Find the smallest possible range that matches the given encoding while (--range > 0) { - IntegerEncodedValue newval = IntegerEncodedValue::CreateEncoding(range); + IntegerEncodedValue newval = EncodingsValues[range]; if (!newval.MatchesEncoding(val)) { break; } @@ -835,12 +817,14 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode // We now have enough to decode our integer sequence. std::vector<IntegerEncodedValue> decodedColorValues; + decodedColorValues.reserve(32); + InputBitStream colorStream(data); - IntegerEncodedValue::DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); + DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); // Once we have the decoded values, we need to dequantize them to the 0-255 range // This procedure is outlined in ASTC spec C.2.13 - uint32_t outIdx = 0; + u32 outIdx = 0; for (auto itr = decodedColorValues.begin(); itr != decodedColorValues.end(); ++itr) { // Have we already decoded all that we need? if (outIdx >= nValues) { @@ -848,25 +832,25 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode } const IntegerEncodedValue& val = *itr; - uint32_t bitlen = val.BaseBitLength(); - uint32_t bitval = val.GetBitValue(); + u32 bitlen = val.num_bits; + u32 bitval = val.bit_value; assert(bitlen >= 1); - uint32_t A = 0, B = 0, C = 0, D = 0; + u32 A = 0, B = 0, C = 0, D = 0; // A is just the lsb replicated 9 times. A = Replicate(bitval & 1, 1, 9); - switch (val.GetEncoding()) { + switch (val.encoding) { // Replicate bits - case eIntegerEncoding_JustBits: + case IntegerEncoding::JustBits: out[outIdx++] = Replicate(bitval, bitlen, 8); break; // Use algorithm in C.2.13 - case eIntegerEncoding_Trit: { + case IntegerEncoding::Trit: { - D = val.GetTritValue(); + D = val.trit_value; switch (bitlen) { case 1: { @@ -876,48 +860,48 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode case 2: { C = 93; // B = b000b0bb0 - uint32_t b = (bitval >> 1) & 1; + u32 b = (bitval >> 1) & 1; B = (b << 8) | (b << 4) | (b << 2) | (b << 1); } break; case 3: { C = 44; // B = cb000cbcb - uint32_t cb = (bitval >> 1) & 3; + u32 cb = (bitval >> 1) & 3; B = (cb << 7) | (cb << 2) | cb; } break; case 4: { C = 22; // B = dcb000dcb - uint32_t dcb = (bitval >> 1) & 7; + u32 dcb = (bitval >> 1) & 7; B = (dcb << 6) | dcb; } break; case 5: { C = 11; // B = edcb000ed - uint32_t edcb = (bitval >> 1) & 0xF; + u32 edcb = (bitval >> 1) & 0xF; B = (edcb << 5) | (edcb >> 2); } break; case 6: { C = 5; // B = fedcb000f - uint32_t fedcb = (bitval >> 1) & 0x1F; + u32 fedcb = (bitval >> 1) & 0x1F; B = (fedcb << 4) | (fedcb >> 4); } break; default: - assert(!"Unsupported trit encoding for color values!"); + assert(false && "Unsupported trit encoding for color values!"); break; } // switch(bitlen) - } // case eIntegerEncoding_Trit + } // case IntegerEncoding::Trit break; - case eIntegerEncoding_Quint: { + case IntegerEncoding::Qus32: { - D = val.GetQuintValue(); + D = val.qus32_value; switch (bitlen) { case 1: { @@ -927,41 +911,41 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode case 2: { C = 54; // B = b0000bb00 - uint32_t b = (bitval >> 1) & 1; + u32 b = (bitval >> 1) & 1; B = (b << 8) | (b << 3) | (b << 2); } break; case 3: { C = 26; // B = cb0000cbc - uint32_t cb = (bitval >> 1) & 3; + u32 cb = (bitval >> 1) & 3; B = (cb << 7) | (cb << 1) | (cb >> 1); } break; case 4: { C = 13; // B = dcb0000dc - uint32_t dcb = (bitval >> 1) & 7; + u32 dcb = (bitval >> 1) & 7; B = (dcb << 6) | (dcb >> 1); } break; case 5: { C = 6; // B = edcb0000e - uint32_t edcb = (bitval >> 1) & 0xF; + u32 edcb = (bitval >> 1) & 0xF; B = (edcb << 5) | (edcb >> 3); } break; default: - assert(!"Unsupported quint encoding for color values!"); + assert(false && "Unsupported quint encoding for color values!"); break; } // switch(bitlen) - } // case eIntegerEncoding_Quint + } // case IntegerEncoding::Qus32 break; - } // switch(val.GetEncoding()) + } // switch(val.encoding) - if (val.GetEncoding() != eIntegerEncoding_JustBits) { - uint32_t T = D * C + B; + if (val.encoding != IntegerEncoding::JustBits) { + u32 T = D * C + B; T ^= A; T = (A & 0x80) | (T >> 2); out[outIdx++] = T; @@ -969,31 +953,31 @@ static void DecodeColorValues(uint32_t* out, uint8_t* data, const uint32_t* mode } // Make sure that each of our values is in the proper range... - for (uint32_t i = 0; i < nValues; i++) { + for (u32 i = 0; i < nValues; i++) { assert(out[i] <= 255); } } -static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) { - uint32_t bitval = val.GetBitValue(); - uint32_t bitlen = val.BaseBitLength(); +static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) { + u32 bitval = val.bit_value; + u32 bitlen = val.num_bits; - uint32_t A = Replicate(bitval & 1, 1, 7); - uint32_t B = 0, C = 0, D = 0; + u32 A = Replicate(bitval & 1, 1, 7); + u32 B = 0, C = 0, D = 0; - uint32_t result = 0; - switch (val.GetEncoding()) { - case eIntegerEncoding_JustBits: + u32 result = 0; + switch (val.encoding) { + case IntegerEncoding::JustBits: result = Replicate(bitval, bitlen, 6); break; - case eIntegerEncoding_Trit: { - D = val.GetTritValue(); + case IntegerEncoding::Trit: { + D = val.trit_value; assert(D < 3); switch (bitlen) { case 0: { - uint32_t results[3] = {0, 32, 63}; + u32 results[3] = {0, 32, 63}; result = results[D]; } break; @@ -1003,29 +987,29 @@ static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) { case 2: { C = 23; - uint32_t b = (bitval >> 1) & 1; + u32 b = (bitval >> 1) & 1; B = (b << 6) | (b << 2) | b; } break; case 3: { C = 11; - uint32_t cb = (bitval >> 1) & 3; + u32 cb = (bitval >> 1) & 3; B = (cb << 5) | cb; } break; default: - assert(!"Invalid trit encoding for texel weight"); + assert(false && "Invalid trit encoding for texel weight"); break; } } break; - case eIntegerEncoding_Quint: { - D = val.GetQuintValue(); + case IntegerEncoding::Qus32: { + D = val.qus32_value; assert(D < 5); switch (bitlen) { case 0: { - uint32_t results[5] = {0, 16, 32, 47, 63}; + u32 results[5] = {0, 16, 32, 47, 63}; result = results[D]; } break; @@ -1035,18 +1019,18 @@ static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) { case 2: { C = 13; - uint32_t b = (bitval >> 1) & 1; + u32 b = (bitval >> 1) & 1; B = (b << 6) | (b << 1); } break; default: - assert(!"Invalid quint encoding for texel weight"); + assert(false && "Invalid quint encoding for texel weight"); break; } } break; } - if (val.GetEncoding() != eIntegerEncoding_JustBits && bitlen > 0) { + if (val.encoding != IntegerEncoding::JustBits && bitlen > 0) { // Decode the value... result = D * C + B; result ^= A; @@ -1063,12 +1047,11 @@ static uint32_t UnquantizeTexelWeight(const IntegerEncodedValue& val) { return result; } -static void UnquantizeTexelWeights(uint32_t out[2][144], - const std::vector<IntegerEncodedValue>& weights, - const TexelWeightParams& params, const uint32_t blockWidth, - const uint32_t blockHeight) { - uint32_t weightIdx = 0; - uint32_t unquantized[2][144]; +static void UnquantizeTexelWeights(u32 out[2][144], const std::vector<IntegerEncodedValue>& weights, + const TexelWeightParams& params, const u32 blockWidth, + const u32 blockHeight) { + u32 weightIdx = 0; + u32 unquantized[2][144]; for (auto itr = weights.begin(); itr != weights.end(); ++itr) { unquantized[0][weightIdx] = UnquantizeTexelWeight(*itr); @@ -1086,34 +1069,34 @@ static void UnquantizeTexelWeights(uint32_t out[2][144], } // Do infill if necessary (Section C.2.18) ... - uint32_t Ds = (1024 + (blockWidth / 2)) / (blockWidth - 1); - uint32_t Dt = (1024 + (blockHeight / 2)) / (blockHeight - 1); + u32 Ds = (1024 + (blockWidth / 2)) / (blockWidth - 1); + u32 Dt = (1024 + (blockHeight / 2)) / (blockHeight - 1); - const uint32_t kPlaneScale = params.m_bDualPlane ? 2U : 1U; - for (uint32_t plane = 0; plane < kPlaneScale; plane++) - for (uint32_t t = 0; t < blockHeight; t++) - for (uint32_t s = 0; s < blockWidth; s++) { - uint32_t cs = Ds * s; - uint32_t ct = Dt * t; + const u32 kPlaneScale = params.m_bDualPlane ? 2U : 1U; + for (u32 plane = 0; plane < kPlaneScale; plane++) + for (u32 t = 0; t < blockHeight; t++) + for (u32 s = 0; s < blockWidth; s++) { + u32 cs = Ds * s; + u32 ct = Dt * t; - uint32_t gs = (cs * (params.m_Width - 1) + 32) >> 6; - uint32_t gt = (ct * (params.m_Height - 1) + 32) >> 6; + u32 gs = (cs * (params.m_Width - 1) + 32) >> 6; + u32 gt = (ct * (params.m_Height - 1) + 32) >> 6; - uint32_t js = gs >> 4; - uint32_t fs = gs & 0xF; + u32 js = gs >> 4; + u32 fs = gs & 0xF; - uint32_t jt = gt >> 4; - uint32_t ft = gt & 0x0F; + u32 jt = gt >> 4; + u32 ft = gt & 0x0F; - uint32_t w11 = (fs * ft + 8) >> 4; - uint32_t w10 = ft - w11; - uint32_t w01 = fs - w11; - uint32_t w00 = 16 - fs - ft + w11; + u32 w11 = (fs * ft + 8) >> 4; + u32 w10 = ft - w11; + u32 w01 = fs - w11; + u32 w00 = 16 - fs - ft + w11; - uint32_t v0 = js + jt * params.m_Width; + u32 v0 = js + jt * params.m_Width; #define FIND_TEXEL(tidx, bidx) \ - uint32_t p##bidx = 0; \ + u32 p##bidx = 0; \ do { \ if ((tidx) < (params.m_Width * params.m_Height)) { \ p##bidx = unquantized[plane][(tidx)]; \ @@ -1133,7 +1116,7 @@ static void UnquantizeTexelWeights(uint32_t out[2][144], } // Transfers a bit as described in C.2.14 -static inline void BitTransferSigned(int32_t& a, int32_t& b) { +static inline void BitTransferSigned(s32& a, s32& b) { b >>= 1; b |= a & 0x80; a >>= 1; @@ -1144,14 +1127,14 @@ static inline void BitTransferSigned(int32_t& a, int32_t& b) { // Adds more precision to the blue channel as described // in C.2.14 -static inline Pixel BlueContract(int32_t a, int32_t r, int32_t g, int32_t b) { - return Pixel(static_cast<int16_t>(a), static_cast<int16_t>((r + b) >> 1), - static_cast<int16_t>((g + b) >> 1), static_cast<int16_t>(b)); +static inline Pixel BlueContract(s32 a, s32 r, s32 g, s32 b) { + return Pixel(static_cast<s16>(a), static_cast<s16>((r + b) >> 1), + static_cast<s16>((g + b) >> 1), static_cast<s16>(b)); } // Partition selection functions as specified in // C.2.21 -static inline uint32_t hash52(uint32_t p) { +static inline u32 hash52(u32 p) { p ^= p >> 15; p -= p << 17; p += p << 7; @@ -1165,8 +1148,7 @@ static inline uint32_t hash52(uint32_t p) { return p; } -static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, - int32_t partitionCount, int32_t smallBlock) { +static u32 SelectPartition(s32 seed, s32 x, s32 y, s32 z, s32 partitionCount, s32 smallBlock) { if (1 == partitionCount) return 0; @@ -1178,34 +1160,34 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, seed += (partitionCount - 1) * 1024; - uint32_t rnum = hash52(static_cast<uint32_t>(seed)); - uint8_t seed1 = static_cast<uint8_t>(rnum & 0xF); - uint8_t seed2 = static_cast<uint8_t>((rnum >> 4) & 0xF); - uint8_t seed3 = static_cast<uint8_t>((rnum >> 8) & 0xF); - uint8_t seed4 = static_cast<uint8_t>((rnum >> 12) & 0xF); - uint8_t seed5 = static_cast<uint8_t>((rnum >> 16) & 0xF); - uint8_t seed6 = static_cast<uint8_t>((rnum >> 20) & 0xF); - uint8_t seed7 = static_cast<uint8_t>((rnum >> 24) & 0xF); - uint8_t seed8 = static_cast<uint8_t>((rnum >> 28) & 0xF); - uint8_t seed9 = static_cast<uint8_t>((rnum >> 18) & 0xF); - uint8_t seed10 = static_cast<uint8_t>((rnum >> 22) & 0xF); - uint8_t seed11 = static_cast<uint8_t>((rnum >> 26) & 0xF); - uint8_t seed12 = static_cast<uint8_t>(((rnum >> 30) | (rnum << 2)) & 0xF); - - seed1 = static_cast<uint8_t>(seed1 * seed1); - seed2 = static_cast<uint8_t>(seed2 * seed2); - seed3 = static_cast<uint8_t>(seed3 * seed3); - seed4 = static_cast<uint8_t>(seed4 * seed4); - seed5 = static_cast<uint8_t>(seed5 * seed5); - seed6 = static_cast<uint8_t>(seed6 * seed6); - seed7 = static_cast<uint8_t>(seed7 * seed7); - seed8 = static_cast<uint8_t>(seed8 * seed8); - seed9 = static_cast<uint8_t>(seed9 * seed9); - seed10 = static_cast<uint8_t>(seed10 * seed10); - seed11 = static_cast<uint8_t>(seed11 * seed11); - seed12 = static_cast<uint8_t>(seed12 * seed12); - - int32_t sh1, sh2, sh3; + u32 rnum = hash52(static_cast<u32>(seed)); + u8 seed1 = static_cast<u8>(rnum & 0xF); + u8 seed2 = static_cast<u8>((rnum >> 4) & 0xF); + u8 seed3 = static_cast<u8>((rnum >> 8) & 0xF); + u8 seed4 = static_cast<u8>((rnum >> 12) & 0xF); + u8 seed5 = static_cast<u8>((rnum >> 16) & 0xF); + u8 seed6 = static_cast<u8>((rnum >> 20) & 0xF); + u8 seed7 = static_cast<u8>((rnum >> 24) & 0xF); + u8 seed8 = static_cast<u8>((rnum >> 28) & 0xF); + u8 seed9 = static_cast<u8>((rnum >> 18) & 0xF); + u8 seed10 = static_cast<u8>((rnum >> 22) & 0xF); + u8 seed11 = static_cast<u8>((rnum >> 26) & 0xF); + u8 seed12 = static_cast<u8>(((rnum >> 30) | (rnum << 2)) & 0xF); + + seed1 = static_cast<u8>(seed1 * seed1); + seed2 = static_cast<u8>(seed2 * seed2); + seed3 = static_cast<u8>(seed3 * seed3); + seed4 = static_cast<u8>(seed4 * seed4); + seed5 = static_cast<u8>(seed5 * seed5); + seed6 = static_cast<u8>(seed6 * seed6); + seed7 = static_cast<u8>(seed7 * seed7); + seed8 = static_cast<u8>(seed8 * seed8); + seed9 = static_cast<u8>(seed9 * seed9); + seed10 = static_cast<u8>(seed10 * seed10); + seed11 = static_cast<u8>(seed11 * seed11); + seed12 = static_cast<u8>(seed12 * seed12); + + s32 sh1, sh2, sh3; if (seed & 1) { sh1 = (seed & 2) ? 4 : 5; sh2 = (partitionCount == 3) ? 6 : 5; @@ -1215,23 +1197,23 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, } sh3 = (seed & 0x10) ? sh1 : sh2; - seed1 = static_cast<uint8_t>(seed1 >> sh1); - seed2 = static_cast<uint8_t>(seed2 >> sh2); - seed3 = static_cast<uint8_t>(seed3 >> sh1); - seed4 = static_cast<uint8_t>(seed4 >> sh2); - seed5 = static_cast<uint8_t>(seed5 >> sh1); - seed6 = static_cast<uint8_t>(seed6 >> sh2); - seed7 = static_cast<uint8_t>(seed7 >> sh1); - seed8 = static_cast<uint8_t>(seed8 >> sh2); - seed9 = static_cast<uint8_t>(seed9 >> sh3); - seed10 = static_cast<uint8_t>(seed10 >> sh3); - seed11 = static_cast<uint8_t>(seed11 >> sh3); - seed12 = static_cast<uint8_t>(seed12 >> sh3); - - int32_t a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); - int32_t b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); - int32_t c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); - int32_t d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); + seed1 = static_cast<u8>(seed1 >> sh1); + seed2 = static_cast<u8>(seed2 >> sh2); + seed3 = static_cast<u8>(seed3 >> sh1); + seed4 = static_cast<u8>(seed4 >> sh2); + seed5 = static_cast<u8>(seed5 >> sh1); + seed6 = static_cast<u8>(seed6 >> sh2); + seed7 = static_cast<u8>(seed7 >> sh1); + seed8 = static_cast<u8>(seed8 >> sh2); + seed9 = static_cast<u8>(seed9 >> sh3); + seed10 = static_cast<u8>(seed10 >> sh3); + seed11 = static_cast<u8>(seed11 >> sh3); + seed12 = static_cast<u8>(seed12 >> sh3); + + s32 a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); + s32 b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); + s32 c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); + s32 d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); a &= 0x3F; b &= 0x3F; @@ -1252,27 +1234,26 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z, return 3; } -static inline uint32_t Select2DPartition(int32_t seed, int32_t x, int32_t y, int32_t partitionCount, - int32_t smallBlock) { +static inline u32 Select2DPartition(s32 seed, s32 x, s32 y, s32 partitionCount, s32 smallBlock) { return SelectPartition(seed, x, y, 0, partitionCount, smallBlock); } // Section C.2.14 -static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValues, - uint32_t colorEndpointMode) { +static void ComputeEndpos32s(Pixel& ep1, Pixel& ep2, const u32*& colorValues, + u32 colorEndpos32Mode) { #define READ_UINT_VALUES(N) \ - uint32_t v[N]; \ - for (uint32_t i = 0; i < N; i++) { \ + u32 v[N]; \ + for (u32 i = 0; i < N; i++) { \ v[i] = *(colorValues++); \ } #define READ_INT_VALUES(N) \ - int32_t v[N]; \ - for (uint32_t i = 0; i < N; i++) { \ - v[i] = static_cast<int32_t>(*(colorValues++)); \ + s32 v[N]; \ + for (u32 i = 0; i < N; i++) { \ + v[i] = static_cast<s32>(*(colorValues++)); \ } - switch (colorEndpointMode) { + switch (colorEndpos32Mode) { case 0: { READ_UINT_VALUES(2) ep1 = Pixel(0xFF, v[0], v[0], v[0]); @@ -1281,8 +1262,8 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue case 1: { READ_UINT_VALUES(2) - uint32_t L0 = (v[0] >> 2) | (v[1] & 0xC0); - uint32_t L1 = std::max(L0 + (v[1] & 0x3F), 0xFFU); + u32 L0 = (v[0] >> 2) | (v[1] & 0xC0); + u32 L1 = std::max(L0 + (v[1] & 0x3F), 0xFFU); ep1 = Pixel(0xFF, L0, L0, L0); ep2 = Pixel(0xFF, L1, L1, L1); } break; @@ -1371,7 +1352,7 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue } break; default: - assert(!"Unsupported color endpoint mode (is it HDR?)"); + assert(false && "Unsupported color endpoint mode (is it HDR?)"); break; } @@ -1379,14 +1360,14 @@ static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const uint32_t*& colorValue #undef READ_INT_VALUES } -static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, - const uint32_t blockHeight, uint32_t* outBuf) { +static void DecompressBlock(const u8 inBuf[16], const u32 blockWidth, const u32 blockHeight, + u32* outBuf) { InputBitStream strm(inBuf); TexelWeightParams weightParams = DecodeBlockInfo(strm); // Was there an error? if (weightParams.m_bError) { - assert(!"Invalid block mode"); + assert(false && "Invalid block mode"); FillError(outBuf, blockWidth, blockHeight); return; } @@ -1397,63 +1378,63 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, } if (weightParams.m_bVoidExtentHDR) { - assert(!"HDR void extent blocks are unsupported!"); + assert(false && "HDR void extent blocks are unsupported!"); FillError(outBuf, blockWidth, blockHeight); return; } if (weightParams.m_Width > blockWidth) { - assert(!"Texel weight grid width should be smaller than block width"); + assert(false && "Texel weight grid width should be smaller than block width"); FillError(outBuf, blockWidth, blockHeight); return; } if (weightParams.m_Height > blockHeight) { - assert(!"Texel weight grid height should be smaller than block height"); + assert(false && "Texel weight grid height should be smaller than block height"); FillError(outBuf, blockWidth, blockHeight); return; } // Read num partitions - uint32_t nPartitions = strm.ReadBits(2) + 1; + u32 nPartitions = strm.ReadBits<2>() + 1; assert(nPartitions <= 4); if (nPartitions == 4 && weightParams.m_bDualPlane) { - assert(!"Dual plane mode is incompatible with four partition blocks"); + assert(false && "Dual plane mode is incompatible with four partition blocks"); FillError(outBuf, blockWidth, blockHeight); return; } - // Based on the number of partitions, read the color endpoint mode for + // Based on the number of partitions, read the color endpos32 mode for // each partition. - // Determine partitions, partition index, and color endpoint modes - int32_t planeIdx = -1; - uint32_t partitionIndex; - uint32_t colorEndpointMode[4] = {0, 0, 0, 0}; + // Determine partitions, partition index, and color endpos32 modes + s32 planeIdx = -1; + u32 partitionIndex; + u32 colorEndpos32Mode[4] = {0, 0, 0, 0}; // Define color data. - uint8_t colorEndpointData[16]; - memset(colorEndpointData, 0, sizeof(colorEndpointData)); - OutputBitStream colorEndpointStream(colorEndpointData, 16 * 8, 0); + u8 colorEndpos32Data[16]; + memset(colorEndpos32Data, 0, sizeof(colorEndpos32Data)); + OutputBitStream colorEndpos32Stream(colorEndpos32Data, 16 * 8, 0); // Read extra config data... - uint32_t baseCEM = 0; + u32 baseCEM = 0; if (nPartitions == 1) { - colorEndpointMode[0] = strm.ReadBits(4); + colorEndpos32Mode[0] = strm.ReadBits<4>(); partitionIndex = 0; } else { - partitionIndex = strm.ReadBits(10); - baseCEM = strm.ReadBits(6); + partitionIndex = strm.ReadBits<10>(); + baseCEM = strm.ReadBits<6>(); } - uint32_t baseMode = (baseCEM & 3); + u32 baseMode = (baseCEM & 3); - // Remaining bits are color endpoint data... - uint32_t nWeightBits = weightParams.GetPackedBitSize(); - int32_t remainingBits = 128 - nWeightBits - strm.GetBitsRead(); + // Remaining bits are color endpos32 data... + u32 nWeightBits = weightParams.GetPackedBitSize(); + s32 remainingBits = 128 - nWeightBits - static_cast<s32>(strm.GetBitsRead()); // Consider extra bits prior to texel data... - uint32_t extraCEMbits = 0; + u32 extraCEMbits = 0; if (baseMode) { switch (nPartitions) { case 2: @@ -1473,18 +1454,18 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, remainingBits -= extraCEMbits; // Do we have a dual plane situation? - uint32_t planeSelectorBits = 0; + u32 planeSelectorBits = 0; if (weightParams.m_bDualPlane) { planeSelectorBits = 2; } remainingBits -= planeSelectorBits; // Read color data... - uint32_t colorDataBits = remainingBits; + u32 colorDataBits = remainingBits; while (remainingBits > 0) { - uint32_t nb = std::min(remainingBits, 8); - uint32_t b = strm.ReadBits(nb); - colorEndpointStream.WriteBits(b, nb); + u32 nb = std::min(remainingBits, 8); + u32 b = strm.ReadBits(nb); + colorEndpos32Stream.WriteBits(b, nb); remainingBits -= 8; } @@ -1493,64 +1474,64 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, // Read the rest of the CEM if (baseMode) { - uint32_t extraCEM = strm.ReadBits(extraCEMbits); - uint32_t CEM = (extraCEM << 6) | baseCEM; + u32 extraCEM = strm.ReadBits(extraCEMbits); + u32 CEM = (extraCEM << 6) | baseCEM; CEM >>= 2; bool C[4] = {0}; - for (uint32_t i = 0; i < nPartitions; i++) { + for (u32 i = 0; i < nPartitions; i++) { C[i] = CEM & 1; CEM >>= 1; } - uint8_t M[4] = {0}; - for (uint32_t i = 0; i < nPartitions; i++) { + u8 M[4] = {0}; + for (u32 i = 0; i < nPartitions; i++) { M[i] = CEM & 3; CEM >>= 2; assert(M[i] <= 3); } - for (uint32_t i = 0; i < nPartitions; i++) { - colorEndpointMode[i] = baseMode; + for (u32 i = 0; i < nPartitions; i++) { + colorEndpos32Mode[i] = baseMode; if (!(C[i])) - colorEndpointMode[i] -= 1; - colorEndpointMode[i] <<= 2; - colorEndpointMode[i] |= M[i]; + colorEndpos32Mode[i] -= 1; + colorEndpos32Mode[i] <<= 2; + colorEndpos32Mode[i] |= M[i]; } } else if (nPartitions > 1) { - uint32_t CEM = baseCEM >> 2; - for (uint32_t i = 0; i < nPartitions; i++) { - colorEndpointMode[i] = CEM; + u32 CEM = baseCEM >> 2; + for (u32 i = 0; i < nPartitions; i++) { + colorEndpos32Mode[i] = CEM; } } // Make sure everything up till here is sane. - for (uint32_t i = 0; i < nPartitions; i++) { - assert(colorEndpointMode[i] < 16); + for (u32 i = 0; i < nPartitions; i++) { + assert(colorEndpos32Mode[i] < 16); } assert(strm.GetBitsRead() + weightParams.GetPackedBitSize() == 128); // Decode both color data and texel weight data - uint32_t colorValues[32]; // Four values, two endpoints, four maximum paritions - DecodeColorValues(colorValues, colorEndpointData, colorEndpointMode, nPartitions, + u32 colorValues[32]; // Four values, two endpos32s, four maximum paritions + DecodeColorValues(colorValues, colorEndpos32Data, colorEndpos32Mode, nPartitions, colorDataBits); - Pixel endpoints[4][2]; - const uint32_t* colorValuesPtr = colorValues; - for (uint32_t i = 0; i < nPartitions; i++) { - ComputeEndpoints(endpoints[i][0], endpoints[i][1], colorValuesPtr, colorEndpointMode[i]); + Pixel endpos32s[4][2]; + const u32* colorValuesPtr = colorValues; + for (u32 i = 0; i < nPartitions; i++) { + ComputeEndpos32s(endpos32s[i][0], endpos32s[i][1], colorValuesPtr, colorEndpos32Mode[i]); } // Read the texel weight data.. - uint8_t texelWeightData[16]; + u8 texelWeightData[16]; memcpy(texelWeightData, inBuf, sizeof(texelWeightData)); // Reverse everything - for (uint32_t i = 0; i < 8; i++) { + for (u32 i = 0; i < 8; i++) { // Taken from http://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64Bits #define REVERSE_BYTE(b) (((b)*0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32 - unsigned char a = static_cast<unsigned char>(REVERSE_BYTE(texelWeightData[i])); - unsigned char b = static_cast<unsigned char>(REVERSE_BYTE(texelWeightData[15 - i])); + u8 a = static_cast<u8>(REVERSE_BYTE(texelWeightData[i])); + u8 b = static_cast<u8>(REVERSE_BYTE(texelWeightData[15 - i])); #undef REVERSE_BYTE texelWeightData[i] = b; @@ -1558,50 +1539,51 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, } // Make sure that higher non-texel bits are set to zero - const uint32_t clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; + const u32 clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1; texelWeightData[clearByteStart - 1] = texelWeightData[clearByteStart - 1] & - static_cast<uint8_t>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); + static_cast<u8>((1 << (weightParams.GetPackedBitSize() % 8)) - 1); memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart); std::vector<IntegerEncodedValue> texelWeightValues; + texelWeightValues.reserve(64); + InputBitStream weightStream(texelWeightData); - IntegerEncodedValue::DecodeIntegerSequence(texelWeightValues, weightStream, - weightParams.m_MaxWeight, - weightParams.GetNumWeightValues()); + DecodeIntegerSequence(texelWeightValues, weightStream, weightParams.m_MaxWeight, + weightParams.GetNumWeightValues()); // Blocks can be at most 12x12, so we can have as many as 144 weights - uint32_t weights[2][144]; + u32 weights[2][144]; UnquantizeTexelWeights(weights, texelWeightValues, weightParams, blockWidth, blockHeight); - // Now that we have endpoints and weights, we can interpolate and generate + // Now that we have endpos32s and weights, we can s32erpolate and generate // the proper decoding... - for (uint32_t j = 0; j < blockHeight; j++) - for (uint32_t i = 0; i < blockWidth; i++) { - uint32_t partition = Select2DPartition(partitionIndex, i, j, nPartitions, - (blockHeight * blockWidth) < 32); + for (u32 j = 0; j < blockHeight; j++) + for (u32 i = 0; i < blockWidth; i++) { + u32 partition = Select2DPartition(partitionIndex, i, j, nPartitions, + (blockHeight * blockWidth) < 32); assert(partition < nPartitions); Pixel p; - for (uint32_t c = 0; c < 4; c++) { - uint32_t C0 = endpoints[partition][0].Component(c); + for (u32 c = 0; c < 4; c++) { + u32 C0 = endpos32s[partition][0].Component(c); C0 = Replicate(C0, 8, 16); - uint32_t C1 = endpoints[partition][1].Component(c); + u32 C1 = endpos32s[partition][1].Component(c); C1 = Replicate(C1, 8, 16); - uint32_t plane = 0; + u32 plane = 0; if (weightParams.m_bDualPlane && (((planeIdx + 1) & 3) == c)) { plane = 1; } - uint32_t weight = weights[plane][j * blockWidth + i]; - uint32_t C = (C0 * (64 - weight) + C1 * weight + 32) / 64; + u32 weight = weights[plane][j * blockWidth + i]; + u32 C = (C0 * (64 - weight) + C1 * weight + 32) / 64; if (C == 65535) { p.Component(c) = 255; } else { double Cf = static_cast<double>(C); - p.Component(c) = static_cast<uint16_t>(255.0 * (Cf / 65536.0) + 0.5); + p.Component(c) = static_cast<u16>(255.0 * (Cf / 65536.0) + 0.5); } } @@ -1613,26 +1595,26 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth, namespace Tegra::Texture::ASTC { -std::vector<uint8_t> Decompress(const uint8_t* data, uint32_t width, uint32_t height, - uint32_t depth, uint32_t block_width, uint32_t block_height) { - uint32_t blockIdx = 0; +std::vector<u8> Decompress(const u8* data, u32 width, u32 height, u32 depth, u32 block_width, + u32 block_height) { + u32 blockIdx = 0; std::size_t depth_offset = 0; - std::vector<uint8_t> outData(height * width * depth * 4); - for (uint32_t k = 0; k < depth; k++) { - for (uint32_t j = 0; j < height; j += block_height) { - for (uint32_t i = 0; i < width; i += block_width) { + std::vector<u8> outData(height * width * depth * 4); + for (u32 k = 0; k < depth; k++) { + for (u32 j = 0; j < height; j += block_height) { + for (u32 i = 0; i < width; i += block_width) { - const uint8_t* blockPtr = data + blockIdx * 16; + const u8* blockPtr = data + blockIdx * 16; // Blocks can be at most 12x12 - uint32_t uncompData[144]; + u32 uncompData[144]; ASTCC::DecompressBlock(blockPtr, block_width, block_height, uncompData); - uint32_t decompWidth = std::min(block_width, width - i); - uint32_t decompHeight = std::min(block_height, height - j); + u32 decompWidth = std::min(block_width, width - i); + u32 decompHeight = std::min(block_height, height - j); - uint8_t* outRow = depth_offset + outData.data() + (j * width + i) * 4; - for (uint32_t jj = 0; jj < decompHeight; jj++) { + u8* outRow = depth_offset + outData.data() + (j * width + i) * 4; + for (u32 jj = 0; jj < decompHeight; jj++) { memcpy(outRow + jj * width * 4, uncompData + jj * block_width, decompWidth * 4); } diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index acfe57e77..16e67cc0a 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -698,6 +698,8 @@ void Config::ReadSystemValues() { Settings::values.custom_rtc = std::nullopt; } + Settings::values.sound_index = ReadSetting(QStringLiteral("sound_index"), 1).toInt(); + qt_config->endGroup(); } @@ -1125,6 +1127,8 @@ void Config::SaveSystemValues() { Settings::values.custom_rtc.value_or(std::chrono::seconds{}).count()), 0); + WriteSetting(QStringLiteral("sound_index"), Settings::values.sound_index, 1); + qt_config->endGroup(); } diff --git a/src/yuzu/configuration/configure_system.cpp b/src/yuzu/configuration/configure_system.cpp index e1b52f8d9..f9a5b4fbe 100644 --- a/src/yuzu/configuration/configure_system.cpp +++ b/src/yuzu/configuration/configure_system.cpp @@ -56,6 +56,7 @@ void ConfigureSystem::SetConfiguration() { enabled = !Core::System::GetInstance().IsPoweredOn(); ui->combo_language->setCurrentIndex(Settings::values.language_index); + ui->combo_sound->setCurrentIndex(Settings::values.sound_index); ui->rng_seed_checkbox->setChecked(Settings::values.rng_seed.has_value()); ui->rng_seed_edit->setEnabled(Settings::values.rng_seed.has_value()); @@ -81,6 +82,7 @@ void ConfigureSystem::ApplyConfiguration() { } Settings::values.language_index = ui->combo_language->currentIndex(); + Settings::values.sound_index = ui->combo_sound->currentIndex(); if (ui->rng_seed_checkbox->isChecked()) { Settings::values.rng_seed = ui->rng_seed_edit->text().toULongLong(nullptr, 16); |