summary | refs | log | tree | commit | diff | stats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/command_processor.cpp 440
-rw-r--r-- src/video_core/geometry_pipeline.cpp 2
-rw-r--r-- src/video_core/pica_types.h 18
-rw-r--r-- src/video_core/renderer_opengl/gl_state.cpp 4
-rw-r--r-- src/video_core/swrasterizer/clipper.cpp 2
-rw-r--r-- src/video_core/utils.h 14
6 files changed, 240 insertions, 240 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index fb65a3a0a..caf9f7a06 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -119,6 +119,224 @@ static void WriteUniformFloatReg(ShaderRegs& config, Shader::ShaderSetup& setup,
}
}
+/**
+ * Consumes one 32-bit word written to one of the vs_default_attributes_setup.set_value
+ * registers. Words are buffered until three have arrived; the triple is then unpacked into a
+ * four-component float24 vector and stored either as a default vertex attribute or, when the
+ * setup index is 15, as the next attribute of an immediate-mode vertex.
+ *
+ * @param register_value Raw word that was written to the set_value register.
+ */
+static void LoadDefaultVertexAttributes(u32 register_value) {
+ auto& regs = g_state.regs;
+
+ // TODO: Does actual hardware indeed keep an intermediate buffer or does
+ // it directly write the values?
+ default_attr_write_buffer[default_attr_counter++] = register_value;
+
+ // Default attributes are written in a packed format such that four float24 values are encoded
+ // in three 32-bit numbers.
+ // We write to internal memory once a full such vector is written.
+ if (default_attr_counter >= 3) {
+ // Reset before anything else so that every exit path below (including the early return)
+ // starts the next vector from word 0.
+ default_attr_counter = 0;
+
+ auto& setup = regs.pipeline.vs_default_attributes_setup;
+
+ // Indices 0-14 select a default attribute slot and 15 selects the immediate-mode path
+ // (see the branches below); anything larger is rejected.
+ if (setup.index >= 16) {
+ LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index);
+ return;
+ }
+
+ Math::Vec4<float24> attribute;
+
+ // Bit layout of the three buffered words:
+ // word 0 = w[23:0] | z[23:16], word 1 = z[15:0] | y[23:8], word 2 = y[7:0] | x[23:0]
+ // NOTE: The destination component order indeed is "backwards"
+ attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8);
+ attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) |
+ ((default_attr_write_buffer[1] >> 16) & 0xFFFF));
+ attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) |
+ ((default_attr_write_buffer[2] >> 24) & 0xFF));
+ attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF);
+
+ LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index,
+ attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(),
+ attribute.w.ToFloat32());
+
+ // TODO: Verify that this actually modifies the register!
+ if (setup.index < 15) {
+ // Regular default attribute: store it and advance to the next slot.
+ g_state.input_default_attributes.attr[setup.index] = attribute;
+ setup.index++;
+ } else {
+ // Put each attribute into an immediate input buffer. When all specified immediate
+ // attributes are present, the Vertex Shader is invoked and everything is sent to
+ // the primitive assembler.
+
+ auto& immediate_input = g_state.immediate.input_vertex;
+ auto& immediate_attribute_id = g_state.immediate.current_attribute;
+
+ immediate_input.attr[immediate_attribute_id] = attribute;
+
+ if (immediate_attribute_id < regs.pipeline.max_input_attrib_index) {
+ // More attributes are still expected for this vertex; just advance the slot.
+ immediate_attribute_id += 1;
+ } else {
+ // The vertex is complete: shade it, submit it and rewind the slot counter.
+ MICROPROFILE_SCOPE(GPU_Drawing);
+ immediate_attribute_id = 0;
+
+ auto* shader_engine = Shader::GetEngine();
+ shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset);
+
+ // Send to vertex shader
+ if (g_debug_context)
+ g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
+ static_cast<void*>(&immediate_input));
+ Shader::UnitState shader_unit;
+ Shader::AttributeBuffer output{};
+
+ shader_unit.LoadInput(regs.vs, immediate_input);
+ shader_engine->Run(g_state.vs, shader_unit);
+ shader_unit.WriteOutput(regs.vs, output);
+
+ // Send to geometry pipeline
+ // Reconfigure at most once per pending reset request; the flag is cleared here.
+ if (g_state.immediate.reset_geometry_pipeline) {
+ g_state.geometry_pipeline.Reconfigure();
+ g_state.immediate.reset_geometry_pipeline = false;
+ }
+ // This path only ever calls SubmitVertex, so a pipeline that expects index input
+ // cannot be served from here.
+ ASSERT(!g_state.geometry_pipeline.NeedIndexInput());
+ g_state.geometry_pipeline.Setup(shader_engine);
+ g_state.geometry_pipeline.SubmitVertex(output);
+
+ // TODO: If drawing after every immediate mode triangle kills performance,
+ // change it to flush triangles whenever a drawing config register changes
+ // See: https://github.com/citra-emu/citra/pull/2866#issuecomment-327011550
+ VideoCore::g_renderer->Rasterizer()->DrawTriangles();
+ if (g_debug_context) {
+ g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr);
+ }
+ }
+ }
+ }
+}
+
+/**
+ * Executes a draw triggered by a write to pipeline.trigger_draw or
+ * pipeline.trigger_draw_indexed: loads each vertex, runs the vertex shader (reusing cached
+ * outputs for repeated indices), submits the results to the geometry pipeline and finally asks
+ * the rasterizer to draw the accumulated triangles.
+ *
+ * @param command_id Index of the register that triggered the draw; indexed rendering is used
+ * when it equals PICA_REG_INDEX(pipeline.trigger_draw_indexed).
+ */
+static void Draw(u32 command_id) {
+ MICROPROFILE_SCOPE(GPU_Drawing);
+ auto& regs = g_state.regs;
+
+#if PICA_LOG_TEV
+ DebugUtils::DumpTevStageConfig(regs.GetTevStages());
+#endif
+ if (g_debug_context)
+ g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr);
+
+ // Processes information about internal vertex attributes to figure out how a vertex is
+ // loaded.
+ // Later, these can be compiled and cached.
+ const u32 base_address = regs.pipeline.vertex_attributes.GetPhysicalBaseAddress();
+ VertexLoader loader(regs.pipeline);
+
+ // Load vertices
+ bool is_indexed = (command_id == PICA_REG_INDEX(pipeline.trigger_draw_indexed));
+
+ // The index buffer is viewed both as bytes and as 16-bit values; index_u16 selects which
+ // view is read in the loop below.
+ const auto& index_info = regs.pipeline.index_array;
+ const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset);
+ const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
+ bool index_u16 = index_info.format != 0;
+
+ // Report the memory of every enabled texture unit to the recorder, if one is attached.
+ if (g_debug_context && g_debug_context->recorder) {
+ for (int i = 0; i < 3; ++i) {
+ const auto texture = regs.texturing.GetTextures()[i];
+ if (!texture.enabled)
+ continue;
+
+ u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress());
+ g_debug_context->recorder->MemoryAccessed(
+ texture_data, Pica::TexturingRegs::NibblesPerPixel(texture.format) *
+ texture.config.width / 2 * texture.config.height,
+ texture.config.GetPhysicalAddress());
+ }
+ }
+
+ DebugUtils::MemoryAccessTracker memory_accesses;
+
+ // Simple circular-replacement vertex cache
+ // The size has been tuned for optimal balance between hit-rate and the cost of lookup
+ const size_t VERTEX_CACHE_SIZE = 32;
+ // Empty slots are tracked with explicit validity flags rather than a sentinel id: every u16
+ // value, including 0xFFFF, is a legitimate 16-bit index and must never match an empty slot.
+ std::array<bool, VERTEX_CACHE_SIZE> vertex_cache_valid{};
+ std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids{};
+ std::array<Shader::AttributeBuffer, VERTEX_CACHE_SIZE> vertex_cache;
+ Shader::AttributeBuffer vs_output;
+
+ unsigned int vertex_cache_pos = 0;
+
+ auto* shader_engine = Shader::GetEngine();
+ Shader::UnitState shader_unit;
+
+ shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset);
+
+ g_state.geometry_pipeline.Reconfigure();
+ g_state.geometry_pipeline.Setup(shader_engine);
+ if (g_state.geometry_pipeline.NeedIndexInput())
+ ASSERT(is_indexed);
+
+ for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) {
+ // Indexed rendering doesn't use the start offset
+ unsigned int vertex = is_indexed
+ ? (index_u16 ? index_address_16[index] : index_address_8[index])
+ : (index + regs.pipeline.vertex_offset);
+
+ // -1 is a common special value used for primitive restart. Since it's unknown if
+ // the PICA supports it, and it would mess up the caching, guard against it here.
+ ASSERT(vertex != -1);
+
+ bool vertex_cache_hit = false;
+
+ if (is_indexed) {
+ // A pipeline that wants raw indices receives them directly; no vertex is shaded.
+ if (g_state.geometry_pipeline.NeedIndexInput()) {
+ g_state.geometry_pipeline.SubmitIndex(vertex);
+ continue;
+ }
+
+ if (g_debug_context && Pica::g_debug_context->recorder) {
+ int size = index_u16 ? 2 : 1;
+ memory_accesses.AddAccess(base_address + index_info.offset + size * index, size);
+ }
+
+ for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) {
+ if (vertex_cache_valid[i] && vertex == vertex_cache_ids[i]) {
+ vs_output = vertex_cache[i];
+ vertex_cache_hit = true;
+ break;
+ }
+ }
+ }
+
+ if (!vertex_cache_hit) {
+ // Initialize data for the current vertex
+ Shader::AttributeBuffer input;
+ loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
+
+ // Send to vertex shader
+ if (g_debug_context)
+ g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
+ (void*)&input);
+ shader_unit.LoadInput(regs.vs, input);
+ shader_engine->Run(g_state.vs, shader_unit);
+ shader_unit.WriteOutput(regs.vs, vs_output);
+
+ // Only indexed draws can repeat a vertex, so only they populate the cache.
+ if (is_indexed) {
+ vertex_cache[vertex_cache_pos] = vs_output;
+ vertex_cache_ids[vertex_cache_pos] = vertex;
+ vertex_cache_valid[vertex_cache_pos] = true;
+ vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE;
+ }
+ }
+
+ // Send to geometry pipeline
+ g_state.geometry_pipeline.SubmitVertex(vs_output);
+ }
+
+ // Forward the tracked accesses only when a recorder is attached; this also avoids
+ // dereferencing a null debug context.
+ if (g_debug_context && g_debug_context->recorder) {
+ for (auto& range : memory_accesses.ranges) {
+ g_debug_context->recorder->MemoryAccessed(Memory::GetPhysicalPointer(range.first),
+ range.second, range.first);
+ }
+ }
+
+ VideoCore::g_renderer->Rasterizer()->DrawTriangles();
+ if (g_debug_context) {
+ g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr);
+ }
+}
+
static void WritePicaReg(u32 id, u32 value, u32 mask) {
auto& regs = g_state.regs;
@@ -168,98 +386,12 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
// Load default vertex input attributes
case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[0], 0x233):
case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[1], 0x234):
- case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[2], 0x235): {
- // TODO: Does actual hardware indeed keep an intermediate buffer or does
- // it directly write the values?
- default_attr_write_buffer[default_attr_counter++] = value;
-
- // Default attributes are written in a packed format such that four float24 values are
- // encoded in
- // three 32-bit numbers. We write to internal memory once a full such vector is
- // written.
- if (default_attr_counter >= 3) {
- default_attr_counter = 0;
-
- auto& setup = regs.pipeline.vs_default_attributes_setup;
-
- if (setup.index >= 16) {
- LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index);
- break;
- }
-
- Math::Vec4<float24> attribute;
-
- // NOTE: The destination component order indeed is "backwards"
- attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8);
- attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) |
- ((default_attr_write_buffer[1] >> 16) & 0xFFFF));
- attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) |
- ((default_attr_write_buffer[2] >> 24) & 0xFF));
- attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF);
-
- LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index,
- attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(),
- attribute.w.ToFloat32());
-
- // TODO: Verify that this actually modifies the register!
- if (setup.index < 15) {
- g_state.input_default_attributes.attr[setup.index] = attribute;
- setup.index++;
- } else {
- // Put each attribute into an immediate input buffer. When all specified immediate
- // attributes are present, the Vertex Shader is invoked and everything is sent to
- // the primitive assembler.
-
- auto& immediate_input = g_state.immediate.input_vertex;
- auto& immediate_attribute_id = g_state.immediate.current_attribute;
-
- immediate_input.attr[immediate_attribute_id] = attribute;
-
- if (immediate_attribute_id < regs.pipeline.max_input_attrib_index) {
- immediate_attribute_id += 1;
- } else {
- MICROPROFILE_SCOPE(GPU_Drawing);
- immediate_attribute_id = 0;
-
- auto* shader_engine = Shader::GetEngine();
- shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset);
-
- // Send to vertex shader
- if (g_debug_context)
- g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
- static_cast<void*>(&immediate_input));
- Shader::UnitState shader_unit;
- Shader::AttributeBuffer output{};
-
- shader_unit.LoadInput(regs.vs, immediate_input);
- shader_engine->Run(g_state.vs, shader_unit);
- shader_unit.WriteOutput(regs.vs, output);
-
- // Send to geometry pipeline
- if (g_state.immediate.reset_geometry_pipeline) {
- g_state.geometry_pipeline.Reconfigure();
- g_state.immediate.reset_geometry_pipeline = false;
- }
- ASSERT(!g_state.geometry_pipeline.NeedIndexInput());
- g_state.geometry_pipeline.Setup(shader_engine);
- g_state.geometry_pipeline.SubmitVertex(output);
- }
- }
- }
+ case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[2], 0x235):
+ LoadDefaultVertexAttributes(value);
break;
- }
case PICA_REG_INDEX(pipeline.gpu_mode):
- if (regs.pipeline.gpu_mode == PipelineRegs::GPUMode::Configuring) {
- MICROPROFILE_SCOPE(GPU_Drawing);
-
- // Draw immediate mode triangles when GPU Mode is set to GPUMode::Configuring
- VideoCore::g_renderer->Rasterizer()->DrawTriangles();
-
- if (g_debug_context) {
- g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr);
- }
- }
+ // This register likely just enables vertex processing and doesn't need any special handling
break;
case PICA_REG_INDEX_WORKAROUND(pipeline.command_buffer.trigger[0], 0x23c):
@@ -275,131 +407,9 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
// It seems like these trigger vertex rendering
case PICA_REG_INDEX(pipeline.trigger_draw):
- case PICA_REG_INDEX(pipeline.trigger_draw_indexed): {
- MICROPROFILE_SCOPE(GPU_Drawing);
-
-#if PICA_LOG_TEV
- DebugUtils::DumpTevStageConfig(regs.GetTevStages());
-#endif
- if (g_debug_context)
- g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr);
-
- // Processes information about internal vertex attributes to figure out how a vertex is
- // loaded.
- // Later, these can be compiled and cached.
- const u32 base_address = regs.pipeline.vertex_attributes.GetPhysicalBaseAddress();
- VertexLoader loader(regs.pipeline);
-
- // Load vertices
- bool is_indexed = (id == PICA_REG_INDEX(pipeline.trigger_draw_indexed));
-
- const auto& index_info = regs.pipeline.index_array;
- const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset);
- const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
- bool index_u16 = index_info.format != 0;
-
- PrimitiveAssembler<Shader::OutputVertex>& primitive_assembler = g_state.primitive_assembler;
-
- if (g_debug_context && g_debug_context->recorder) {
- for (int i = 0; i < 3; ++i) {
- const auto texture = regs.texturing.GetTextures()[i];
- if (!texture.enabled)
- continue;
-
- u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress());
- g_debug_context->recorder->MemoryAccessed(
- texture_data, Pica::TexturingRegs::NibblesPerPixel(texture.format) *
- texture.config.width / 2 * texture.config.height,
- texture.config.GetPhysicalAddress());
- }
- }
-
- DebugUtils::MemoryAccessTracker memory_accesses;
-
- // Simple circular-replacement vertex cache
- // The size has been tuned for optimal balance between hit-rate and the cost of lookup
- const size_t VERTEX_CACHE_SIZE = 32;
- std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids;
- std::array<Shader::AttributeBuffer, VERTEX_CACHE_SIZE> vertex_cache;
- Shader::AttributeBuffer vs_output;
-
- unsigned int vertex_cache_pos = 0;
- vertex_cache_ids.fill(-1);
-
- auto* shader_engine = Shader::GetEngine();
- Shader::UnitState shader_unit;
-
- shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset);
-
- g_state.geometry_pipeline.Reconfigure();
- g_state.geometry_pipeline.Setup(shader_engine);
- if (g_state.geometry_pipeline.NeedIndexInput())
- ASSERT(is_indexed);
-
- for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) {
- // Indexed rendering doesn't use the start offset
- unsigned int vertex =
- is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index])
- : (index + regs.pipeline.vertex_offset);
-
- // -1 is a common special value used for primitive restart. Since it's unknown if
- // the PICA supports it, and it would mess up the caching, guard against it here.
- ASSERT(vertex != -1);
-
- bool vertex_cache_hit = false;
-
- if (is_indexed) {
- if (g_state.geometry_pipeline.NeedIndexInput()) {
- g_state.geometry_pipeline.SubmitIndex(vertex);
- continue;
- }
-
- if (g_debug_context && Pica::g_debug_context->recorder) {
- int size = index_u16 ? 2 : 1;
- memory_accesses.AddAccess(base_address + index_info.offset + size * index,
- size);
- }
-
- for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) {
- if (vertex == vertex_cache_ids[i]) {
- vs_output = vertex_cache[i];
- vertex_cache_hit = true;
- break;
- }
- }
- }
-
- if (!vertex_cache_hit) {
- // Initialize data for the current vertex
- Shader::AttributeBuffer input;
- loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
-
- // Send to vertex shader
- if (g_debug_context)
- g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation,
- (void*)&input);
- shader_unit.LoadInput(regs.vs, input);
- shader_engine->Run(g_state.vs, shader_unit);
- shader_unit.WriteOutput(regs.vs, vs_output);
-
- if (is_indexed) {
- vertex_cache[vertex_cache_pos] = vs_output;
- vertex_cache_ids[vertex_cache_pos] = vertex;
- vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE;
- }
- }
-
- // Send to geometry pipeline
- g_state.geometry_pipeline.SubmitVertex(vs_output);
- }
-
- for (auto& range : memory_accesses.ranges) {
- g_debug_context->recorder->MemoryAccessed(Memory::GetPhysicalPointer(range.first),
- range.second, range.first);
- }
-
+ case PICA_REG_INDEX(pipeline.trigger_draw_indexed):
+ Draw(id);
break;
- }
case PICA_REG_INDEX(gs.bool_uniforms):
WriteUniformBoolReg(g_state.gs, g_state.regs.gs.bool_uniforms.Value());
@@ -632,6 +642,6 @@ void ProcessCommandList(const u32* list, u32 size) {
}
}
-} // namespace
+} // namespace CommandProcessor
-} // namespace
+} // namespace Pica
diff --git a/src/video_core/geometry_pipeline.cpp b/src/video_core/geometry_pipeline.cpp
index b146e2ecb..98ff2ccd3 100644
--- a/src/video_core/geometry_pipeline.cpp
+++ b/src/video_core/geometry_pipeline.cpp
@@ -105,7 +105,7 @@ public:
DEBUG_ASSERT(need_index);
// The number of vertex input is put to the uniform register
- float24 vertex_num = float24::FromFloat32(val);
+ float24 vertex_num = float24::FromFloat32(static_cast<float>(val));
setup.uniforms.f[0] = Math::MakeVec(vertex_num, vertex_num, vertex_num, vertex_num);
// The second uniform register and so on are used for receiving input vertices
diff --git a/src/video_core/pica_types.h b/src/video_core/pica_types.h
index 5d7e10066..2eafa7e9e 100644
--- a/src/video_core/pica_types.h
+++ b/src/video_core/pica_types.h
@@ -58,11 +58,12 @@ public:
}
+ // Multiplies two PICA floats. If neither operand is NaN but the product is (e.g. zero times
+ // infinity), the product is replaced by zero before being converted back with FromFloat32.
Float<M, E> operator*(const Float<M, E>& flt) const {
- if ((this->value == 0.f && !std::isnan(flt.value)) ||
- (flt.value == 0.f && !std::isnan(this->value)))
- // PICA gives 0 instead of NaN when multiplying by inf
- return Zero();
- return Float<M, E>::FromFloat32(ToFloat32() * flt.ToFloat32());
+ float result = value * flt.ToFloat32();
+ // PICA gives 0 instead of NaN when multiplying by inf
+ if (!std::isnan(value) && !std::isnan(flt.ToFloat32()))
+ if (std::isnan(result))
+ result = 0.f;
+ return Float<M, E>::FromFloat32(result);
}
Float<M, E> operator/(const Float<M, E>& flt) const {
@@ -78,12 +79,7 @@ public:
}
+ // In-place multiply. Delegates to operator* so that both operators apply the same rule for
+ // products that would otherwise be NaN.
Float<M, E>& operator*=(const Float<M, E>& flt) {
- if ((this->value == 0.f && !std::isnan(flt.value)) ||
- (flt.value == 0.f && !std::isnan(this->value)))
- // PICA gives 0 instead of NaN when multiplying by inf
- *this = Zero();
- else
- value *= flt.ToFloat32();
+ value = operator*(flt).value;
return *this;
}
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 06a905766..5770ae08f 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -267,9 +267,9 @@ void OpenGLState::Apply() const {
for (size_t i = 0; i < clip_distance.size(); ++i) {
if (clip_distance[i] != cur_state.clip_distance[i]) {
if (clip_distance[i]) {
- glEnable(GL_CLIP_DISTANCE0 + i);
+ glEnable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i));
} else {
- glDisable(GL_CLIP_DISTANCE0 + i);
+ glDisable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i));
}
}
}
diff --git a/src/video_core/swrasterizer/clipper.cpp b/src/video_core/swrasterizer/clipper.cpp
index a52129eb7..c1ed48398 100644
--- a/src/video_core/swrasterizer/clipper.cpp
+++ b/src/video_core/swrasterizer/clipper.cpp
@@ -98,7 +98,7 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu
auto FlipQuaternionIfOpposite = [](auto& a, const auto& b) {
if (Math::Dot(a, b) < float24::Zero())
- a = -a;
+ a = a * float24::FromFloat32(-1.0f);
};
// Flip the quaternions if they are opposite to prevent interpolating them over the wrong
diff --git a/src/video_core/utils.h b/src/video_core/utils.h
index 7ce83a055..d8567f314 100644
--- a/src/video_core/utils.h
+++ b/src/video_core/utils.h
@@ -8,17 +8,11 @@
namespace VideoCore {
-/**
- * Interleave the lower 3 bits of each coordinate to get the intra-block offsets, which are
- * arranged in a Z-order curve. More details on the bit manipulation at:
- * https://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
- */
+// 8x8 Z-Order coordinate from 2D coordinates
+// Only the low three bits of each coordinate are used (x % 8, y % 8). xlut spreads them onto
+// the even result bits (0, 2, 4) and ylut onto the odd ones (1, 3, 5); the two table values
+// never share a set bit, so adding them is the same as OR-ing them.
static inline u32 MortonInterleave(u32 x, u32 y) {
- u32 i = (x & 7) | ((y & 7) << 8); // ---- -210
- i = (i ^ (i << 2)) & 0x1313; // ---2 --10
- i = (i ^ (i << 1)) & 0x1515; // ---2 -1-0
- i = (i | (i >> 7)) & 0x3F;
- return i;
+ static const u32 xlut[] = {0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15};
+ static const u32 ylut[] = {0x00, 0x02, 0x08, 0x0a, 0x20, 0x22, 0x28, 0x2a};
+ return xlut[x % 8] + ylut[y % 8];
}
/**