summary | refs | log | tree | commit | diff | stats
path: root/src/video_core
diff options
context:
space:
mode:
author: bunnei <bunneidev@gmail.com> 2016-04-29 15:42:47 +0200
committer: bunnei <bunneidev@gmail.com> 2016-04-29 15:42:47 +0200
commit: 90243c56fb90d7d74cbef40da3eec97d967c10a2 (patch)
tree: 94d223001196ca9b774a8d018535ba2be8de1b01 /src/video_core
parent: Common: Remove section measurement from profiler (#1731) (diff)
parent: Move and rename the MemoryAccesses class to MemoryAccessTracker. (diff)
download: yuzu-90243c56fb90d7d74cbef40da3eec97d967c10a2.tar
yuzu-90243c56fb90d7d74cbef40da3eec97d967c10a2.tar.gz
yuzu-90243c56fb90d7d74cbef40da3eec97d967c10a2.tar.bz2
yuzu-90243c56fb90d7d74cbef40da3eec97d967c10a2.tar.lz
yuzu-90243c56fb90d7d74cbef40da3eec97d967c10a2.tar.xz
yuzu-90243c56fb90d7d74cbef40da3eec97d967c10a2.tar.zst
yuzu-90243c56fb90d7d74cbef40da3eec97d967c10a2.zip
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/CMakeLists.txt 2
-rw-r--r-- src/video_core/command_processor.cpp 129
-rw-r--r-- src/video_core/debug_utils/debug_utils.h 30
-rw-r--r-- src/video_core/shader/shader.h 2
-rw-r--r-- src/video_core/vertex_loader.cpp 140
-rw-r--r-- src/video_core/vertex_loader.h 28
6 files changed, 210 insertions, 121 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 76cfd4f7d..de4082b1f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -16,6 +16,7 @@ set(SRCS
shader/shader_interpreter.cpp
swrasterizer.cpp
utils.cpp
+ vertex_loader.cpp
video_core.cpp
)
@@ -43,6 +44,7 @@ set(HEADERS
shader/shader_interpreter.h
swrasterizer.h
utils.h
+ vertex_loader.h
video_core.h
)
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 97ba8214e..58883e374 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -21,6 +21,7 @@
#include "video_core/video_core.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/shader/shader_interpreter.h"
+#include "video_core/vertex_loader.h"
namespace Pica {
@@ -188,54 +189,14 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
#if PICA_LOG_TEV
DebugUtils::DumpTevStageConfig(regs.GetTevStages());
#endif
-
if (g_debug_context)
g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr);
- const auto& attribute_config = regs.vertex_attributes;
- const u32 base_address = attribute_config.GetPhysicalBaseAddress();
-
- // Information about internal vertex attributes
- u32 vertex_attribute_sources[16];
- boost::fill(vertex_attribute_sources, 0xdeadbeef);
- u32 vertex_attribute_strides[16] = {};
- Regs::VertexAttributeFormat vertex_attribute_formats[16] = {};
-
- u32 vertex_attribute_elements[16] = {};
- u32 vertex_attribute_element_size[16] = {};
-
- // Setup attribute data from loaders
- for (int loader = 0; loader < 12; ++loader) {
- const auto& loader_config = attribute_config.attribute_loaders[loader];
-
- u32 offset = 0;
-
- // TODO: What happens if a loader overwrites a previous one's data?
- for (unsigned component = 0; component < loader_config.component_count; ++component) {
- if (component >= 12) {
- LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component);
- continue;
- }
-
- u32 attribute_index = loader_config.GetComponent(component);
- if (attribute_index < 12) {
- int element_size = attribute_config.GetElementSizeInBytes(attribute_index);
- offset = Common::AlignUp(offset, element_size);
- vertex_attribute_sources[attribute_index] = base_address + loader_config.data_offset + offset;
- vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count);
- vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index);
- vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index);
- vertex_attribute_element_size[attribute_index] = element_size;
- offset += attribute_config.GetStride(attribute_index);
- } else if (attribute_index < 16) {
- // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively
- offset = Common::AlignUp(offset, 4);
- offset += (attribute_index - 11) * 4;
- } else {
- UNREACHABLE(); // This is truly unreachable due to the number of bits for each component
- }
- }
- }
+ // Processes information about internal vertex attributes to figure out how a vertex is loaded.
+ // Later, these can be compiled and cached.
+ VertexLoader loader;
+ const u32 base_address = regs.vertex_attributes.GetPhysicalBaseAddress();
+ loader.Setup(regs);
// Load vertices
bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed));
@@ -259,32 +220,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
}
}
- class {
- /// Combine overlapping and close ranges
- void SimplifyRanges() {
- for (auto it = ranges.begin(); it != ranges.end(); ++it) {
- // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too
- auto it2 = std::next(it);
- while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) {
- it->second = std::max(it->second, it2->first + it2->second - it->first);
- it2 = ranges.erase(it2);
- }
- }
- }
-
- public:
- /// Record a particular memory access in the list
- void AddAccess(u32 paddr, u32 size) {
- // Create new range or extend existing one
- ranges[paddr] = std::max(ranges[paddr], size);
-
- // Simplify ranges...
- SimplifyRanges();
- }
-
- /// Map of accessed ranges (mapping start address to range size)
- std::map<u32, u32> ranges;
- } memory_accesses;
+ DebugUtils::MemoryAccessTracker memory_accesses;
// Simple circular-replacement vertex cache
// The size has been tuned for optimal balance between hit-rate and the cost of lookup
@@ -328,60 +264,13 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
if (!vertex_cache_hit) {
// Initialize data for the current vertex
Shader::InputVertex input;
-
- for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) {
- if (vertex_attribute_elements[i] != 0) {
- // Default attribute values set if array elements have < 4 components. This
- // is *not* carried over from the default attribute settings even if they're
- // enabled for this attribute.
- static const float24 zero = float24::FromFloat32(0.0f);
- static const float24 one = float24::FromFloat32(1.0f);
- input.attr[i] = Math::Vec4<float24>(zero, zero, zero, one);
-
- // Load per-vertex data from the loader arrays
- for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
- u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i];
- const u8* srcdata = Memory::GetPhysicalPointer(source_addr);
-
- if (g_debug_context && Pica::g_debug_context->recorder) {
- memory_accesses.AddAccess(source_addr,
- (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4
- : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1);
- }
-
- const float srcval =
- (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *reinterpret_cast<const s8*>(srcdata) :
- (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *reinterpret_cast<const u8*>(srcdata) :
- (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *reinterpret_cast<const s16*>(srcdata) :
- *reinterpret_cast<const float*>(srcdata);
-
- input.attr[i][comp] = float24::FromFloat32(srcval);
- LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f",
- comp, i, vertex, index,
- attribute_config.GetPhysicalBaseAddress(),
- vertex_attribute_sources[i] - base_address,
- vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i],
- input.attr[i][comp].ToFloat32());
- }
- } else if (attribute_config.IsDefaultAttribute(i)) {
- // Load the default attribute if we're configured to do so
- input.attr[i] = g_state.vs.default_attributes[i];
- LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
- i, vertex, index,
- input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
- input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
- } else {
- // TODO(yuriks): In this case, no data gets loaded and the vertex
- // remains with the last value it had. This isn't currently maintained
- // as global state, however, and so won't work in Citra yet.
- }
- }
+ loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
if (g_debug_context)
g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input);
// Send to vertex shader
- output = Shader::Run(shader_unit, input, attribute_config.GetNumTotalAttributes());
+ output = Shader::Run(shader_unit, input, loader.GetNumTotalAttributes());
if (is_indexed) {
vertex_cache[vertex_cache_pos] = output;
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h
index 56f9bd958..dd0828cee 100644
--- a/src/video_core/debug_utils/debug_utils.h
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -216,6 +216,36 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data);
void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages);
+/**
+ * Collects the memory ranges touched by the vertex loader and folds ranges that overlap or lie
+ * close together into a single record. TODO: Investigate if actually useful.
+ */
+class MemoryAccessTracker {
+    /// Walks the recorded ranges in address order and absorbs every following range that starts
+    /// within the current range or at most 32 bytes past its end.
+    void SimplifyRanges() {
+        auto current = ranges.begin();
+        while (current != ranges.end()) {
+            auto following = std::next(current);
+            for (; following != ranges.end(); following = ranges.erase(following)) {
+                // NOTE: The extra 32 bytes make sure that "close" ranges get combined as well
+                const u32 merge_limit = current->first + current->second + 32;
+                if (merge_limit < following->first)
+                    break;
+
+                // Grow the current range so that it also spans the range being absorbed
+                const u32 merged_size = following->first + following->second - current->first;
+                current->second = std::max(current->second, merged_size);
+            }
+            ++current;
+        }
+    }
+
+public:
+    /// Records an access of `size` bytes starting at physical address `paddr`
+    void AddAccess(u32 paddr, u32 size) {
+        // Insert a fresh entry for this start address, or widen the one that is already there
+        u32& recorded_size = ranges[paddr];
+        recorded_size = std::max(recorded_size, size);
+
+        // Fold the new entry into its neighbours where possible
+        SimplifyRanges();
+    }
+
+    /// Accessed ranges, keyed by start address and mapping to the size of the range
+    std::map<u32, u32> ranges;
+};
+
} // namespace
} // namespace
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 9c5bd97bd..9ce9344d2 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -25,7 +25,7 @@ namespace Pica {
namespace Shader {
struct InputVertex {
- Math::Vec4<float24> attr[16];
+ alignas(16) Math::Vec4<float24> attr[16];
};
struct OutputVertex {
diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp
new file mode 100644
index 000000000..8a3d91896
--- /dev/null
+++ b/src/video_core/vertex_loader.cpp
@@ -0,0 +1,140 @@
+#include <cmath>
+#include <string>
+
+#include "boost/range/algorithm/fill.hpp"
+
+#include "common/assert.h"
+#include "common/alignment.h"
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "common/logging/log.h"
+
+#include "core/memory.h"
+
+#include "video_core/debug_utils/debug_utils.h"
+#include "video_core/pica.h"
+#include "video_core/pica_state.h"
+#include "video_core/pica_types.h"
+#include "video_core/vertex_loader.h"
+
+namespace Pica {
+
+/**
+ * Decodes the vertex attribute loader registers into per-attribute lookup tables.
+ *
+ * For every attribute referenced by one of the 12 attribute loaders this records the byte offset
+ * of its data (LoadVertex adds the physical base address to it), the per-vertex byte count of the
+ * loader, the element format and the element count. It also caches which attributes are flagged
+ * as default attributes and the total number of attributes.
+ *
+ * @param regs PICA register state whose vertex attribute configuration is read
+ */
+void VertexLoader::Setup(const Pica::Regs& regs) {
+    const auto& attribute_config = regs.vertex_attributes;
+    num_total_attributes = attribute_config.GetNumTotalAttributes();
+
+    boost::fill(vertex_attribute_sources, 0xdeadbeef);
+
+    // LoadVertex treats a non-zero element count as "this attribute is fed by a loader", so the
+    // counts left behind by an earlier Setup call have to be cleared before the new
+    // configuration is decoded. Otherwise a reused VertexLoader keeps loading stale attributes.
+    boost::fill(vertex_attribute_elements, 0u);
+
+    for (int i = 0; i < 16; i++) {
+        vertex_attribute_is_default[i] = attribute_config.IsDefaultAttribute(i);
+    }
+
+    // Setup attribute data from loaders
+    // The loader index is unsigned so that it matches the %u specifier of the log call below.
+    for (unsigned loader = 0; loader < 12; ++loader) {
+        const auto& loader_config = attribute_config.attribute_loaders[loader];
+
+        // Running byte offset of the current component within this loader's per-vertex data
+        u32 offset = 0;
+
+        // TODO: What happens if a loader overwrites a previous one's data?
+        for (unsigned component = 0; component < loader_config.component_count; ++component) {
+            if (component >= 12) {
+                LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component);
+                continue;
+            }
+
+            u32 attribute_index = loader_config.GetComponent(component);
+            if (attribute_index < 12) {
+                // Each attribute starts at an offset aligned to the size of one of its elements
+                offset = Common::AlignUp(offset, attribute_config.GetElementSizeInBytes(attribute_index));
+                vertex_attribute_sources[attribute_index] = loader_config.data_offset + offset;
+                vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count);
+                vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index);
+                vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index);
+                offset += attribute_config.GetStride(attribute_index);
+            } else if (attribute_index < 16) {
+                // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively
+                offset = Common::AlignUp(offset, 4);
+                offset += (attribute_index - 11) * 4;
+            } else {
+                UNREACHABLE(); // This is truly unreachable due to the number of bits for each component
+            }
+        }
+    }
+}
+
+/**
+ * Fills the shader input attributes of a single vertex from the tables built by Setup.
+ *
+ * @param base_address Physical base address that the per-attribute source offsets are added to
+ * @param index Index value of the vertex; only used for trace logging in this function
+ * @param vertex Vertex number; multiplied with the attribute stride to locate the vertex data
+ * @param input Receives the loaded attribute values
+ * @param memory_accesses Receives the accessed address ranges while a debug recorder is attached
+ */
+void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses) {
+    for (int attr = 0; attr < num_total_attributes; ++attr) {
+        const u32 num_elements = vertex_attribute_elements[attr];
+
+        // Attributes without array data are either taken from the default attributes or skipped
+        if (num_elements == 0) {
+            if (vertex_attribute_is_default[attr]) {
+                // Load the default attribute if we're configured to do so
+                input.attr[attr] = g_state.vs.default_attributes[attr];
+                LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
+                          attr, vertex, index,
+                          input.attr[attr][0].ToFloat32(), input.attr[attr][1].ToFloat32(),
+                          input.attr[attr][2].ToFloat32(), input.attr[attr][3].ToFloat32());
+            }
+            // TODO(yuriks): When the attribute is not a default attribute either, no data gets
+            // loaded and the vertex remains with the last value it had. This isn't currently
+            // maintained as global state, however, and so won't work in Citra yet.
+            continue;
+        }
+
+        // Load per-vertex data from the loader arrays
+        const Regs::VertexAttributeFormat format = vertex_attribute_formats[attr];
+        const u32 source_addr = base_address + vertex_attribute_sources[attr] + vertex_attribute_strides[attr] * vertex;
+
+        if (g_debug_context && Pica::g_debug_context->recorder) {
+            // FLOAT elements take 4 bytes, SHORT elements 2 bytes, everything else 1 byte
+            u32 bytes_per_element = 1;
+            if (format == Regs::VertexAttributeFormat::FLOAT) {
+                bytes_per_element = 4;
+            } else if (format == Regs::VertexAttributeFormat::SHORT) {
+                bytes_per_element = 2;
+            }
+            memory_accesses.AddAccess(source_addr, num_elements * bytes_per_element);
+        }
+
+        // Convert the stored elements to float24, reading them with the width of their format
+        switch (format) {
+        case Regs::VertexAttributeFormat::BYTE:
+        {
+            const s8* bytes = reinterpret_cast<const s8*>(Memory::GetPhysicalPointer(source_addr));
+            for (unsigned int elem = 0; elem < num_elements; ++elem) {
+                input.attr[attr][elem] = float24::FromFloat32(bytes[elem]);
+            }
+            break;
+        }
+        case Regs::VertexAttributeFormat::UBYTE:
+        {
+            const u8* ubytes = reinterpret_cast<const u8*>(Memory::GetPhysicalPointer(source_addr));
+            for (unsigned int elem = 0; elem < num_elements; ++elem) {
+                input.attr[attr][elem] = float24::FromFloat32(ubytes[elem]);
+            }
+            break;
+        }
+        case Regs::VertexAttributeFormat::SHORT:
+        {
+            const s16* shorts = reinterpret_cast<const s16*>(Memory::GetPhysicalPointer(source_addr));
+            for (unsigned int elem = 0; elem < num_elements; ++elem) {
+                input.attr[attr][elem] = float24::FromFloat32(shorts[elem]);
+            }
+            break;
+        }
+        case Regs::VertexAttributeFormat::FLOAT:
+        {
+            const float* floats = reinterpret_cast<const float*>(Memory::GetPhysicalPointer(source_addr));
+            for (unsigned int elem = 0; elem < num_elements; ++elem) {
+                input.attr[attr][elem] = float24::FromFloat32(floats[elem]);
+            }
+            break;
+        }
+        }
+
+        // Components that the array does not provide get fixed values: 0 for the first three
+        // components and 1 for the fourth. These are *not* carried over from the default
+        // attribute settings even if they're enabled for this attribute.
+        for (unsigned int elem = num_elements; elem < 4; ++elem) {
+            const float fill_value = (elem == 3) ? 1.0f : 0.0f;
+            input.attr[attr][elem] = float24::FromFloat32(fill_value);
+        }
+
+        LOG_TRACE(HW_GPU, "Loaded %d components of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f %f %f %f",
+                  num_elements, attr, vertex, index,
+                  base_address,
+                  vertex_attribute_sources[attr],
+                  vertex_attribute_strides[attr] * vertex,
+                  input.attr[attr][0].ToFloat32(), input.attr[attr][1].ToFloat32(), input.attr[attr][2].ToFloat32(), input.attr[attr][3].ToFloat32());
+    }
+}
+
+} // namespace Pica
\ No newline at end of file
diff --git a/src/video_core/vertex_loader.h b/src/video_core/vertex_loader.h
new file mode 100644
index 000000000..ff42d1596
--- /dev/null
+++ b/src/video_core/vertex_loader.h
@@ -0,0 +1,28 @@
+#pragma once
+
+#include <iterator>
+#include <algorithm>
+
+#include "video_core/pica.h"
+#include "video_core/shader/shader.h"
+#include "video_core/debug_utils/debug_utils.h"
+
+namespace Pica {
+
+/**
+ * Turns the vertex attribute configuration of the PICA registers into lookup tables (Setup) and
+ * uses those tables to fill the shader input attributes of individual vertices (LoadVertex).
+ *
+ * All members are value-initialized, so an instance on which Setup has not been called yet
+ * reports zero attributes and LoadVertex leaves its input untouched.
+ */
+class VertexLoader {
+public:
+    /// Decodes the vertex attribute configuration found in `regs` into the tables below.
+    void Setup(const Pica::Regs& regs);
+
+    /**
+     * Loads the attributes of one vertex into `input`.
+     * @param base_address Physical base address that the stored source offsets are added to
+     * @param index Index value of the vertex (used for trace logging)
+     * @param vertex Vertex number, used together with the attribute stride to locate the data
+     * @param input Receives the loaded attribute values
+     * @param memory_accesses Receives the accessed address ranges while a debug recorder is attached
+     */
+    void LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses);
+
+    /// Number of attributes as read from the register state by the last Setup call (0 before that).
+    int GetNumTotalAttributes() const { return num_total_attributes; }
+
+private:
+    /// Per-attribute byte offset of the attribute data, relative to the physical base address
+    u32 vertex_attribute_sources[16] = {};
+    /// Per-attribute distance in bytes between the data of two consecutive vertices
+    u32 vertex_attribute_strides[16] = {};
+    /// Per-attribute storage format of the elements
+    Regs::VertexAttributeFormat vertex_attribute_formats[16] = {};
+    /// Per-attribute element count; zero means that no loader provides this attribute
+    u32 vertex_attribute_elements[16] = {};
+    /// Per-attribute flag telling whether the default attribute value is used when no loader provides data
+    bool vertex_attribute_is_default[16] = {};
+    /// Number of attributes that LoadVertex iterates over
+    int num_total_attributes = 0;
+};
+
+} // namespace Pica