summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/engines/maxwell_3d.cpp91
-rw-r--r--src/video_core/engines/maxwell_3d.h16
-rw-r--r--src/video_core/engines/shader_bytecode.h4
-rw-r--r--src/video_core/gpu.cpp18
-rw-r--r--src/video_core/gpu.h2
-rw-r--r--src/video_core/memory_manager.cpp14
-rw-r--r--src/video_core/memory_manager.h7
-rw-r--r--src/video_core/shader/decode/arithmetic_integer.cpp4
-rw-r--r--src/video_core/shader/decode/bfi.cpp2
9 files changed, 92 insertions, 66 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 7cea146f0..0b3e8749b 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -9,6 +9,7 @@
#include "core/core_timing.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
+#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/textures/texture.h"
@@ -519,61 +520,63 @@ void Maxwell3D::ProcessFirmwareCall4() {
regs.reg_array[0xd00] = 1;
}
-void Maxwell3D::ProcessQueryGet() {
+void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
+ struct LongQueryResult {
+ u64_le value;
+ u64_le timestamp;
+ };
+ static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size");
const GPUVAddr sequence_address{regs.query.QueryAddress()};
- // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
- // VAddr before writing.
+ if (long_query) {
+ // Long mode: write the 128-bit {value, timestamp} structure, stamped with GPU ticks.
+ // Note: we emulate an infinitely fast GPU; on real hardware this command may take a
+ // while to complete due to GPU wait queues.
+ LongQueryResult query_result{payload, system.GPU().GetTicks()};
+ memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
+ } else { // Short mode: only the low 32 bits of the payload are written.
+ memory_manager.Write<u32>(sequence_address, static_cast<u32>(payload));
+ }
+}
+void Maxwell3D::ProcessQueryGet() {
// TODO(Subv): Support the other query units.
ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
"Units other than CROP are unimplemented");
- u64 result = 0;
-
- // TODO(Subv): Support the other query variables
- switch (regs.query.query_get.select) {
- case Regs::QuerySelect::Zero:
- // This seems to actually write the query sequence to the query address.
- result = regs.query.query_sequence;
+ switch (regs.query.query_get.operation) {
+ case Regs::QueryOperation::Release: {
+ const u64 result = regs.query.query_sequence;
+ StampQueryResult(result, regs.query.query_get.short_query == 0);
break;
- default:
- result = 1;
- UNIMPLEMENTED_MSG("Unimplemented query select type {}",
- static_cast<u32>(regs.query.query_get.select.Value()));
}
-
- // TODO(Subv): Research and implement how query sync conditions work.
-
- struct LongQueryResult {
- u64_le value;
- u64_le timestamp;
- };
- static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size");
-
- switch (regs.query.query_get.mode) {
- case Regs::QueryMode::Write:
- case Regs::QueryMode::Write2: {
- u32 sequence = regs.query.query_sequence;
- if (regs.query.query_get.short_query) {
- // Write the current query sequence to the sequence address.
- // TODO(Subv): Find out what happens if you use a long query type but mark it as a short
- // query.
- memory_manager.Write<u32>(sequence_address, sequence);
- } else {
- // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
- // GPU, this command may actually take a while to complete in real hardware due to GPU
- // wait queues.
- LongQueryResult query_result{};
- query_result.value = result;
- // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
- query_result.timestamp = system.CoreTiming().GetTicks();
- memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
+ case Regs::QueryOperation::Acquire: {
+ // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU
+ // to write a value that matches the current payload.
+ UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE");
+ break;
+ }
+ case Regs::QueryOperation::Counter: {
+ u64 result{};
+ switch (regs.query.query_get.select) {
+ case Regs::QuerySelect::Zero:
+ result = 0;
+ break;
+ default:
+ result = 1;
+ UNIMPLEMENTED_MSG("Unimplemented query select type {}",
+ static_cast<u32>(regs.query.query_get.select.Value()));
}
+ StampQueryResult(result, regs.query.query_get.short_query == 0);
+ break;
+ }
+ case Regs::QueryOperation::Trap: {
+ UNIMPLEMENTED_MSG("Unimplemented query operation TRAP");
+ break;
+ }
+ default: {
+ UNIMPLEMENTED_MSG("Unknown query operation");
break;
}
- default:
- UNIMPLEMENTED_MSG("Query mode {} not implemented",
- static_cast<u32>(regs.query.query_get.mode.Value()));
}
}
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 7b1912a66..0a2af54e5 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -71,12 +71,11 @@ public:
static constexpr std::size_t MaxConstBuffers = 18;
static constexpr std::size_t MaxConstBufferSize = 0x10000;
- enum class QueryMode : u32 {
- Write = 0,
- Sync = 1,
- // TODO(Subv): It is currently unknown what the difference between method 2 and method 0
- // is.
- Write2 = 2,
+ enum class QueryOperation : u32 {
+ Release = 0,
+ Acquire = 1,
+ Counter = 2,
+ Trap = 3,
};
enum class QueryUnit : u32 {
@@ -1081,7 +1080,7 @@ public:
u32 query_sequence;
union {
u32 raw;
- BitField<0, 2, QueryMode> mode;
+ BitField<0, 2, QueryOperation> operation;
BitField<4, 1, u32> fence;
BitField<12, 4, QueryUnit> unit;
BitField<16, 1, QuerySyncCondition> sync_cond;
@@ -1413,6 +1412,9 @@ private:
/// Handles a write to the QUERY_GET register.
void ProcessQueryGet();
+ /// Writes payload to the query address: 16-byte {value, timestamp} if long_query, else a u32.
+ void StampQueryResult(u64 payload, bool long_query);
+
// Handles Conditional Rendering
void ProcessQueryCondition();
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 402869fde..c9bc83cd7 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1677,11 +1677,11 @@ union Instruction {
} xmad;
union {
- BitField<20, 14, u64> offset;
+ BitField<20, 14, u64> shifted_offset;
BitField<34, 5, u64> index;
u64 GetOffset() const {
- return offset * 4;
+ return shifted_offset * 4;
}
} cbuf34;
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 062ca83b8..7d7137109 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -6,6 +6,7 @@
#include "common/microprofile.h"
#include "core/core.h"
#include "core/core_timing.h"
+#include "core/core_timing_util.h"
#include "core/memory.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/kepler_compute.h"
@@ -23,7 +24,7 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async)
: system{system}, renderer{renderer}, is_async{is_async} {
auto& rasterizer{renderer.Rasterizer()};
- memory_manager = std::make_unique<Tegra::MemoryManager>(system);
+ memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer);
dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer);
@@ -122,6 +123,19 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
return true;
}
+u64 GPU::GetTicks() const {
+ // These values were reverse engineered by fincs from NVN: GPU ticks = nanoseconds * 384 / 625.
+ // The quotient/remainder split below computes that without overflowing the u64 product.
+ constexpr u64 gpu_ticks_num = 384;
+ constexpr u64 gpu_ticks_den = 625;
+
+ const u64 cpu_ticks = system.CoreTiming().GetTicks();
+ const u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count();
+ const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;
+ const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den;
+ return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den;
+}
+
void GPU::FlushCommands() {
renderer.Rasterizer().FlushCommands();
}
@@ -340,7 +354,7 @@ void GPU::ProcessSemaphoreTriggerMethod() {
block.sequence = regs.semaphore_sequence;
// TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
// CoreTiming
- block.timestamp = system.CoreTiming().GetTicks();
+ block.timestamp = GetTicks();
memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
sizeof(block));
} else {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index b648317bb..07727210c 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -192,6 +192,8 @@ public:
bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value);
+ u64 GetTicks() const;
+
std::unique_lock<std::mutex> LockSync() {
return std::unique_lock{sync_mutex};
}
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index f1d50be3e..11848fbce 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -9,12 +9,13 @@
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/vm_manager.h"
#include "core/memory.h"
-#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
namespace Tegra {
-MemoryManager::MemoryManager(Core::System& system) : system{system} {
+MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
+ : rasterizer{rasterizer}, system{system} {
std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
std::fill(page_table.attributes.begin(), page_table.attributes.end(),
Common::PageType::Unmapped);
@@ -83,8 +84,7 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
const auto cpu_addr = GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr);
- system.GPU().FlushAndInvalidateRegion(cache_addr, aligned_size);
-
+ rasterizer.FlushAndInvalidateRegion(cache_addr, aligned_size);
UnmapRange(gpu_addr, aligned_size);
ASSERT(system.CurrentProcess()
->VMManager()
@@ -242,7 +242,7 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s
switch (page_table.attributes[page_index]) {
case Common::PageType::Memory: {
const u8* src_ptr{page_table.pointers[page_index] + page_offset};
- system.GPU().FlushRegion(ToCacheAddr(src_ptr), copy_amount);
+ rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
std::memcpy(dest_buffer, src_ptr, copy_amount);
break;
}
@@ -292,7 +292,7 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const
switch (page_table.attributes[page_index]) {
case Common::PageType::Memory: {
u8* dest_ptr{page_table.pointers[page_index] + page_offset};
- system.GPU().InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount);
+ rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount);
std::memcpy(dest_ptr, src_buffer, copy_amount);
break;
}
@@ -340,7 +340,7 @@ void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::
switch (page_table.attributes[page_index]) {
case Common::PageType::Memory: {
const u8* src_ptr{page_table.pointers[page_index] + page_offset};
- system.GPU().FlushRegion(ToCacheAddr(src_ptr), copy_amount);
+ rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount);
WriteBlock(dest_addr, src_ptr, copy_amount);
break;
}
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 393447eb4..aea010087 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -10,6 +10,10 @@
#include "common/common_types.h"
#include "common/page_table.h"
+namespace VideoCore {
+class RasterizerInterface;
+}
+
namespace Core {
class System;
}
@@ -47,7 +51,7 @@ struct VirtualMemoryArea {
class MemoryManager final {
public:
- explicit MemoryManager(Core::System& system);
+ explicit MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer);
~MemoryManager();
GPUVAddr AllocateSpace(u64 size, u64 align);
@@ -172,6 +176,7 @@ private:
Common::PageTable page_table{page_bits};
VMAMap vma_map;
+ VideoCore::RasterizerInterface& rasterizer;
Core::System& system;
};
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index e60875cc4..21366869d 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -166,13 +166,13 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> {
switch (opcode->get().GetId()) {
case OpCode::Id::ICMP_CR:
- return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
+ return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
GetRegister(instr.gpr39)};
case OpCode::Id::ICMP_R:
return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
case OpCode::Id::ICMP_RC:
return {GetRegister(instr.gpr39),
- GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
+ GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
case OpCode::Id::ICMP_IMM:
return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)};
default:
diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp
index f992bbe2a..70d1c055b 100644
--- a/src/video_core/shader/decode/bfi.cpp
+++ b/src/video_core/shader/decode/bfi.cpp
@@ -21,7 +21,7 @@ u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) {
switch (opcode->get().GetId()) {
case OpCode::Id::BFI_RC:
return {GetRegister(instr.gpr39),
- GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
+ GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
case OpCode::Id::BFI_IMM_R:
return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)};
default: