summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/core/core.cpp25
-rw-r--r--src/core/file_sys/submission_package.cpp26
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp8
-rw-r--r--src/core/loader/nsp.cpp27
-rw-r--r--src/core/memory.cpp9
-rw-r--r--src/core/perf_stats.cpp47
-rw-r--r--src/core/perf_stats.h21
-rw-r--r--src/core/settings.h1
-rw-r--r--src/video_core/engines/maxwell_3d.cpp2
-rw-r--r--src/video_core/engines/shader_bytecode.h18
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp87
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h4
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp50
-rw-r--r--src/video_core/shader/decode/warp.cpp47
-rw-r--r--src/video_core/shader/node.h10
-rw-r--r--src/yuzu/configuration/config.cpp5
-rw-r--r--src/yuzu/configuration/configure_input.cpp2
-rw-r--r--src/yuzu/main.cpp8
-rw-r--r--src/yuzu_cmd/config.cpp2
-rw-r--r--src/yuzu_cmd/default_ini.h2
21 files changed, 352 insertions, 52 deletions
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 3d0978cbf..9ab174de2 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -160,10 +160,6 @@ struct System::Impl {
LOG_DEBUG(Core, "Initialized OK");
- // Reset counters and set time origin to current frame
- GetAndResetPerfStats();
- perf_stats.BeginSystemFrame();
-
return ResultStatus::Success;
}
@@ -206,6 +202,16 @@ struct System::Impl {
main_process->Run(load_parameters->main_thread_priority,
load_parameters->main_thread_stack_size);
+ u64 title_id{0};
+ if (app_loader->ReadProgramId(title_id) != Loader::ResultStatus::Success) {
+ LOG_ERROR(Core, "Failed to find title id for ROM (Error {})",
+ static_cast<u32>(load_result));
+ }
+ perf_stats = std::make_unique<PerfStats>(title_id);
+ // Reset counters and set time origin to current frame
+ GetAndResetPerfStats();
+ perf_stats->BeginSystemFrame();
+
status = ResultStatus::Success;
return status;
}
@@ -219,6 +225,8 @@ struct System::Impl {
perf_results.game_fps);
telemetry_session->AddField(Telemetry::FieldType::Performance, "Shutdown_Frametime",
perf_results.frametime * 1000.0);
+ telemetry_session->AddField(Telemetry::FieldType::Performance, "Mean_Frametime_MS",
+ perf_stats->GetMeanFrametime());
is_powered_on = false;
@@ -229,6 +237,7 @@ struct System::Impl {
service_manager.reset();
cheat_engine.reset();
telemetry_session.reset();
+ perf_stats.reset();
gpu_core.reset();
// Close all CPU/threading state
@@ -286,7 +295,7 @@ struct System::Impl {
}
PerfStatsResults GetAndResetPerfStats() {
- return perf_stats.GetAndResetStats(core_timing.GetGlobalTimeUs());
+ return perf_stats->GetAndResetStats(core_timing.GetGlobalTimeUs());
}
Timing::CoreTiming core_timing;
@@ -327,7 +336,7 @@ struct System::Impl {
ResultStatus status = ResultStatus::Success;
std::string status_details = "";
- Core::PerfStats perf_stats;
+ std::unique_ptr<Core::PerfStats> perf_stats;
Core::FrameLimiter frame_limiter;
};
@@ -480,11 +489,11 @@ const Timing::CoreTiming& System::CoreTiming() const {
}
Core::PerfStats& System::GetPerfStats() {
- return impl->perf_stats;
+ return *impl->perf_stats;
}
const Core::PerfStats& System::GetPerfStats() const {
- return impl->perf_stats;
+ return *impl->perf_stats;
}
Core::FrameLimiter& System::FrameLimiter() {
diff --git a/src/core/file_sys/submission_package.cpp b/src/core/file_sys/submission_package.cpp
index 8b3b14e25..730221fd6 100644
--- a/src/core/file_sys/submission_package.cpp
+++ b/src/core/file_sys/submission_package.cpp
@@ -14,6 +14,7 @@
#include "core/file_sys/content_archive.h"
#include "core/file_sys/nca_metadata.h"
#include "core/file_sys/partition_filesystem.h"
+#include "core/file_sys/program_metadata.h"
#include "core/file_sys/submission_package.h"
#include "core/loader/loader.h"
@@ -78,6 +79,10 @@ Loader::ResultStatus NSP::GetStatus() const {
}
Loader::ResultStatus NSP::GetProgramStatus(u64 title_id) const {
+ if (IsExtractedType() && GetExeFS() != nullptr && FileSys::IsDirectoryExeFS(GetExeFS())) {
+ return Loader::ResultStatus::Success;
+ }
+
const auto iter = program_status.find(title_id);
if (iter == program_status.end())
return Loader::ResultStatus::ErrorNSPMissingProgramNCA;
@@ -85,12 +90,29 @@ Loader::ResultStatus NSP::GetProgramStatus(u64 title_id) const {
}
u64 NSP::GetFirstTitleID() const {
+ if (IsExtractedType()) {
+ return GetProgramTitleID();
+ }
+
if (program_status.empty())
return 0;
return program_status.begin()->first;
}
u64 NSP::GetProgramTitleID() const {
+ if (IsExtractedType()) {
+ if (GetExeFS() == nullptr || !IsDirectoryExeFS(GetExeFS())) {
+ return 0;
+ }
+
+ ProgramMetadata meta;
+ if (meta.Load(GetExeFS()->GetFile("main.npdm")) == Loader::ResultStatus::Success) {
+ return meta.GetTitleID();
+ } else {
+ return 0;
+ }
+ }
+
const auto out = GetFirstTitleID();
if ((out & 0x800) == 0)
return out;
@@ -102,6 +124,10 @@ u64 NSP::GetProgramTitleID() const {
}
std::vector<u64> NSP::GetTitleIDs() const {
+ if (IsExtractedType()) {
+ return {GetProgramTitleID()};
+ }
+
std::vector<u64> out;
out.reserve(ncas.size());
for (const auto& kv : ncas)
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 241dac881..b4ee2a255 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -146,8 +146,8 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
}
IoctlSubmitGpfifo params{};
std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
- LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
- params.address, params.num_entries, params.flags.raw);
+ LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address,
+ params.num_entries, params.flags.raw);
ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) +
params.num_entries * sizeof(Tegra::CommandListHeader),
@@ -179,8 +179,8 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
}
IoctlSubmitGpfifo params{};
std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
- LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
- params.address, params.num_entries, params.flags.raw);
+ LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address,
+ params.num_entries, params.flags.raw);
Tegra::CommandList entries(params.num_entries);
Memory::ReadBlock(params.address, entries.data(),
diff --git a/src/core/loader/nsp.cpp b/src/core/loader/nsp.cpp
index b1171ce65..35c82c99d 100644
--- a/src/core/loader/nsp.cpp
+++ b/src/core/loader/nsp.cpp
@@ -26,20 +26,18 @@ AppLoader_NSP::AppLoader_NSP(FileSys::VirtualFile file)
if (nsp->GetStatus() != ResultStatus::Success)
return;
- if (nsp->IsExtractedType())
- return;
-
- const auto control_nca =
- nsp->GetNCA(nsp->GetProgramTitleID(), FileSys::ContentRecordType::Control);
- if (control_nca == nullptr || control_nca->GetStatus() != ResultStatus::Success)
- return;
-
- std::tie(nacp_file, icon_file) =
- FileSys::PatchManager(nsp->GetProgramTitleID()).ParseControlNCA(*control_nca);
if (nsp->IsExtractedType()) {
secondary_loader = std::make_unique<AppLoader_DeconstructedRomDirectory>(nsp->GetExeFS());
} else {
+ const auto control_nca =
+ nsp->GetNCA(nsp->GetProgramTitleID(), FileSys::ContentRecordType::Control);
+ if (control_nca == nullptr || control_nca->GetStatus() != ResultStatus::Success)
+ return;
+
+ std::tie(nacp_file, icon_file) =
+ FileSys::PatchManager(nsp->GetProgramTitleID()).ParseControlNCA(*control_nca);
+
if (title_id == 0)
return;
@@ -56,11 +54,11 @@ FileType AppLoader_NSP::IdentifyType(const FileSys::VirtualFile& file) {
if (nsp.GetStatus() == ResultStatus::Success) {
// Extracted Type case
if (nsp.IsExtractedType() && nsp.GetExeFS() != nullptr &&
- FileSys::IsDirectoryExeFS(nsp.GetExeFS()) && nsp.GetRomFS() != nullptr) {
+ FileSys::IsDirectoryExeFS(nsp.GetExeFS())) {
return FileType::NSP;
}
- // Non-Ectracted Type case
+ // Non-Extracted Type case
if (!nsp.IsExtractedType() &&
nsp.GetNCA(nsp.GetFirstTitleID(), FileSys::ContentRecordType::Program) != nullptr &&
AppLoader_NCA::IdentifyType(nsp.GetNCAFile(
@@ -77,7 +75,7 @@ AppLoader_NSP::LoadResult AppLoader_NSP::Load(Kernel::Process& process) {
return {ResultStatus::ErrorAlreadyLoaded, {}};
}
- if (title_id == 0) {
+ if (!nsp->IsExtractedType() && title_id == 0) {
return {ResultStatus::ErrorNSPMissingProgramNCA, {}};
}
@@ -91,7 +89,8 @@ AppLoader_NSP::LoadResult AppLoader_NSP::Load(Kernel::Process& process) {
return {nsp_program_status, {}};
}
- if (nsp->GetNCA(title_id, FileSys::ContentRecordType::Program) == nullptr) {
+ if (!nsp->IsExtractedType() &&
+ nsp->GetNCA(title_id, FileSys::ContentRecordType::Program) == nullptr) {
if (!Core::Crypto::KeyManager::KeyFileExists(false)) {
return {ResultStatus::ErrorMissingProductionKeyFile, {}};
}
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 8555691c0..9e030789d 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -43,8 +43,13 @@ static void MapPages(Common::PageTable& page_table, VAddr base, u64 size, u8* me
// During boot, current_page_table might not be set yet, in which case we need not flush
if (Core::System::GetInstance().IsPoweredOn()) {
- Core::System::GetInstance().GPU().FlushAndInvalidateRegion(base << PAGE_BITS,
- size * PAGE_SIZE);
+ auto& gpu = Core::System::GetInstance().GPU();
+ for (u64 i = 0; i < size; i++) {
+ const auto page = base + i;
+ if (page_table.attributes[page] == Common::PageType::RasterizerCachedMemory) {
+ gpu.FlushAndInvalidateRegion(page << PAGE_BITS, PAGE_SIZE);
+ }
+ }
}
VAddr end = base + size;
diff --git a/src/core/perf_stats.cpp b/src/core/perf_stats.cpp
index 4afd6c8a3..d2c69d1a0 100644
--- a/src/core/perf_stats.cpp
+++ b/src/core/perf_stats.cpp
@@ -4,8 +4,14 @@
#include <algorithm>
#include <chrono>
+#include <iterator>
#include <mutex>
+#include <numeric>
+#include <sstream>
#include <thread>
+#include <fmt/chrono.h>
+#include <fmt/format.h>
+#include "common/file_util.h"
#include "common/math_util.h"
#include "core/perf_stats.h"
#include "core/settings.h"
@@ -15,8 +21,31 @@ using DoubleSecs = std::chrono::duration<double, std::chrono::seconds::period>;
using std::chrono::duration_cast;
using std::chrono::microseconds;
+// Purposefully ignore the first five frames, as there's a significant amount of overhead in
+// booting that we shouldn't account for
+constexpr std::size_t IgnoreFrames = 5;
+
namespace Core {
+PerfStats::PerfStats(u64 title_id) : title_id(title_id) {}
+
+/// On destruction, dumps the recorded frametimes (one value per line) to a timestamped CSV
+/// file in the log directory. Nothing is written unless frame time recording is enabled in
+/// the settings and a non-zero title ID was supplied.
+PerfStats::~PerfStats() {
+    if (!Settings::values.record_frame_times || title_id == 0) {
+        return;
+    }
+
+    const std::time_t t = std::time(nullptr);
+    std::ostringstream stream;
+    // Fewer than IgnoreFrames frames may have been recorded. Clamp the start index so the
+    // iterator range handed to std::copy is never reversed, which would be undefined behavior.
+    const std::size_t first_frame = std::min(IgnoreFrames, current_index);
+    std::copy(perf_history.begin() + first_frame, perf_history.begin() + current_index,
+              std::ostream_iterator<double>(stream, "\n"));
+    const std::string& path = FileUtil::GetUserPath(FileUtil::UserPath::LogDir);
+    // %F Date format expanded is "%Y-%m-%d"
+    const std::string filename =
+        fmt::format("{}/{:%F-%H-%M}_{:016X}.csv", path, *std::localtime(&t), title_id);
+    FileUtil::IOFile file(filename, "w");
+    file.WriteString(stream.str());
+}
+
void PerfStats::BeginSystemFrame() {
std::lock_guard lock{object_mutex};
@@ -27,7 +56,12 @@ void PerfStats::EndSystemFrame() {
std::lock_guard lock{object_mutex};
auto frame_end = Clock::now();
- accumulated_frametime += frame_end - frame_begin;
+ const auto frame_time = frame_end - frame_begin;
+ if (current_index < perf_history.size()) {
+ perf_history[current_index++] =
+ std::chrono::duration<double, std::milli>(frame_time).count();
+ }
+ accumulated_frametime += frame_time;
system_frames += 1;
previous_frame_length = frame_end - previous_frame_end;
@@ -40,6 +74,17 @@ void PerfStats::EndGameFrame() {
game_frames += 1;
}
+/// Returns the arithmetic mean of the frametimes stored in perf_history after the first
+/// IgnoreFrames entries, or 0 when no frame beyond those has been recorded yet.
+double PerfStats::GetMeanFrametime() {
+    std::lock_guard lock{object_mutex};
+
+    if (current_index <= IgnoreFrames) {
+        return 0;
+    }
+    // The initial value must be a double: std::accumulate sums in the type of its init
+    // argument, so an int 0 would truncate every partial sum to a whole number.
+    const double sum = std::accumulate(perf_history.begin() + IgnoreFrames,
+                                       perf_history.begin() + current_index, 0.0);
+    return sum / static_cast<double>(current_index - IgnoreFrames);
+}
+
PerfStatsResults PerfStats::GetAndResetStats(microseconds current_system_time_us) {
std::lock_guard lock{object_mutex};
diff --git a/src/core/perf_stats.h b/src/core/perf_stats.h
index 222ac1a63..d9a64f072 100644
--- a/src/core/perf_stats.h
+++ b/src/core/perf_stats.h
@@ -4,7 +4,9 @@
#pragma once
+#include <array>
#include <chrono>
+#include <cstddef>
#include <mutex>
#include "common/common_types.h"
@@ -27,6 +29,10 @@ struct PerfStatsResults {
*/
class PerfStats {
public:
+ explicit PerfStats(u64 title_id);
+
+ ~PerfStats();
+
using Clock = std::chrono::high_resolution_clock;
void BeginSystemFrame();
@@ -36,13 +42,26 @@ public:
PerfStatsResults GetAndResetStats(std::chrono::microseconds current_system_time_us);
/**
+ * Returns the arithmetic mean (in ms) of the recorded frametimes, excluding the boot frames.
+ */
+ double GetMeanFrametime();
+
+ /**
* Gets the ratio between walltime and the emulated time of the previous system frame. This is
* useful for scaling inputs or outputs moving between the two time domains.
*/
double GetLastFrameTimeScale();
private:
- std::mutex object_mutex;
+ std::mutex object_mutex{};
+
+ /// Title ID for the game that is running. 0 if there is no game running yet
+ u64 title_id{0};
+ /// Current index for writing to the perf_history array
+ std::size_t current_index{0};
+ /// Stores an hour of historical frametime data useful for processing and tracking performance
+ /// regressions with code changes.
+ std::array<double, 216000> perf_history = {};
/// Point when the cumulative counters were reset
Clock::time_point reset_point = Clock::now();
diff --git a/src/core/settings.h b/src/core/settings.h
index 6638ce8f9..d4b70ec4c 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -409,6 +409,7 @@ struct Values {
float volume;
// Debugging
+ bool record_frame_times;
bool use_gdbstub;
u16 gdbstub_port;
std::string program_args;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index c7a3c85a0..fb3d1112c 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -541,7 +541,7 @@ void Maxwell3D::ProcessSyncPoint() {
}
void Maxwell3D::DrawArrays() {
- LOG_DEBUG(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
+ LOG_TRACE(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
regs.vertex_buffer.count);
ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 203e7758c..28272ef6f 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -566,6 +566,13 @@ enum class ImageAtomicOperation : u64 {
Exch = 8,
};
+enum class ShuffleOperation : u64 {
+ Idx = 0, // shuffleNV
+ Up = 1, // shuffleUpNV
+ Down = 2, // shuffleDownNV
+ Bfly = 3, // shuffleXorNV
+};
+
union Instruction {
Instruction& operator=(const Instruction& instr) {
value = instr.value;
@@ -600,6 +607,15 @@ union Instruction {
} vote;
union {
+ BitField<30, 2, ShuffleOperation> operation;
+ BitField<48, 3, u64> pred48;
+ BitField<28, 1, u64> is_index_imm;
+ BitField<29, 1, u64> is_mask_imm;
+ BitField<20, 5, u64> index_imm;
+ BitField<34, 13, u64> mask_imm;
+ } shfl;
+
+ union {
BitField<8, 8, Register> gpr;
BitField<20, 24, s64> offset;
} gmem;
@@ -1547,6 +1563,7 @@ public:
BRK,
DEPBAR,
VOTE,
+ SHFL,
BFE_C,
BFE_R,
BFE_IMM,
@@ -1842,6 +1859,7 @@ private:
INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
+ INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"),
INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 909ccb82c..0dbc4c02f 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -214,7 +214,8 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
std::string source = "#version 430 core\n"
"#extension GL_ARB_separate_shader_objects : enable\n"
"#extension GL_NV_gpu_shader5 : enable\n"
- "#extension GL_NV_shader_thread_group : enable\n";
+ "#extension GL_NV_shader_thread_group : enable\n"
+ "#extension GL_NV_shader_thread_shuffle : enable\n";
if (entries.shader_viewport_layer_array) {
source += "#extension GL_ARB_shader_viewport_layer_array : enable\n";
}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 14834d86a..76439e7ab 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1021,10 +1021,10 @@ private:
return {std::move(temporary), value.GetType()};
}
- Expression GetOutputAttribute(const AbufNode* abuf) {
+ std::optional<Expression> GetOutputAttribute(const AbufNode* abuf) {
switch (const auto attribute = abuf->GetIndex()) {
case Attribute::Index::Position:
- return {"gl_Position"s + GetSwizzle(abuf->GetElement()), Type::Float};
+ return {{"gl_Position"s + GetSwizzle(abuf->GetElement()), Type::Float}};
case Attribute::Index::LayerViewportPointSize:
switch (abuf->GetElement()) {
case 0:
@@ -1034,25 +1034,25 @@ private:
if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
return {};
}
- return {"gl_Layer", Type::Int};
+ return {{"gl_Layer", Type::Int}};
case 2:
if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
return {};
}
- return {"gl_ViewportIndex", Type::Int};
+ return {{"gl_ViewportIndex", Type::Int}};
case 3:
UNIMPLEMENTED_MSG("Requires some state changes for gl_PointSize to work in shader");
- return {"gl_PointSize", Type::Float};
+ return {{"gl_PointSize", Type::Float}};
}
return {};
case Attribute::Index::ClipDistances0123:
- return {fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), Type::Float};
+ return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), Type::Float}};
case Attribute::Index::ClipDistances4567:
- return {fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float};
+ return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}};
default:
if (IsGenericAttribute(attribute)) {
- return {GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()),
- Type::Float};
+ return {
+ {GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), Type::Float}};
}
UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute));
return {};
@@ -1292,7 +1292,11 @@ private:
target = {GetRegister(gpr->GetIndex()), Type::Float};
} else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer());
- target = GetOutputAttribute(abuf);
+ auto output = GetOutputAttribute(abuf);
+ if (!output) {
+ return {};
+ }
+ target = std::move(*output);
} else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
if (stage == ProgramType::Compute) {
LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
@@ -1953,8 +1957,7 @@ private:
Expression BallotThread(Operation operation) {
const std::string value = VisitOperand(operation, 0).AsBool();
if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL,
- "Nvidia warp intrinsics are not available and its required by a shader");
+ LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
// Stub on non-Nvidia devices by simulating all threads voting the same as the active
// one.
return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint};
@@ -1965,8 +1968,7 @@ private:
Expression Vote(Operation operation, const char* func) {
const std::string value = VisitOperand(operation, 0).AsBool();
if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL,
- "Nvidia vote intrinsics are not available and its required by a shader");
+ LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
// Stub with a warp size of one.
return {value, Type::Bool};
}
@@ -1983,15 +1985,54 @@ private:
Expression VoteEqual(Operation operation) {
if (!device.HasWarpIntrinsics()) {
- LOG_ERROR(Render_OpenGL,
- "Nvidia vote intrinsics are not available and its required by a shader");
- // We must return true here since a stub for a theoretical warp size of 1 will always
- // return an equal result for all its votes.
+ LOG_ERROR(Render_OpenGL, "Nvidia vote intrinsics are required by this shader");
+ // We must return true here since this is a stub for a theoretical warp size of 1,
+ // which always yields an equal result across all votes.
return {"true", Type::Bool};
}
return Vote(operation, "allThreadsEqualNV");
}
+ template <const std::string_view& func>
+ Expression Shuffle(Operation operation) {
+ const std::string value = VisitOperand(operation, 0).AsFloat();
+ if (!device.HasWarpIntrinsics()) {
+ LOG_ERROR(Render_OpenGL, "Nvidia shuffle intrinsics are required by this shader");
+ // On a "single-thread" device we are either on the same thread or out of bounds. Both
+ // cases return the passed value.
+ return {value, Type::Float};
+ }
+
+ const std::string index = VisitOperand(operation, 1).AsUint();
+ const std::string width = VisitOperand(operation, 2).AsUint();
+ return {fmt::format("{}({}, {}, {})", func, value, index, width), Type::Float};
+ }
+
+ template <const std::string_view& func>
+ Expression InRangeShuffle(Operation operation) {
+ const std::string index = VisitOperand(operation, 0).AsUint();
+ const std::string width = VisitOperand(operation, 1).AsUint();
+ if (!device.HasWarpIntrinsics()) {
+ // On a "single-thread" device we are only in bounds when the requested index is 0.
+ return {fmt::format("({} == 0U)", index), Type::Bool};
+ }
+
+ const std::string in_range = code.GenerateTemporary();
+ code.AddLine("bool {};", in_range);
+ code.AddLine("{}(0U, {}, {}, {});", func, index, width, in_range);
+ return {in_range, Type::Bool};
+ }
+
+ struct Func final {
+ Func() = delete;
+ ~Func() = delete;
+
+ static constexpr std::string_view ShuffleIndexed = "shuffleNV";
+ static constexpr std::string_view ShuffleUp = "shuffleUpNV";
+ static constexpr std::string_view ShuffleDown = "shuffleDownNV";
+ static constexpr std::string_view ShuffleButterfly = "shuffleXorNV";
+ };
+
static constexpr std::array operation_decompilers = {
&GLSLDecompiler::Assign,
@@ -2154,6 +2195,16 @@ private:
&GLSLDecompiler::VoteAll,
&GLSLDecompiler::VoteAny,
&GLSLDecompiler::VoteEqual,
+
+ &GLSLDecompiler::Shuffle<Func::ShuffleIndexed>,
+ &GLSLDecompiler::Shuffle<Func::ShuffleUp>,
+ &GLSLDecompiler::Shuffle<Func::ShuffleDown>,
+ &GLSLDecompiler::Shuffle<Func::ShuffleButterfly>,
+
+ &GLSLDecompiler::InRangeShuffle<Func::ShuffleIndexed>,
+ &GLSLDecompiler::InRangeShuffle<Func::ShuffleUp>,
+ &GLSLDecompiler::InRangeShuffle<Func::ShuffleDown>,
+ &GLSLDecompiler::InRangeShuffle<Func::ShuffleButterfly>,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index ea77dd211..9ed738171 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -145,7 +145,7 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
case Tegra::Texture::TextureMipmapFilter::None:
return GL_LINEAR;
case Tegra::Texture::TextureMipmapFilter::Nearest:
- return GL_NEAREST_MIPMAP_LINEAR;
+ return GL_LINEAR_MIPMAP_NEAREST;
case Tegra::Texture::TextureMipmapFilter::Linear:
return GL_LINEAR_MIPMAP_LINEAR;
}
@@ -157,7 +157,7 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode,
case Tegra::Texture::TextureMipmapFilter::Nearest:
return GL_NEAREST_MIPMAP_NEAREST;
case Tegra::Texture::TextureMipmapFilter::Linear:
- return GL_LINEAR_MIPMAP_NEAREST;
+ return GL_NEAREST_MIPMAP_LINEAR;
}
}
}
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index b9153934e..f7fbbb6e4 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1127,6 +1127,46 @@ private:
return {};
}
+ Id ShuffleIndexed(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id ShuffleUp(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id ShuffleDown(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id ShuffleButterfly(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id InRangeShuffleIndexed(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id InRangeShuffleUp(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id InRangeShuffleDown(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id InRangeShuffleButterfly(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type,
const std::string& name) {
const Id id = OpVariable(type, storage);
@@ -1431,6 +1471,16 @@ private:
&SPIRVDecompiler::VoteAll,
&SPIRVDecompiler::VoteAny,
&SPIRVDecompiler::VoteEqual,
+
+ &SPIRVDecompiler::ShuffleIndexed,
+ &SPIRVDecompiler::ShuffleUp,
+ &SPIRVDecompiler::ShuffleDown,
+ &SPIRVDecompiler::ShuffleButterfly,
+
+ &SPIRVDecompiler::InRangeShuffleIndexed,
+ &SPIRVDecompiler::InRangeShuffleUp,
+ &SPIRVDecompiler::InRangeShuffleDown,
+ &SPIRVDecompiler::InRangeShuffleButterfly,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
index 04ca74f46..a8e481b3c 100644
--- a/src/video_core/shader/decode/warp.cpp
+++ b/src/video_core/shader/decode/warp.cpp
@@ -13,6 +13,7 @@ namespace VideoCommon::Shader {
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Pred;
+using Tegra::Shader::ShuffleOperation;
using Tegra::Shader::VoteOperation;
namespace {
@@ -44,6 +45,52 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
SetPredicate(bb, instr.vote.dest_pred, vote);
break;
}
+ case OpCode::Id::SHFL: {
+ Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
+ : GetRegister(instr.gpr39);
+ Node width = [&] {
+ // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This was
+ // derived by reversing Nvidia's math. It won't work in all cases because SHFL takes
+ // parameters that don't map cleanly onto GLSL's interface, but it should work for the
+ // cases emitted by Nvidia's compiler.
+ if (instr.shfl.operation == ShuffleOperation::Up) {
+ return Operation(
+ OperationCode::ILogicalShiftRight,
+ Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)),
+ Immediate(8));
+ } else {
+ return Operation(OperationCode::ILogicalShiftRight,
+ Operation(OperationCode::IAdd, Immediate(0x201F),
+ Operation(OperationCode::INegate, std::move(mask))),
+ Immediate(8));
+ }
+ }();
+
+ const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> {
+ switch (instr.shfl.operation) {
+ case ShuffleOperation::Idx:
+ return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed};
+ case ShuffleOperation::Up:
+ return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp};
+ case ShuffleOperation::Down:
+ return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown};
+ case ShuffleOperation::Bfly:
+ return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly};
+ }
+ UNREACHABLE_MSG("Invalid SHFL operation: {}",
+ static_cast<u64>(instr.shfl.operation.Value()));
+ return {};
+ }();
+
+ // Setting the predicate before the register is intentional to avoid overwriting.
+ Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
+ : GetRegister(instr.gpr20);
+ SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width));
+ SetRegister(
+ bb, instr.gpr0,
+ Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width)));
+ break;
+ }
default:
UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName());
break;
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 425111cc4..abf2cb1ab 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -181,6 +181,16 @@ enum class OperationCode {
VoteAny, /// (bool) -> bool
VoteEqual, /// (bool) -> bool
+ ShuffleIndexed, /// (uint value, uint index, uint width) -> uint
+ ShuffleUp, /// (uint value, uint index, uint width) -> uint
+ ShuffleDown, /// (uint value, uint index, uint width) -> uint
+ ShuffleButterfly, /// (uint value, uint index, uint width) -> uint
+
+ InRangeShuffleIndexed, /// (uint index, uint width) -> bool
+ InRangeShuffleUp, /// (uint index, uint width) -> bool
+ InRangeShuffleDown, /// (uint index, uint width) -> bool
+ InRangeShuffleButterfly, /// (uint index, uint width) -> bool
+
Amount,
};
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index f594106bf..3f54f54fb 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -466,6 +466,9 @@ void Config::ReadDataStorageValues() {
void Config::ReadDebuggingValues() {
qt_config->beginGroup(QStringLiteral("Debugging"));
+ // Intentionally not using the QT default setting as this is intended to be changed in the ini
+ Settings::values.record_frame_times =
+ qt_config->value(QStringLiteral("record_frame_times"), false).toBool();
Settings::values.use_gdbstub = ReadSetting(QStringLiteral("use_gdbstub"), false).toBool();
Settings::values.gdbstub_port = ReadSetting(QStringLiteral("gdbstub_port"), 24689).toInt();
Settings::values.program_args =
@@ -879,6 +882,8 @@ void Config::SaveDataStorageValues() {
void Config::SaveDebuggingValues() {
qt_config->beginGroup(QStringLiteral("Debugging"));
+ // Intentionally not using the QT default setting as this is intended to be changed in the ini
+ qt_config->setValue(QStringLiteral("record_frame_times"), Settings::values.record_frame_times);
WriteSetting(QStringLiteral("use_gdbstub"), Settings::values.use_gdbstub, false);
WriteSetting(QStringLiteral("gdbstub_port"), Settings::values.gdbstub_port, 24689);
WriteSetting(QStringLiteral("program_args"),
diff --git a/src/yuzu/configuration/configure_input.cpp b/src/yuzu/configuration/configure_input.cpp
index 7613197f2..f2977719c 100644
--- a/src/yuzu/configuration/configure_input.cpp
+++ b/src/yuzu/configuration/configure_input.cpp
@@ -182,6 +182,8 @@ void ConfigureInput::UpdateUIEnabled() {
players_configure[i]->setEnabled(players_controller[i]->currentIndex() != 0);
}
+ ui->handheld_connected->setChecked(ui->handheld_connected->isChecked() &&
+ !ui->use_docked_mode->isChecked());
ui->handheld_connected->setEnabled(!ui->use_docked_mode->isChecked());
ui->handheld_configure->setEnabled(ui->handheld_connected->isChecked() &&
!ui->use_docked_mode->isChecked());
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 8304c6517..1dcfac258 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -54,6 +54,7 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
#include <QProgressDialog>
#include <QShortcut>
#include <QStatusBar>
+#include <QSysInfo>
#include <QtConcurrent/QtConcurrent>
#include <fmt/format.h>
@@ -66,6 +67,9 @@ static FileSys::VirtualFile VfsDirectoryCreateFileWrapper(const FileSys::Virtual
#include "common/microprofile.h"
#include "common/scm_rev.h"
#include "common/scope_exit.h"
+#ifdef ARCHITECTURE_x86_64
+#include "common/x64/cpu_detect.h"
+#endif
#include "common/telemetry.h"
#include "core/core.h"
#include "core/crypto/key_manager.h"
@@ -205,6 +209,10 @@ GMainWindow::GMainWindow()
LOG_INFO(Frontend, "yuzu Version: {} | {}-{}", Common::g_build_fullname, Common::g_scm_branch,
Common::g_scm_desc);
+#ifdef ARCHITECTURE_x86_64
+ LOG_INFO(Frontend, "Host CPU: {}", Common::GetCPUCaps().cpu_string);
+#endif
+ LOG_INFO(Frontend, "Host OS: {}", QSysInfo::prettyProductName().toStdString());
UpdateWindowTitle();
show();
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 067d58d80..5cadfd191 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -374,6 +374,8 @@ void Config::ReadValues() {
Settings::values.use_dev_keys = sdl2_config->GetBoolean("Miscellaneous", "use_dev_keys", false);
// Debugging
+ Settings::values.record_frame_times =
+ sdl2_config->GetBoolean("Debugging", "record_frame_times", false);
Settings::values.use_gdbstub = sdl2_config->GetBoolean("Debugging", "use_gdbstub", false);
Settings::values.gdbstub_port =
static_cast<u16>(sdl2_config->GetInteger("Debugging", "gdbstub_port", 24689));
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 0cfc111a6..f9f244522 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -213,6 +213,8 @@ region_value =
log_filter = *:Trace
[Debugging]
+# Record frame time data, can be found in the log directory. Boolean value
+record_frame_times =
# Port for listening to GDB connections.
use_gdbstub=false
gdbstub_port=24689