summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/core/arm/arm_interface.h14
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic.cpp23
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic.h6
-rw-r--r--src/core/arm/unicorn/arm_unicorn.h2
-rw-r--r--src/core/core.cpp33
-rw-r--r--src/core/cpu_core_manager.cpp6
-rw-r--r--src/core/cpu_core_manager.h7
-rw-r--r--src/core/hle/kernel/kernel.cpp7
-rw-r--r--src/core/hle/kernel/process.cpp22
-rw-r--r--src/core/hle/kernel/process.h7
-rw-r--r--src/core/hle/kernel/svc.cpp20
-rw-r--r--src/core/hle/kernel/thread.h12
-rw-r--r--src/core/loader/deconstructed_rom_directory.cpp40
-rw-r--r--src/core/loader/deconstructed_rom_directory.h2
-rw-r--r--src/core/loader/elf.cpp15
-rw-r--r--src/core/loader/elf.h2
-rw-r--r--src/core/loader/loader.h8
-rw-r--r--src/core/loader/nax.cpp30
-rw-r--r--src/core/loader/nax.h2
-rw-r--r--src/core/loader/nca.cpp26
-rw-r--r--src/core/loader/nca.h2
-rw-r--r--src/core/loader/nro.cpp14
-rw-r--r--src/core/loader/nro.h2
-rw-r--r--src/core/loader/nso.cpp11
-rw-r--r--src/core/loader/nso.h2
-rw-r--r--src/core/loader/nsp.cpp38
-rw-r--r--src/core/loader/nsp.h2
-rw-r--r--src/core/loader/xci.cpp28
-rw-r--r--src/core/loader/xci.h2
-rw-r--r--src/core/memory.cpp16
-rw-r--r--src/core/memory.h5
-rw-r--r--src/video_core/CMakeLists.txt4
-rw-r--r--src/video_core/engines/maxwell_3d.cpp39
-rw-r--r--src/video_core/engines/maxwell_3d.h6
-rw-r--r--src/video_core/engines/shader_bytecode.h36
-rw-r--r--src/video_core/gpu.h5
-rw-r--r--src/video_core/gpu_asynch.cpp6
-rw-r--r--src/video_core/gpu_asynch.h5
-rw-r--r--src/video_core/gpu_synch.cpp4
-rw-r--r--src/video_core/gpu_synch.h1
-rw-r--r--src/video_core/gpu_thread.cpp17
-rw-r--r--src/video_core/gpu_thread.h6
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp104
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h32
-rw-r--r--src/video_core/renderer_opengl/gl_sampler_cache.cpp52
-rw-r--r--src/video_core/renderer_opengl/gl_sampler_cache.h25
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp14
-rw-r--r--src/video_core/renderer_vulkan/vk_sampler_cache.cpp40
-rw-r--r--src/video_core/renderer_vulkan/vk_sampler_cache.h36
-rw-r--r--src/video_core/sampler_cache.cpp21
-rw-r--r--src/video_core/sampler_cache.h60
-rw-r--r--src/video_core/shader/decode/texture.cpp113
-rw-r--r--src/video_core/shader/shader_ir.h40
-rw-r--r--src/video_core/video_core.cpp10
-rw-r--r--src/video_core/video_core.h7
-rw-r--r--src/yuzu/bootmanager.cpp7
-rw-r--r--src/yuzu/bootmanager.h1
58 files changed, 674 insertions, 426 deletions
diff --git a/src/core/arm/arm_interface.h b/src/core/arm/arm_interface.h
index 4dfd41b43..978b1518f 100644
--- a/src/core/arm/arm_interface.h
+++ b/src/core/arm/arm_interface.h
@@ -7,6 +7,10 @@
#include <array>
#include "common/common_types.h"
+namespace Common {
+struct PageTable;
+}
+
namespace Kernel {
enum class VMAPermission : u8;
}
@@ -49,8 +53,14 @@ public:
/// Clear all instruction cache
virtual void ClearInstructionCache() = 0;
- /// Notify CPU emulation that page tables have changed
- virtual void PageTableChanged() = 0;
+ /// Notifies CPU emulation that the current page table has changed.
+ ///
+ /// @param new_page_table The new page table.
+ /// @param new_address_space_size_in_bits The new usable size of the address space in bits.
+ /// This can be either 32, 36, or 39 on official software.
+ ///
+ virtual void PageTableChanged(Common::PageTable& new_page_table,
+ std::size_t new_address_space_size_in_bits) = 0;
/**
* Set the Program Counter to an address
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index dc96e35d5..44307fa19 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -14,7 +14,6 @@
#include "core/core_timing.h"
#include "core/core_timing_util.h"
#include "core/gdbstub/gdbstub.h"
-#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/svc.h"
#include "core/hle/kernel/vm_manager.h"
@@ -129,18 +128,16 @@ public:
u64 tpidr_el0 = 0;
};
-std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const {
- auto* current_process = system.Kernel().CurrentProcess();
- auto** const page_table = current_process->VMManager().page_table.pointers.data();
-
+std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit(Common::PageTable& page_table,
+ std::size_t address_space_bits) const {
Dynarmic::A64::UserConfig config;
// Callbacks
config.callbacks = cb.get();
// Memory
- config.page_table = reinterpret_cast<void**>(page_table);
- config.page_table_address_space_bits = current_process->VMManager().GetAddressSpaceWidth();
+ config.page_table = reinterpret_cast<void**>(page_table.pointers.data());
+ config.page_table_address_space_bits = address_space_bits;
config.silently_mirror_page_table = false;
// Multi-process state
@@ -176,12 +173,7 @@ ARM_Dynarmic::ARM_Dynarmic(System& system, ExclusiveMonitor& exclusive_monitor,
std::size_t core_index)
: cb(std::make_unique<ARM_Dynarmic_Callbacks>(*this)), inner_unicorn{system},
core_index{core_index}, system{system},
- exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {
- ThreadContext ctx{};
- inner_unicorn.SaveContext(ctx);
- PageTableChanged();
- LoadContext(ctx);
-}
+ exclusive_monitor{dynamic_cast<DynarmicExclusiveMonitor&>(exclusive_monitor)} {}
ARM_Dynarmic::~ARM_Dynarmic() = default;
@@ -276,8 +268,9 @@ void ARM_Dynarmic::ClearExclusiveState() {
jit->ClearExclusiveState();
}
-void ARM_Dynarmic::PageTableChanged() {
- jit = MakeJit();
+void ARM_Dynarmic::PageTableChanged(Common::PageTable& page_table,
+ std::size_t new_address_space_size_in_bits) {
+ jit = MakeJit(page_table, new_address_space_size_in_bits);
}
DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(std::size_t core_count) : monitor(core_count) {}
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h
index c1db254e8..b701e97a3 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.h
+++ b/src/core/arm/dynarmic/arm_dynarmic.h
@@ -48,10 +48,12 @@ public:
void ClearExclusiveState() override;
void ClearInstructionCache() override;
- void PageTableChanged() override;
+ void PageTableChanged(Common::PageTable& new_page_table,
+ std::size_t new_address_space_size_in_bits) override;
private:
- std::unique_ptr<Dynarmic::A64::Jit> MakeJit() const;
+ std::unique_ptr<Dynarmic::A64::Jit> MakeJit(Common::PageTable& page_table,
+ std::size_t address_space_bits) const;
friend class ARM_Dynarmic_Callbacks;
std::unique_ptr<ARM_Dynarmic_Callbacks> cb;
diff --git a/src/core/arm/unicorn/arm_unicorn.h b/src/core/arm/unicorn/arm_unicorn.h
index 209fc16ad..34e974b4d 100644
--- a/src/core/arm/unicorn/arm_unicorn.h
+++ b/src/core/arm/unicorn/arm_unicorn.h
@@ -41,7 +41,7 @@ public:
void Run() override;
void Step() override;
void ClearInstructionCache() override;
- void PageTableChanged() override{};
+ void PageTableChanged(Common::PageTable&, std::size_t) override {}
void RecordBreak(GDBStub::BreakpointAddress bkpt);
private:
diff --git a/src/core/core.cpp b/src/core/core.cpp
index bc9e887b6..175a5f2ea 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -3,9 +3,7 @@
// Refer to the license.txt file included.
#include <array>
-#include <map>
#include <memory>
-#include <thread>
#include <utility>
#include "common/file_util.h"
@@ -38,8 +36,6 @@
#include "frontend/applets/software_keyboard.h"
#include "frontend/applets/web_browser.h"
#include "video_core/debug_utils/debug_utils.h"
-#include "video_core/gpu_asynch.h"
-#include "video_core/gpu_synch.h"
#include "video_core/renderer_base.h"
#include "video_core/video_core.h"
@@ -81,7 +77,7 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
return vfs->OpenFile(path, FileSys::Mode::Read);
}
struct System::Impl {
- explicit Impl(System& system) : kernel{system} {}
+ explicit Impl(System& system) : kernel{system}, cpu_core_manager{system} {}
Cpu& CurrentCpuCore() {
return cpu_core_manager.GetCurrentCore();
@@ -99,6 +95,7 @@ struct System::Impl {
LOG_DEBUG(HW_Memory, "initialized OK");
core_timing.Initialize();
+ cpu_core_manager.Initialize();
kernel.Initialize();
const auto current_time = std::chrono::duration_cast<std::chrono::seconds>(
@@ -120,9 +117,6 @@ struct System::Impl {
if (web_browser == nullptr)
web_browser = std::make_unique<Core::Frontend::DefaultWebBrowserApplet>();
- auto main_process = Kernel::Process::Create(system, "main");
- kernel.MakeCurrentProcess(main_process.get());
-
telemetry_session = std::make_unique<Core::TelemetrySession>();
service_manager = std::make_shared<Service::SM::ServiceManager>();
@@ -134,15 +128,9 @@ struct System::Impl {
return ResultStatus::ErrorVideoCore;
}
- is_powered_on = true;
-
- if (Settings::values.use_asynchronous_gpu_emulation) {
- gpu_core = std::make_unique<VideoCommon::GPUAsynch>(system, *renderer);
- } else {
- gpu_core = std::make_unique<VideoCommon::GPUSynch>(system, *renderer);
- }
+ gpu_core = VideoCore::CreateGPU(system);
- cpu_core_manager.Initialize(system);
+ is_powered_on = true;
LOG_DEBUG(Core, "Initialized OK");
@@ -179,7 +167,8 @@ struct System::Impl {
return init_result;
}
- const Loader::ResultStatus load_result{app_loader->Load(*kernel.CurrentProcess())};
+ auto main_process = Kernel::Process::Create(system, "main");
+ const auto [load_result, load_parameters] = app_loader->Load(*main_process);
if (load_result != Loader::ResultStatus::Success) {
LOG_CRITICAL(Core, "Failed to load ROM (Error {})!", static_cast<int>(load_result));
Shutdown();
@@ -187,6 +176,16 @@ struct System::Impl {
return static_cast<ResultStatus>(static_cast<u32>(ResultStatus::ErrorLoader) +
static_cast<u32>(load_result));
}
+ kernel.MakeCurrentProcess(main_process.get());
+
+ // Main process has been loaded and been made current.
+ // Begin GPU and CPU execution.
+ gpu_core->Start();
+ cpu_core_manager.StartThreads();
+
+ // All threads are started, begin main process execution, now that we're in the clear.
+ main_process->Run(load_parameters->main_thread_priority,
+ load_parameters->main_thread_stack_size);
status = ResultStatus::Success;
return status;
diff --git a/src/core/cpu_core_manager.cpp b/src/core/cpu_core_manager.cpp
index 93bc5619c..8fcb4eeb1 100644
--- a/src/core/cpu_core_manager.cpp
+++ b/src/core/cpu_core_manager.cpp
@@ -19,17 +19,19 @@ void RunCpuCore(const System& system, Cpu& cpu_state) {
}
} // Anonymous namespace
-CpuCoreManager::CpuCoreManager() = default;
+CpuCoreManager::CpuCoreManager(System& system) : system{system} {}
CpuCoreManager::~CpuCoreManager() = default;
-void CpuCoreManager::Initialize(System& system) {
+void CpuCoreManager::Initialize() {
barrier = std::make_unique<CpuBarrier>();
exclusive_monitor = Cpu::MakeExclusiveMonitor(cores.size());
for (std::size_t index = 0; index < cores.size(); ++index) {
cores[index] = std::make_unique<Cpu>(system, *exclusive_monitor, *barrier, index);
}
+}
+void CpuCoreManager::StartThreads() {
// Create threads for CPU cores 1-3, and build thread_to_cpu map
// CPU core 0 is run on the main thread
thread_to_cpu[std::this_thread::get_id()] = cores[0].get();
diff --git a/src/core/cpu_core_manager.h b/src/core/cpu_core_manager.h
index a4d70ec56..2cbbf8216 100644
--- a/src/core/cpu_core_manager.h
+++ b/src/core/cpu_core_manager.h
@@ -18,7 +18,7 @@ class System;
class CpuCoreManager {
public:
- CpuCoreManager();
+ explicit CpuCoreManager(System& system);
CpuCoreManager(const CpuCoreManager&) = delete;
CpuCoreManager(CpuCoreManager&&) = delete;
@@ -27,7 +27,8 @@ public:
CpuCoreManager& operator=(const CpuCoreManager&) = delete;
CpuCoreManager& operator=(CpuCoreManager&&) = delete;
- void Initialize(System& system);
+ void Initialize();
+ void StartThreads();
void Shutdown();
Cpu& GetCore(std::size_t index);
@@ -54,6 +55,8 @@ private:
/// Map of guest threads to CPU cores
std::map<std::thread::id, Cpu*> thread_to_cpu;
+
+ System& system;
};
} // namespace Core
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index 4d58e7c69..8539fabe4 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -182,7 +182,12 @@ void KernelCore::AppendNewProcess(SharedPtr<Process> process) {
void KernelCore::MakeCurrentProcess(Process* process) {
impl->current_process = process;
- Memory::SetCurrentPageTable(&process->VMManager().page_table);
+
+ if (process == nullptr) {
+ return;
+ }
+
+ Memory::SetCurrentPageTable(*process);
}
Process* KernelCore::CurrentProcess() {
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 4e94048da..6d7a7e754 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -28,21 +28,20 @@ namespace {
*
* @param owner_process The parent process for the main thread
* @param kernel The kernel instance to create the main thread under.
- * @param entry_point The address at which the thread should start execution
* @param priority The priority to give the main thread
*/
-void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_point, u32 priority) {
- // Initialize new "main" thread
- const VAddr stack_top = owner_process.VMManager().GetTLSIORegionEndAddress();
+void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority) {
+ const auto& vm_manager = owner_process.VMManager();
+ const VAddr entry_point = vm_manager.GetCodeRegionBaseAddress();
+ const VAddr stack_top = vm_manager.GetTLSIORegionEndAddress();
auto thread_res = Thread::Create(kernel, "main", entry_point, priority, 0,
owner_process.GetIdealCore(), stack_top, owner_process);
SharedPtr<Thread> thread = std::move(thread_res).Unwrap();
// Register 1 must be a handle to the main thread
- const Handle guest_handle = owner_process.GetHandleTable().Create(thread).Unwrap();
- thread->SetGuestHandle(guest_handle);
- thread->GetContext().cpu_registers[1] = guest_handle;
+ const Handle thread_handle = owner_process.GetHandleTable().Create(thread).Unwrap();
+ thread->GetContext().cpu_registers[1] = thread_handle;
// Threads by default are dormant, wake up the main thread so it runs when the scheduler fires
thread->ResumeFromWait();
@@ -106,8 +105,6 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
is_64bit_process = metadata.Is64BitProgram();
vm_manager.Reset(metadata.GetAddressSpaceType());
- // Ensure that the potentially resized page table is seen by CPU backends.
- Memory::SetCurrentPageTable(&vm_manager.page_table);
const auto& caps = metadata.GetKernelCapabilities();
const auto capability_init_result =
@@ -119,7 +116,7 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata) {
return handle_table.SetSize(capabilities.GetHandleTableSize());
}
-void Process::Run(VAddr entry_point, s32 main_thread_priority, u64 stack_size) {
+void Process::Run(s32 main_thread_priority, u64 stack_size) {
// The kernel always ensures that the given stack size is page aligned.
main_thread_stack_size = Common::AlignUp(stack_size, Memory::PAGE_SIZE);
@@ -135,7 +132,7 @@ void Process::Run(VAddr entry_point, s32 main_thread_priority, u64 stack_size) {
vm_manager.LogLayout();
ChangeStatus(ProcessStatus::Running);
- SetupMainThread(*this, kernel, entry_point, main_thread_priority);
+ SetupMainThread(*this, kernel, main_thread_priority);
}
void Process::PrepareForTermination() {
@@ -242,9 +239,6 @@ void Process::LoadModule(CodeSet module_, VAddr base_addr) {
MapSegment(module_.DataSegment(), VMAPermission::ReadWrite, MemoryState::CodeData);
code_memory_size += module_.memory.size();
-
- // Clear instruction cache in CPU JIT
- system.InvalidateCpuInstructionCaches();
}
Process::Process(Core::System& system)
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index dda52f4c0..bf3b7eef3 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -225,9 +225,12 @@ public:
ResultCode LoadFromMetadata(const FileSys::ProgramMetadata& metadata);
/**
- * Applies address space changes and launches the process main thread.
+ * Starts the main application thread for this process.
+ *
+ * @param main_thread_priority The priority for the main thread.
+ * @param stack_size The stack size for the main thread in bytes.
*/
- void Run(VAddr entry_point, s32 main_thread_priority, u64 stack_size);
+ void Run(s32 main_thread_priority, u64 stack_size);
/**
* Prepares a process for termination by stopping all of its threads
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index d48a2203a..4c763b288 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -1380,20 +1380,22 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e
return ERR_INVALID_THREAD_PRIORITY;
}
- const std::string name = fmt::format("thread-{:X}", entry_point);
auto& kernel = system.Kernel();
CASCADE_RESULT(SharedPtr<Thread> thread,
- Thread::Create(kernel, name, entry_point, priority, arg, processor_id, stack_top,
+ Thread::Create(kernel, "", entry_point, priority, arg, processor_id, stack_top,
*current_process));
- const auto new_guest_handle = current_process->GetHandleTable().Create(thread);
- if (new_guest_handle.Failed()) {
+ const auto new_thread_handle = current_process->GetHandleTable().Create(thread);
+ if (new_thread_handle.Failed()) {
LOG_ERROR(Kernel_SVC, "Failed to create handle with error=0x{:X}",
- new_guest_handle.Code().raw);
- return new_guest_handle.Code();
+ new_thread_handle.Code().raw);
+ return new_thread_handle.Code();
}
- thread->SetGuestHandle(*new_guest_handle);
- *out_handle = *new_guest_handle;
+ *out_handle = *new_thread_handle;
+
+ // Set the thread name for debugging purposes.
+ thread->SetName(
+ fmt::format("thread[entry_point={:X}, handle={:X}]", entry_point, *new_thread_handle));
system.CpuCore(thread->GetProcessorID()).PrepareReschedule();
@@ -2288,7 +2290,7 @@ static const FunctionDef SVC_Table[] = {
{0x33, SvcWrap<GetThreadContext>, "GetThreadContext"},
{0x34, SvcWrap<WaitForAddress>, "WaitForAddress"},
{0x35, SvcWrap<SignalToAddress>, "SignalToAddress"},
- {0x36, nullptr, "Unknown"},
+ {0x36, nullptr, "SynchronizePreemptionState"},
{0x37, nullptr, "Unknown"},
{0x38, nullptr, "Unknown"},
{0x39, nullptr, "Unknown"},
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 32026d7f0..411a73b49 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -102,6 +102,11 @@ public:
std::string GetName() const override {
return name;
}
+
+ void SetName(std::string new_name) {
+ name = std::move(new_name);
+ }
+
std::string GetTypeName() const override {
return "Thread";
}
@@ -339,10 +344,6 @@ public:
arb_wait_address = address;
}
- void SetGuestHandle(Handle handle) {
- guest_handle = handle;
- }
-
bool HasWakeupCallback() const {
return wakeup_callback != nullptr;
}
@@ -436,9 +437,6 @@ private:
/// If waiting for an AddressArbiter, this is the address being waited on.
VAddr arb_wait_address{0};
- /// Handle used by guest emulated application to access this thread
- Handle guest_handle = 0;
-
/// Handle used as userdata to reference this object when inserting into the CoreTiming queue.
Handle callback_handle = 0;
diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp
index 07aa7a1cd..10b13fb1d 100644
--- a/src/core/loader/deconstructed_rom_directory.cpp
+++ b/src/core/loader/deconstructed_rom_directory.cpp
@@ -86,25 +86,29 @@ FileType AppLoader_DeconstructedRomDirectory::IdentifyType(const FileSys::Virtua
return FileType::Error;
}
-ResultStatus AppLoader_DeconstructedRomDirectory::Load(Kernel::Process& process) {
+AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirectory::Load(
+ Kernel::Process& process) {
if (is_loaded) {
- return ResultStatus::ErrorAlreadyLoaded;
+ return {ResultStatus::ErrorAlreadyLoaded, {}};
}
if (dir == nullptr) {
- if (file == nullptr)
- return ResultStatus::ErrorNullFile;
+ if (file == nullptr) {
+ return {ResultStatus::ErrorNullFile, {}};
+ }
+
dir = file->GetContainingDirectory();
}
// Read meta to determine title ID
FileSys::VirtualFile npdm = dir->GetFile("main.npdm");
- if (npdm == nullptr)
- return ResultStatus::ErrorMissingNPDM;
+ if (npdm == nullptr) {
+ return {ResultStatus::ErrorMissingNPDM, {}};
+ }
- ResultStatus result = metadata.Load(npdm);
+ const ResultStatus result = metadata.Load(npdm);
if (result != ResultStatus::Success) {
- return result;
+ return {result, {}};
}
if (override_update) {
@@ -114,23 +118,24 @@ ResultStatus AppLoader_DeconstructedRomDirectory::Load(Kernel::Process& process)
// Reread in case PatchExeFS affected the main.npdm
npdm = dir->GetFile("main.npdm");
- if (npdm == nullptr)
- return ResultStatus::ErrorMissingNPDM;
+ if (npdm == nullptr) {
+ return {ResultStatus::ErrorMissingNPDM, {}};
+ }
- ResultStatus result2 = metadata.Load(npdm);
+ const ResultStatus result2 = metadata.Load(npdm);
if (result2 != ResultStatus::Success) {
- return result2;
+ return {result2, {}};
}
metadata.Print();
const FileSys::ProgramAddressSpaceType arch_bits{metadata.GetAddressSpaceType()};
if (arch_bits == FileSys::ProgramAddressSpaceType::Is32Bit ||
arch_bits == FileSys::ProgramAddressSpaceType::Is32BitNoMap) {
- return ResultStatus::Error32BitISA;
+ return {ResultStatus::Error32BitISA, {}};
}
if (process.LoadFromMetadata(metadata).IsError()) {
- return ResultStatus::ErrorUnableToParseKernelMetadata;
+ return {ResultStatus::ErrorUnableToParseKernelMetadata, {}};
}
const FileSys::PatchManager pm(metadata.GetTitleID());
@@ -150,7 +155,7 @@ ResultStatus AppLoader_DeconstructedRomDirectory::Load(Kernel::Process& process)
const auto tentative_next_load_addr =
AppLoader_NSO::LoadModule(process, *module_file, load_addr, should_pass_arguments, pm);
if (!tentative_next_load_addr) {
- return ResultStatus::ErrorLoadingNSO;
+ return {ResultStatus::ErrorLoadingNSO, {}};
}
next_load_addr = *tentative_next_load_addr;
@@ -159,8 +164,6 @@ ResultStatus AppLoader_DeconstructedRomDirectory::Load(Kernel::Process& process)
GDBStub::RegisterModule(module, load_addr, next_load_addr - 1, false);
}
- process.Run(base_address, metadata.GetMainThreadPriority(), metadata.GetMainThreadStackSize());
-
// Find the RomFS by searching for a ".romfs" file in this directory
const auto& files = dir->GetFiles();
const auto romfs_iter =
@@ -175,7 +178,8 @@ ResultStatus AppLoader_DeconstructedRomDirectory::Load(Kernel::Process& process)
}
is_loaded = true;
- return ResultStatus::Success;
+ return {ResultStatus::Success,
+ LoadParameters{metadata.GetMainThreadPriority(), metadata.GetMainThreadStackSize()}};
}
ResultStatus AppLoader_DeconstructedRomDirectory::ReadRomFS(FileSys::VirtualFile& dir) {
diff --git a/src/core/loader/deconstructed_rom_directory.h b/src/core/loader/deconstructed_rom_directory.h
index 1615cb5a8..1a65c16a4 100644
--- a/src/core/loader/deconstructed_rom_directory.h
+++ b/src/core/loader/deconstructed_rom_directory.h
@@ -37,7 +37,7 @@ public:
return IdentifyType(file);
}
- ResultStatus Load(Kernel::Process& process) override;
+ LoadResult Load(Kernel::Process& process) override;
ResultStatus ReadRomFS(FileSys::VirtualFile& dir) override;
ResultStatus ReadIcon(std::vector<u8>& buffer) override;
diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp
index 46ac372f6..6d4b02375 100644
--- a/src/core/loader/elf.cpp
+++ b/src/core/loader/elf.cpp
@@ -382,13 +382,15 @@ FileType AppLoader_ELF::IdentifyType(const FileSys::VirtualFile& file) {
return FileType::Error;
}
-ResultStatus AppLoader_ELF::Load(Kernel::Process& process) {
- if (is_loaded)
- return ResultStatus::ErrorAlreadyLoaded;
+AppLoader_ELF::LoadResult AppLoader_ELF::Load(Kernel::Process& process) {
+ if (is_loaded) {
+ return {ResultStatus::ErrorAlreadyLoaded, {}};
+ }
std::vector<u8> buffer = file->ReadAllBytes();
- if (buffer.size() != file->GetSize())
- return ResultStatus::ErrorIncorrectELFFileSize;
+ if (buffer.size() != file->GetSize()) {
+ return {ResultStatus::ErrorIncorrectELFFileSize, {}};
+ }
const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress();
ElfReader elf_reader(&buffer[0]);
@@ -396,10 +398,9 @@ ResultStatus AppLoader_ELF::Load(Kernel::Process& process) {
const VAddr entry_point = codeset.entrypoint;
process.LoadModule(std::move(codeset), entry_point);
- process.Run(entry_point, 48, Memory::DEFAULT_STACK_SIZE);
is_loaded = true;
- return ResultStatus::Success;
+ return {ResultStatus::Success, LoadParameters{48, Memory::DEFAULT_STACK_SIZE}};
}
} // namespace Loader
diff --git a/src/core/loader/elf.h b/src/core/loader/elf.h
index a2d33021c..7ef7770a6 100644
--- a/src/core/loader/elf.h
+++ b/src/core/loader/elf.h
@@ -26,7 +26,7 @@ public:
return IdentifyType(file);
}
- ResultStatus Load(Kernel::Process& process) override;
+ LoadResult Load(Kernel::Process& process) override;
};
} // namespace Loader
diff --git a/src/core/loader/loader.h b/src/core/loader/loader.h
index bb925f4a6..f7846db52 100644
--- a/src/core/loader/loader.h
+++ b/src/core/loader/loader.h
@@ -131,6 +131,12 @@ std::ostream& operator<<(std::ostream& os, ResultStatus status);
/// Interface for loading an application
class AppLoader : NonCopyable {
public:
+ struct LoadParameters {
+ s32 main_thread_priority;
+ u64 main_thread_stack_size;
+ };
+ using LoadResult = std::pair<ResultStatus, std::optional<LoadParameters>>;
+
explicit AppLoader(FileSys::VirtualFile file);
virtual ~AppLoader();
@@ -145,7 +151,7 @@ public:
* @param process The newly created process.
* @return The status result of the operation.
*/
- virtual ResultStatus Load(Kernel::Process& process) = 0;
+ virtual LoadResult Load(Kernel::Process& process) = 0;
/**
* Loads the system mode that this application needs.
diff --git a/src/core/loader/nax.cpp b/src/core/loader/nax.cpp
index 93a970d10..34efef09a 100644
--- a/src/core/loader/nax.cpp
+++ b/src/core/loader/nax.cpp
@@ -41,31 +41,37 @@ FileType AppLoader_NAX::GetFileType() const {
return IdentifyTypeImpl(*nax);
}
-ResultStatus AppLoader_NAX::Load(Kernel::Process& process) {
+AppLoader_NAX::LoadResult AppLoader_NAX::Load(Kernel::Process& process) {
if (is_loaded) {
- return ResultStatus::ErrorAlreadyLoaded;
+ return {ResultStatus::ErrorAlreadyLoaded, {}};
}
- if (nax->GetStatus() != ResultStatus::Success)
- return nax->GetStatus();
+ const auto nax_status = nax->GetStatus();
+ if (nax_status != ResultStatus::Success) {
+ return {nax_status, {}};
+ }
const auto nca = nax->AsNCA();
if (nca == nullptr) {
- if (!Core::Crypto::KeyManager::KeyFileExists(false))
- return ResultStatus::ErrorMissingProductionKeyFile;
- return ResultStatus::ErrorNAXInconvertibleToNCA;
+ if (!Core::Crypto::KeyManager::KeyFileExists(false)) {
+ return {ResultStatus::ErrorMissingProductionKeyFile, {}};
+ }
+
+ return {ResultStatus::ErrorNAXInconvertibleToNCA, {}};
}
- if (nca->GetStatus() != ResultStatus::Success)
- return nca->GetStatus();
+ const auto nca_status = nca->GetStatus();
+ if (nca_status != ResultStatus::Success) {
+ return {nca_status, {}};
+ }
const auto result = nca_loader->Load(process);
- if (result != ResultStatus::Success)
+ if (result.first != ResultStatus::Success) {
return result;
+ }
is_loaded = true;
-
- return ResultStatus::Success;
+ return result;
}
ResultStatus AppLoader_NAX::ReadRomFS(FileSys::VirtualFile& dir) {
diff --git a/src/core/loader/nax.h b/src/core/loader/nax.h
index f40079574..00f1659c1 100644
--- a/src/core/loader/nax.h
+++ b/src/core/loader/nax.h
@@ -33,7 +33,7 @@ public:
FileType GetFileType() const override;
- ResultStatus Load(Kernel::Process& process) override;
+ LoadResult Load(Kernel::Process& process) override;
ResultStatus ReadRomFS(FileSys::VirtualFile& dir) override;
u64 ReadRomFSIVFCOffset() const override;
diff --git a/src/core/loader/nca.cpp b/src/core/loader/nca.cpp
index ce8196fcf..b3f8f1083 100644
--- a/src/core/loader/nca.cpp
+++ b/src/core/loader/nca.cpp
@@ -30,36 +30,38 @@ FileType AppLoader_NCA::IdentifyType(const FileSys::VirtualFile& file) {
return FileType::Error;
}
-ResultStatus AppLoader_NCA::Load(Kernel::Process& process) {
+AppLoader_NCA::LoadResult AppLoader_NCA::Load(Kernel::Process& process) {
if (is_loaded) {
- return ResultStatus::ErrorAlreadyLoaded;
+ return {ResultStatus::ErrorAlreadyLoaded, {}};
}
const auto result = nca->GetStatus();
if (result != ResultStatus::Success) {
- return result;
+ return {result, {}};
}
- if (nca->GetType() != FileSys::NCAContentType::Program)
- return ResultStatus::ErrorNCANotProgram;
+ if (nca->GetType() != FileSys::NCAContentType::Program) {
+ return {ResultStatus::ErrorNCANotProgram, {}};
+ }
const auto exefs = nca->GetExeFS();
-
- if (exefs == nullptr)
- return ResultStatus::ErrorNoExeFS;
+ if (exefs == nullptr) {
+ return {ResultStatus::ErrorNoExeFS, {}};
+ }
directory_loader = std::make_unique<AppLoader_DeconstructedRomDirectory>(exefs, true);
const auto load_result = directory_loader->Load(process);
- if (load_result != ResultStatus::Success)
+ if (load_result.first != ResultStatus::Success) {
return load_result;
+ }
- if (nca->GetRomFS() != nullptr && nca->GetRomFS()->GetSize() > 0)
+ if (nca->GetRomFS() != nullptr && nca->GetRomFS()->GetSize() > 0) {
Service::FileSystem::RegisterRomFS(std::make_unique<FileSys::RomFSFactory>(*this));
+ }
is_loaded = true;
-
- return ResultStatus::Success;
+ return load_result;
}
ResultStatus AppLoader_NCA::ReadRomFS(FileSys::VirtualFile& dir) {
diff --git a/src/core/loader/nca.h b/src/core/loader/nca.h
index b9f077468..94f0ed677 100644
--- a/src/core/loader/nca.h
+++ b/src/core/loader/nca.h
@@ -33,7 +33,7 @@ public:
return IdentifyType(file);
}
- ResultStatus Load(Kernel::Process& process) override;
+ LoadResult Load(Kernel::Process& process) override;
ResultStatus ReadRomFS(FileSys::VirtualFile& dir) override;
u64 ReadRomFSIVFCOffset() const override;
diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp
index 31e4a0c84..6a0ca389b 100644
--- a/src/core/loader/nro.cpp
+++ b/src/core/loader/nro.cpp
@@ -201,25 +201,25 @@ bool AppLoader_NRO::LoadNro(Kernel::Process& process, const FileSys::VfsFile& fi
return LoadNroImpl(process, file.ReadAllBytes(), file.GetName(), load_base);
}
-ResultStatus AppLoader_NRO::Load(Kernel::Process& process) {
+AppLoader_NRO::LoadResult AppLoader_NRO::Load(Kernel::Process& process) {
if (is_loaded) {
- return ResultStatus::ErrorAlreadyLoaded;
+ return {ResultStatus::ErrorAlreadyLoaded, {}};
}
// Load NRO
const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress();
if (!LoadNro(process, *file, base_address)) {
- return ResultStatus::ErrorLoadingNRO;
+ return {ResultStatus::ErrorLoadingNRO, {}};
}
- if (romfs != nullptr)
+ if (romfs != nullptr) {
Service::FileSystem::RegisterRomFS(std::make_unique<FileSys::RomFSFactory>(*this));
-
- process.Run(base_address, Kernel::THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE);
+ }
is_loaded = true;
- return ResultStatus::Success;
+ return {ResultStatus::Success,
+ LoadParameters{Kernel::THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE}};
}
ResultStatus AppLoader_NRO::ReadIcon(std::vector<u8>& buffer) {
diff --git a/src/core/loader/nro.h b/src/core/loader/nro.h
index 85b0ed644..1ffdae805 100644
--- a/src/core/loader/nro.h
+++ b/src/core/loader/nro.h
@@ -37,7 +37,7 @@ public:
return IdentifyType(file);
}
- ResultStatus Load(Kernel::Process& process) override;
+ LoadResult Load(Kernel::Process& process) override;
ResultStatus ReadIcon(std::vector<u8>& buffer) override;
ResultStatus ReadProgramId(u64& out_program_id) override;
diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp
index d7c47c197..a86653204 100644
--- a/src/core/loader/nso.cpp
+++ b/src/core/loader/nso.cpp
@@ -169,22 +169,21 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
return load_base + image_size;
}
-ResultStatus AppLoader_NSO::Load(Kernel::Process& process) {
+AppLoader_NSO::LoadResult AppLoader_NSO::Load(Kernel::Process& process) {
if (is_loaded) {
- return ResultStatus::ErrorAlreadyLoaded;
+ return {ResultStatus::ErrorAlreadyLoaded, {}};
}
// Load module
const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress();
if (!LoadModule(process, *file, base_address, true)) {
- return ResultStatus::ErrorLoadingNSO;
+ return {ResultStatus::ErrorLoadingNSO, {}};
}
LOG_DEBUG(Loader, "loaded module {} @ 0x{:X}", file->GetName(), base_address);
- process.Run(base_address, Kernel::THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE);
-
is_loaded = true;
- return ResultStatus::Success;
+ return {ResultStatus::Success,
+ LoadParameters{Kernel::THREADPRIO_DEFAULT, Memory::DEFAULT_STACK_SIZE}};
}
} // namespace Loader
diff --git a/src/core/loader/nso.h b/src/core/loader/nso.h
index 4674c3724..fdce9191c 100644
--- a/src/core/loader/nso.h
+++ b/src/core/loader/nso.h
@@ -84,7 +84,7 @@ public:
VAddr load_base, bool should_pass_arguments,
std::optional<FileSys::PatchManager> pm = {});
- ResultStatus Load(Kernel::Process& process) override;
+ LoadResult Load(Kernel::Process& process) override;
};
} // namespace Loader
diff --git a/src/core/loader/nsp.cpp b/src/core/loader/nsp.cpp
index 7da1f8960..ad56bbb38 100644
--- a/src/core/loader/nsp.cpp
+++ b/src/core/loader/nsp.cpp
@@ -72,37 +72,45 @@ FileType AppLoader_NSP::IdentifyType(const FileSys::VirtualFile& file) {
return FileType::Error;
}
-ResultStatus AppLoader_NSP::Load(Kernel::Process& process) {
+AppLoader_NSP::LoadResult AppLoader_NSP::Load(Kernel::Process& process) {
if (is_loaded) {
- return ResultStatus::ErrorAlreadyLoaded;
+ return {ResultStatus::ErrorAlreadyLoaded, {}};
}
- if (title_id == 0)
- return ResultStatus::ErrorNSPMissingProgramNCA;
+ if (title_id == 0) {
+ return {ResultStatus::ErrorNSPMissingProgramNCA, {}};
+ }
- if (nsp->GetStatus() != ResultStatus::Success)
- return nsp->GetStatus();
+ const auto nsp_status = nsp->GetStatus();
+ if (nsp_status != ResultStatus::Success) {
+ return {nsp_status, {}};
+ }
- if (nsp->GetProgramStatus(title_id) != ResultStatus::Success)
- return nsp->GetProgramStatus(title_id);
+ const auto nsp_program_status = nsp->GetProgramStatus(title_id);
+ if (nsp_program_status != ResultStatus::Success) {
+ return {nsp_program_status, {}};
+ }
if (nsp->GetNCA(title_id, FileSys::ContentRecordType::Program) == nullptr) {
- if (!Core::Crypto::KeyManager::KeyFileExists(false))
- return ResultStatus::ErrorMissingProductionKeyFile;
- return ResultStatus::ErrorNSPMissingProgramNCA;
+ if (!Core::Crypto::KeyManager::KeyFileExists(false)) {
+ return {ResultStatus::ErrorMissingProductionKeyFile, {}};
+ }
+
+ return {ResultStatus::ErrorNSPMissingProgramNCA, {}};
}
const auto result = secondary_loader->Load(process);
- if (result != ResultStatus::Success)
+ if (result.first != ResultStatus::Success) {
return result;
+ }
FileSys::VirtualFile update_raw;
- if (ReadUpdateRaw(update_raw) == ResultStatus::Success && update_raw != nullptr)
+ if (ReadUpdateRaw(update_raw) == ResultStatus::Success && update_raw != nullptr) {
Service::FileSystem::SetPackedUpdate(std::move(update_raw));
+ }
is_loaded = true;
-
- return ResultStatus::Success;
+ return result;
}
ResultStatus AppLoader_NSP::ReadRomFS(FileSys::VirtualFile& file) {
diff --git a/src/core/loader/nsp.h b/src/core/loader/nsp.h
index 953a1b508..85e870bdf 100644
--- a/src/core/loader/nsp.h
+++ b/src/core/loader/nsp.h
@@ -35,7 +35,7 @@ public:
return IdentifyType(file);
}
- ResultStatus Load(Kernel::Process& process) override;
+ LoadResult Load(Kernel::Process& process) override;
ResultStatus ReadRomFS(FileSys::VirtualFile& file) override;
u64 ReadRomFSIVFCOffset() const override;
diff --git a/src/core/loader/xci.cpp b/src/core/loader/xci.cpp
index 89f7bbf77..1e285a053 100644
--- a/src/core/loader/xci.cpp
+++ b/src/core/loader/xci.cpp
@@ -48,31 +48,35 @@ FileType AppLoader_XCI::IdentifyType(const FileSys::VirtualFile& file) {
return FileType::Error;
}
-ResultStatus AppLoader_XCI::Load(Kernel::Process& process) {
+AppLoader_XCI::LoadResult AppLoader_XCI::Load(Kernel::Process& process) {
if (is_loaded) {
- return ResultStatus::ErrorAlreadyLoaded;
+ return {ResultStatus::ErrorAlreadyLoaded, {}};
}
- if (xci->GetStatus() != ResultStatus::Success)
- return xci->GetStatus();
+ if (xci->GetStatus() != ResultStatus::Success) {
+ return {xci->GetStatus(), {}};
+ }
- if (xci->GetProgramNCAStatus() != ResultStatus::Success)
- return xci->GetProgramNCAStatus();
+ if (xci->GetProgramNCAStatus() != ResultStatus::Success) {
+ return {xci->GetProgramNCAStatus(), {}};
+ }
- if (!xci->HasProgramNCA() && !Core::Crypto::KeyManager::KeyFileExists(false))
- return ResultStatus::ErrorMissingProductionKeyFile;
+ if (!xci->HasProgramNCA() && !Core::Crypto::KeyManager::KeyFileExists(false)) {
+ return {ResultStatus::ErrorMissingProductionKeyFile, {}};
+ }
const auto result = nca_loader->Load(process);
- if (result != ResultStatus::Success)
+ if (result.first != ResultStatus::Success) {
return result;
+ }
FileSys::VirtualFile update_raw;
- if (ReadUpdateRaw(update_raw) == ResultStatus::Success && update_raw != nullptr)
+ if (ReadUpdateRaw(update_raw) == ResultStatus::Success && update_raw != nullptr) {
Service::FileSystem::SetPackedUpdate(std::move(update_raw));
+ }
is_loaded = true;
-
- return ResultStatus::Success;
+ return result;
}
ResultStatus AppLoader_XCI::ReadRomFS(FileSys::VirtualFile& file) {
diff --git a/src/core/loader/xci.h b/src/core/loader/xci.h
index 436f7387c..ae7145b14 100644
--- a/src/core/loader/xci.h
+++ b/src/core/loader/xci.h
@@ -35,7 +35,7 @@ public:
return IdentifyType(file);
}
- ResultStatus Load(Kernel::Process& process) override;
+ LoadResult Load(Kernel::Process& process) override;
ResultStatus ReadRomFS(FileSys::VirtualFile& file) override;
u64 ReadRomFSIVFCOffset() const override;
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 4e0538bc2..f18f6226b 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -26,16 +26,16 @@ namespace Memory {
static Common::PageTable* current_page_table = nullptr;
-void SetCurrentPageTable(Common::PageTable* page_table) {
- current_page_table = page_table;
+void SetCurrentPageTable(Kernel::Process& process) {
+ current_page_table = &process.VMManager().page_table;
+
+ const std::size_t address_space_width = process.VMManager().GetAddressSpaceWidth();
auto& system = Core::System::GetInstance();
- if (system.IsPoweredOn()) {
- system.ArmInterface(0).PageTableChanged();
- system.ArmInterface(1).PageTableChanged();
- system.ArmInterface(2).PageTableChanged();
- system.ArmInterface(3).PageTableChanged();
- }
+ system.ArmInterface(0).PageTableChanged(*current_page_table, address_space_width);
+ system.ArmInterface(1).PageTableChanged(*current_page_table, address_space_width);
+ system.ArmInterface(2).PageTableChanged(*current_page_table, address_space_width);
+ system.ArmInterface(3).PageTableChanged(*current_page_table, address_space_width);
}
static void MapPages(Common::PageTable& page_table, VAddr base, u64 size, u8* memory,
diff --git a/src/core/memory.h b/src/core/memory.h
index 6845f5fe1..b9fa18b1d 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -40,8 +40,9 @@ enum : VAddr {
KERNEL_REGION_END = KERNEL_REGION_VADDR + KERNEL_REGION_SIZE,
};
-/// Changes the currently active page table.
-void SetCurrentPageTable(Common::PageTable* page_table);
+/// Changes the currently active page table to that of
+/// the given process instance.
+void SetCurrentPageTable(Kernel::Process& process);
/// Determines if the given VAddr is valid for the specified process.
bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr);
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 114bed20d..1e31a2900 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -46,6 +46,8 @@ add_library(video_core STATIC
renderer_opengl/gl_rasterizer_cache.h
renderer_opengl/gl_resource_manager.cpp
renderer_opengl/gl_resource_manager.h
+ renderer_opengl/gl_sampler_cache.cpp
+ renderer_opengl/gl_sampler_cache.h
renderer_opengl/gl_shader_cache.cpp
renderer_opengl/gl_shader_cache.h
renderer_opengl/gl_shader_decompiler.cpp
@@ -67,6 +69,8 @@ add_library(video_core STATIC
renderer_opengl/renderer_opengl.h
renderer_opengl/utils.cpp
renderer_opengl/utils.h
+ sampler_cache.cpp
+ sampler_cache.h
shader/decode/arithmetic.cpp
shader/decode/arithmetic_immediate.cpp
shader/decode/bfe.cpp
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 74403eed4..b198793bc 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -482,19 +482,8 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
return textures;
}
-Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
- std::size_t offset) const {
- auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
- auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
- ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
-
- const GPUVAddr tex_info_address =
- tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);
-
- ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
-
- const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
-
+Texture::FullTextureInfo Maxwell3D::GetTextureInfo(const Texture::TextureHandle tex_handle,
+ std::size_t offset) const {
Texture::FullTextureInfo tex_info{};
tex_info.index = static_cast<u32>(offset);
@@ -511,6 +500,22 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
return tex_info;
}
+Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
+ std::size_t offset) const {
+ const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
+ const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
+ ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
+
+ const GPUVAddr tex_info_address =
+ tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);
+
+ ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
+
+ const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
+
+ return GetTextureInfo(tex_handle, offset);
+}
+
u32 Maxwell3D::GetRegisterValue(u32 method) const {
ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register");
return regs.reg_array[method];
@@ -524,4 +529,12 @@ void Maxwell3D::ProcessClearBuffers() {
rasterizer.Clear();
}
+u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const {
+ const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)];
+ const auto& buffer = shader_stage.const_buffers[const_buffer];
+ u32 result;
+ std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u32));
+ return result;
+}
+
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 321af3297..cc2424d38 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1131,12 +1131,18 @@ public:
/// Write the value to the register identified by method.
void CallMethod(const GPU::MethodCall& method_call);
+ /// Given a Texture Handle, returns the TSC and TIC entries.
+ Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle,
+ std::size_t offset) const;
+
/// Returns a list of enabled textures for the specified shader stage.
std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;
/// Returns the texture information for a specific texture in a specific shader stage.
Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const;
+ u32 AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const;
+
/// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
/// we've seen used.
using MacroMemory = std::array<u32, 0x40000>;
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 38db4addd..fce9733b9 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -987,6 +987,38 @@ union Instruction {
} tex;
union {
+ BitField<28, 1, u64> array;
+ BitField<29, 2, TextureType> texture_type;
+ BitField<31, 4, u64> component_mask;
+ BitField<49, 1, u64> nodep_flag;
+ BitField<50, 1, u64> dc_flag;
+ BitField<36, 1, u64> aoffi_flag;
+ BitField<37, 3, TextureProcessMode> process_mode;
+
+ bool IsComponentEnabled(std::size_t component) const {
+ return ((1ULL << component) & component_mask) != 0;
+ }
+
+ TextureProcessMode GetTextureProcessMode() const {
+ return process_mode;
+ }
+
+ bool UsesMiscMode(TextureMiscMode mode) const {
+ switch (mode) {
+ case TextureMiscMode::DC:
+ return dc_flag != 0;
+ case TextureMiscMode::NODEP:
+ return nodep_flag != 0;
+ case TextureMiscMode::AOFFI:
+ return aoffi_flag != 0;
+ default:
+ break;
+ }
+ return false;
+ }
+ } tex_b;
+
+ union {
BitField<22, 6, TextureQueryType> query_type;
BitField<31, 4, u64> component_mask;
BitField<49, 1, u64> nodep_flag;
@@ -1332,7 +1364,9 @@ public:
LDG, // Load from global memory
STG, // Store in global memory
TEX,
+ TEX_B, // Texture Load Bindless
TXQ, // Texture Query
+ TXQ_B, // Texture Query Bindless
TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
TLDS, // Texture Load with scalar/non-vec4 source/destinations
TLD4, // Texture Load 4
@@ -1600,7 +1634,9 @@ private:
INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
+ INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
+ INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"),
INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"),
INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index de30ea354..fe6628923 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -207,6 +207,11 @@ public:
};
} regs{};
+ /// Performs any additional setup necessary in order to begin GPU emulation.
+ /// This can be used to launch any necessary threads and register any necessary
+ /// core timing events.
+ virtual void Start() = 0;
+
/// Push GPU command entries to be processed
virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index db507cf04..d4e2553a9 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -9,10 +9,14 @@
namespace VideoCommon {
GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer)
- : Tegra::GPU(system, renderer), gpu_thread{system, renderer, *dma_pusher} {}
+ : GPU(system, renderer), gpu_thread{system} {}
GPUAsynch::~GPUAsynch() = default;
+void GPUAsynch::Start() {
+ gpu_thread.StartThread(renderer, *dma_pusher);
+}
+
void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
gpu_thread.SubmitList(std::move(entries));
}
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 1dcc61a6c..30be74cba 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -13,16 +13,13 @@ class RendererBase;
namespace VideoCommon {
-namespace GPUThread {
-class ThreadManager;
-} // namespace GPUThread
-
/// Implementation of GPU interface that runs the GPU asynchronously
class GPUAsynch : public Tegra::GPU {
public:
explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer);
~GPUAsynch() override;
+ void Start() override;
void PushGPUEntries(Tegra::CommandList&& entries) override;
void SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index 2cfc900ed..45e43b1dc 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -8,10 +8,12 @@
namespace VideoCommon {
GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer)
- : Tegra::GPU(system, renderer) {}
+ : GPU(system, renderer) {}
GPUSynch::~GPUSynch() = default;
+void GPUSynch::Start() {}
+
void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
dma_pusher->Push(std::move(entries));
dma_pusher->DispatchCalls();
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 766b5631c..3031fcf72 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -18,6 +18,7 @@ public:
explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer);
~GPUSynch() override;
+ void Start() override;
void PushGPUEntries(Tegra::CommandList&& entries) override;
void SwapBuffers(
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index cc56cf467..c9a2077de 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -55,19 +55,24 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
}
}
-ThreadManager::ThreadManager(Core::System& system, VideoCore::RendererBase& renderer,
- Tegra::DmaPusher& dma_pusher)
- : system{system}, thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)} {
- synchronization_event = system.CoreTiming().RegisterEvent(
- "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); });
-}
+ThreadManager::ThreadManager(Core::System& system) : system{system} {}
ThreadManager::~ThreadManager() {
+ if (!thread.joinable()) {
+ return;
+ }
+
// Notify GPU thread that a shutdown is pending
PushCommand(EndProcessingCommand());
thread.join();
}
+void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) {
+ thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)};
+ synchronization_event = system.CoreTiming().RegisterEvent(
+ "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); });
+}
+
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))};
const s64 synchronization_ticks{Core::Timing::usToCycles(9000)};
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 62bcea5bb..cc14527c7 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -138,10 +138,12 @@ struct SynchState final {
/// Class used to manage the GPU thread
class ThreadManager final {
public:
- explicit ThreadManager(Core::System& system, VideoCore::RendererBase& renderer,
- Tegra::DmaPusher& dma_pusher);
+ explicit ThreadManager(Core::System& system);
~ThreadManager();
+ /// Creates and starts the GPU thread.
+ void StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher);
+
/// Push GPU command entries to be processed
void SubmitList(Tegra::CommandList&& entries);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 0b1fe3494..6034dc489 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -101,12 +101,6 @@ struct FramebufferCacheKey {
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info)
: res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, system{system},
screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) {
- // Create sampler objects
- for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
- texture_samplers[i].Create();
- state.texture_units[i].sampler = texture_samplers[i].sampler.handle;
- }
-
OpenGLState::ApplyDefaultState();
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
@@ -813,92 +807,6 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
return true;
}
-void RasterizerOpenGL::SamplerInfo::Create() {
- sampler.Create();
- mag_filter = Tegra::Texture::TextureFilter::Linear;
- min_filter = Tegra::Texture::TextureFilter::Linear;
- wrap_u = Tegra::Texture::WrapMode::Wrap;
- wrap_v = Tegra::Texture::WrapMode::Wrap;
- wrap_p = Tegra::Texture::WrapMode::Wrap;
- use_depth_compare = false;
- depth_compare_func = Tegra::Texture::DepthCompareFunc::Never;
-
- // OpenGL's default is GL_LINEAR_MIPMAP_LINEAR
- glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
- glSamplerParameteri(sampler.handle, GL_TEXTURE_COMPARE_FUNC, GL_NEVER);
-
- // Other attributes have correct defaults
-}
-
-void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntry& config) {
- const GLuint sampler_id = sampler.handle;
- if (mag_filter != config.mag_filter) {
- mag_filter = config.mag_filter;
- glSamplerParameteri(
- sampler_id, GL_TEXTURE_MAG_FILTER,
- MaxwellToGL::TextureFilterMode(mag_filter, Tegra::Texture::TextureMipmapFilter::None));
- }
- if (min_filter != config.min_filter || mipmap_filter != config.mipmap_filter) {
- min_filter = config.min_filter;
- mipmap_filter = config.mipmap_filter;
- glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER,
- MaxwellToGL::TextureFilterMode(min_filter, mipmap_filter));
- }
-
- if (wrap_u != config.wrap_u) {
- wrap_u = config.wrap_u;
- glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(wrap_u));
- }
- if (wrap_v != config.wrap_v) {
- wrap_v = config.wrap_v;
- glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v));
- }
- if (wrap_p != config.wrap_p) {
- wrap_p = config.wrap_p;
- glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p));
- }
-
- if (const bool enabled = config.depth_compare_enabled == 1; use_depth_compare != enabled) {
- use_depth_compare = enabled;
- glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE,
- use_depth_compare ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE);
- }
-
- if (depth_compare_func != config.depth_compare_func) {
- depth_compare_func = config.depth_compare_func;
- glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC,
- MaxwellToGL::DepthCompareFunc(depth_compare_func));
- }
-
- if (const auto new_border_color = config.GetBorderColor(); border_color != new_border_color) {
- border_color = new_border_color;
- glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, border_color.data());
- }
-
- if (const float anisotropic = config.GetMaxAnisotropy(); max_anisotropic != anisotropic) {
- max_anisotropic = anisotropic;
- if (GLAD_GL_ARB_texture_filter_anisotropic) {
- glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropic);
- } else if (GLAD_GL_EXT_texture_filter_anisotropic) {
- glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, max_anisotropic);
- }
- }
-
- if (const float min = config.GetMinLod(); min_lod != min) {
- min_lod = min;
- glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, min_lod);
- }
- if (const float max = config.GetMaxLod(); max_lod != max) {
- max_lod = max;
- glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, max_lod);
- }
-
- if (const float bias = config.GetLodBias(); lod_bias != bias) {
- lod_bias = bias;
- glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, lod_bias);
- }
-}
-
void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
const Shader& shader, GLuint program_handle,
BaseBindings base_bindings) {
@@ -974,10 +882,18 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry = entries[bindpoint];
- const auto texture = maxwell3d.GetStageTexture(stage, entry.GetOffset());
+ Tegra::Texture::FullTextureInfo texture;
+ if (entry.IsBindless()) {
+ const auto cbuf = entry.GetBindlessCBuf();
+ Tegra::Texture::TextureHandle tex_handle;
+ tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second);
+ texture = maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset());
+ } else {
+ texture = maxwell3d.GetStageTexture(stage, entry.GetOffset());
+ }
const u32 current_bindpoint = base_bindings.sampler + bindpoint;
- texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
+ state.texture_units[current_bindpoint].sampler = sampler_cache.GetSampler(texture.tsc);
if (Surface surface = res_cache.GetTextureSurface(texture, entry); surface) {
state.texture_units[current_bindpoint].texture =
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index d4c2cf80e..a0e056142 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -25,6 +25,7 @@
#include "video_core/renderer_opengl/gl_primitive_assembler.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_sampler_cache.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
@@ -72,34 +73,6 @@ public:
"The maximum size of a constbuffer must be a multiple of the size of GLvec4");
private:
- class SamplerInfo {
- public:
- OGLSampler sampler;
-
- /// Creates the sampler object, initializing its state so that it's in sync with the
- /// SamplerInfo struct.
- void Create();
- /// Syncs the sampler object with the config, updating any necessary state.
- void SyncWithConfig(const Tegra::Texture::TSCEntry& info);
-
- private:
- Tegra::Texture::TextureFilter mag_filter = Tegra::Texture::TextureFilter::Nearest;
- Tegra::Texture::TextureFilter min_filter = Tegra::Texture::TextureFilter::Nearest;
- Tegra::Texture::TextureMipmapFilter mipmap_filter =
- Tegra::Texture::TextureMipmapFilter::None;
- Tegra::Texture::WrapMode wrap_u = Tegra::Texture::WrapMode::ClampToEdge;
- Tegra::Texture::WrapMode wrap_v = Tegra::Texture::WrapMode::ClampToEdge;
- Tegra::Texture::WrapMode wrap_p = Tegra::Texture::WrapMode::ClampToEdge;
- bool use_depth_compare = false;
- Tegra::Texture::DepthCompareFunc depth_compare_func =
- Tegra::Texture::DepthCompareFunc::Always;
- GLvec4 border_color = {};
- float min_lod = 0.0f;
- float max_lod = 16.0f;
- float lod_bias = 0.0f;
- float max_anisotropic = 1.0f;
- };
-
struct FramebufferConfigState {
bool using_color_fb{};
bool using_depth_fb{};
@@ -204,6 +177,7 @@ private:
RasterizerCacheOpenGL res_cache;
ShaderCacheOpenGL shader_cache;
GlobalRegionCacheOpenGL global_cache;
+ SamplerCacheOpenGL sampler_cache;
Core::System& system;
@@ -219,8 +193,6 @@ private:
FramebufferConfigState current_framebuffer_config_state;
std::pair<bool, bool> current_depth_stencil_usage{};
- std::array<SamplerInfo, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_samplers;
-
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
OGLBufferCache buffer_cache;
PrimitiveAssembler primitive_assembler{buffer_cache};
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.cpp b/src/video_core/renderer_opengl/gl_sampler_cache.cpp
new file mode 100644
index 000000000..3ded5ecea
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_sampler_cache.cpp
@@ -0,0 +1,52 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/logging/log.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_sampler_cache.h"
+#include "video_core/renderer_opengl/maxwell_to_gl.h"
+
+namespace OpenGL {
+
+SamplerCacheOpenGL::SamplerCacheOpenGL() = default;
+
+SamplerCacheOpenGL::~SamplerCacheOpenGL() = default;
+
+OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const {
+ OGLSampler sampler;
+ sampler.Create();
+
+ const GLuint sampler_id{sampler.handle};
+ glSamplerParameteri(
+ sampler_id, GL_TEXTURE_MAG_FILTER,
+ MaxwellToGL::TextureFilterMode(tsc.mag_filter, Tegra::Texture::TextureMipmapFilter::None));
+ glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER,
+ MaxwellToGL::TextureFilterMode(tsc.min_filter, tsc.mipmap_filter));
+ glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(tsc.wrap_u));
+ glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(tsc.wrap_v));
+ glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(tsc.wrap_p));
+ glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE,
+ tsc.depth_compare_enabled == 1 ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE);
+ glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC,
+ MaxwellToGL::DepthCompareFunc(tsc.depth_compare_func));
+ glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, tsc.GetBorderColor().data());
+ glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, tsc.GetMinLod());
+ glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, tsc.GetMaxLod());
+ glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, tsc.GetLodBias());
+ if (GLAD_GL_ARB_texture_filter_anisotropic) {
+ glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy());
+ } else if (GLAD_GL_EXT_texture_filter_anisotropic) {
+ glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy());
+ } else if (tsc.GetMaxAnisotropy() != 1) {
+ LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver");
+ }
+
+ return sampler;
+}
+
+GLuint SamplerCacheOpenGL::ToSamplerType(const OGLSampler& sampler) const {
+ return sampler.handle;
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.h b/src/video_core/renderer_opengl/gl_sampler_cache.h
new file mode 100644
index 000000000..defbc2d81
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_sampler_cache.h
@@ -0,0 +1,25 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <glad/glad.h>
+
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/sampler_cache.h"
+
+namespace OpenGL {
+
+class SamplerCacheOpenGL final : public VideoCommon::SamplerCache<GLuint, OGLSampler> {
+public:
+ explicit SamplerCacheOpenGL();
+ ~SamplerCacheOpenGL();
+
+protected:
+ OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const;
+
+ GLuint ToSamplerType(const OGLSampler& sampler) const;
+};
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 55b3d4d7b..74032d237 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -69,6 +69,7 @@ private:
struct ShaderEntries {
std::vector<ConstBufferEntry> const_buffers;
std::vector<SamplerEntry> samplers;
+ std::vector<SamplerEntry> bindless_samplers;
std::vector<GlobalMemoryEntry> global_memory_entries;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
std::size_t shader_length{};
@@ -79,4 +80,4 @@ std::string GetCommonDeclarations();
ProgramResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage,
const std::string& suffix);
-} // namespace OpenGL::GLShader \ No newline at end of file
+} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index d5890a375..53752b38d 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -319,16 +319,19 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
u32 type{};
u8 is_array{};
u8 is_shadow{};
+ u8 is_bindless{};
if (file.ReadBytes(&offset, sizeof(u64)) != sizeof(u64) ||
file.ReadBytes(&index, sizeof(u64)) != sizeof(u64) ||
file.ReadBytes(&type, sizeof(u32)) != sizeof(u32) ||
file.ReadBytes(&is_array, sizeof(u8)) != sizeof(u8) ||
- file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8)) {
+ file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8) ||
+ file.ReadBytes(&is_bindless, sizeof(u8)) != sizeof(u8)) {
return {};
}
- entry.entries.samplers.emplace_back(
- static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
- static_cast<Tegra::Shader::TextureType>(type), is_array != 0, is_shadow != 0);
+ entry.entries.samplers.emplace_back(static_cast<std::size_t>(offset),
+ static_cast<std::size_t>(index),
+ static_cast<Tegra::Shader::TextureType>(type),
+ is_array != 0, is_shadow != 0, is_bindless != 0);
}
u32 global_memory_count{};
@@ -393,7 +396,8 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 uniqu
file.WriteObject(static_cast<u64>(sampler.GetIndex())) != 1 ||
file.WriteObject(static_cast<u32>(sampler.GetType())) != 1 ||
file.WriteObject(static_cast<u8>(sampler.IsArray() ? 1 : 0)) != 1 ||
- file.WriteObject(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) != 1) {
+ file.WriteObject(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) != 1 ||
+ file.WriteObject(static_cast<u8>(sampler.IsBindless() ? 1 : 0)) != 1) {
return false;
}
}
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
index ed3178f09..801826d3d 100644
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp
@@ -7,7 +7,6 @@
#include <unordered_map>
#include "common/assert.h"
-#include "common/cityhash.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_sampler_cache.h"
@@ -28,39 +27,20 @@ static std::optional<vk::BorderColor> TryConvertBorderColor(std::array<float, 4>
}
}
-std::size_t SamplerCacheKey::Hash() const {
- static_assert(sizeof(raw) % sizeof(u64) == 0);
- return static_cast<std::size_t>(
- Common::CityHash64(reinterpret_cast<const char*>(raw.data()), sizeof(raw) / sizeof(u64)));
-}
-
-bool SamplerCacheKey::operator==(const SamplerCacheKey& rhs) const {
- return raw == rhs.raw;
-}
-
VKSamplerCache::VKSamplerCache(const VKDevice& device) : device{device} {}
VKSamplerCache::~VKSamplerCache() = default;
-vk::Sampler VKSamplerCache::GetSampler(const Tegra::Texture::TSCEntry& tsc) {
- const auto [entry, is_cache_miss] = cache.try_emplace(SamplerCacheKey{tsc});
- auto& sampler = entry->second;
- if (is_cache_miss) {
- sampler = CreateSampler(tsc);
- }
- return *sampler;
-}
-
-UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) {
- const float max_anisotropy = tsc.GetMaxAnisotropy();
- const bool has_anisotropy = max_anisotropy > 1.0f;
+UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const {
+ const float max_anisotropy{tsc.GetMaxAnisotropy()};
+ const bool has_anisotropy{max_anisotropy > 1.0f};
- const auto border_color = tsc.GetBorderColor();
- const auto vk_border_color = TryConvertBorderColor(border_color);
+ const auto border_color{tsc.GetBorderColor()};
+ const auto vk_border_color{TryConvertBorderColor(border_color)};
UNIMPLEMENTED_IF_MSG(!vk_border_color, "Unimplemented border color {} {} {} {}",
border_color[0], border_color[1], border_color[2], border_color[3]);
- constexpr bool unnormalized_coords = false;
+ constexpr bool unnormalized_coords{false};
const vk::SamplerCreateInfo sampler_ci(
{}, MaxwellToVK::Sampler::Filter(tsc.mag_filter),
@@ -73,9 +53,13 @@ UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc)
tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack),
unnormalized_coords);
- const auto& dld = device.GetDispatchLoader();
- const auto dev = device.GetLogical();
+ const auto& dld{device.GetDispatchLoader()};
+ const auto dev{device.GetLogical()};
return dev.createSamplerUnique(sampler_ci, nullptr, dld);
}
+vk::Sampler VKSamplerCache::ToSamplerType(const UniqueSampler& sampler) const {
+ return *sampler;
+}
+
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.h b/src/video_core/renderer_vulkan/vk_sampler_cache.h
index c6394dc87..771b05c73 100644
--- a/src/video_core/renderer_vulkan/vk_sampler_cache.h
+++ b/src/video_core/renderer_vulkan/vk_sampler_cache.h
@@ -8,49 +8,25 @@
#include "common/common_types.h"
#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/sampler_cache.h"
#include "video_core/textures/texture.h"
namespace Vulkan {
class VKDevice;
-struct SamplerCacheKey final : public Tegra::Texture::TSCEntry {
- std::size_t Hash() const;
-
- bool operator==(const SamplerCacheKey& rhs) const;
-
- bool operator!=(const SamplerCacheKey& rhs) const {
- return !operator==(rhs);
- }
-};
-
-} // namespace Vulkan
-
-namespace std {
-
-template <>
-struct hash<Vulkan::SamplerCacheKey> {
- std::size_t operator()(const Vulkan::SamplerCacheKey& k) const noexcept {
- return k.Hash();
- }
-};
-
-} // namespace std
-
-namespace Vulkan {
-
-class VKSamplerCache {
+class VKSamplerCache final : public VideoCommon::SamplerCache<vk::Sampler, UniqueSampler> {
public:
explicit VKSamplerCache(const VKDevice& device);
~VKSamplerCache();
- vk::Sampler GetSampler(const Tegra::Texture::TSCEntry& tsc);
+protected:
+ UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const;
-private:
- UniqueSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc);
+ vk::Sampler ToSamplerType(const UniqueSampler& sampler) const;
+private:
const VKDevice& device;
- std::unordered_map<SamplerCacheKey, UniqueSampler> cache;
};
} // namespace Vulkan
diff --git a/src/video_core/sampler_cache.cpp b/src/video_core/sampler_cache.cpp
new file mode 100644
index 000000000..53c7ef12d
--- /dev/null
+++ b/src/video_core/sampler_cache.cpp
@@ -0,0 +1,21 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/cityhash.h"
+#include "common/common_types.h"
+#include "video_core/sampler_cache.h"
+
+namespace VideoCommon {
+
+std::size_t SamplerCacheKey::Hash() const {
+ static_assert(sizeof(raw) % sizeof(u64) == 0);
+ return static_cast<std::size_t>(
+ Common::CityHash64(reinterpret_cast<const char*>(raw.data()), sizeof(raw) / sizeof(u64)));
+}
+
+bool SamplerCacheKey::operator==(const SamplerCacheKey& rhs) const {
+ return raw == rhs.raw;
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/sampler_cache.h b/src/video_core/sampler_cache.h
new file mode 100644
index 000000000..cbe3ad071
--- /dev/null
+++ b/src/video_core/sampler_cache.h
@@ -0,0 +1,60 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+#include <unordered_map>
+
+#include "video_core/textures/texture.h"
+
+namespace VideoCommon {
+
+struct SamplerCacheKey final : public Tegra::Texture::TSCEntry {
+ std::size_t Hash() const;
+
+ bool operator==(const SamplerCacheKey& rhs) const;
+
+ bool operator!=(const SamplerCacheKey& rhs) const {
+ return !operator==(rhs);
+ }
+};
+
+} // namespace VideoCommon
+
+namespace std {
+
+template <>
+struct hash<VideoCommon::SamplerCacheKey> {
+ std::size_t operator()(const VideoCommon::SamplerCacheKey& k) const noexcept {
+ return k.Hash();
+ }
+};
+
+} // namespace std
+
+namespace VideoCommon {
+
+template <typename SamplerType, typename SamplerStorageType>
+class SamplerCache {
+public:
+ SamplerType GetSampler(const Tegra::Texture::TSCEntry& tsc) {
+ const auto [entry, is_cache_miss] = cache.try_emplace(SamplerCacheKey{tsc});
+ auto& sampler = entry->second;
+ if (is_cache_miss) {
+ sampler = CreateSampler(tsc);
+ }
+ return ToSamplerType(sampler);
+ }
+
+protected:
+ virtual SamplerStorageType CreateSampler(const Tegra::Texture::TSCEntry& tsc) const = 0;
+
+ virtual SamplerType ToSamplerType(const SamplerStorageType& sampler) const = 0;
+
+private:
+ std::unordered_map<SamplerCacheKey, SamplerStorageType> cache;
+};
+
+} // namespace VideoCommon \ No newline at end of file
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index a775b402b..fa65ac9a9 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -40,7 +40,7 @@ static std::size_t GetCoordCount(TextureType texture_type) {
u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
-
+ bool is_bindless = false;
switch (opcode->get().GetId()) {
case OpCode::Id::TEX: {
if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
@@ -54,7 +54,25 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
const auto process_mode = instr.tex.GetTextureProcessMode();
WriteTexInstructionFloat(
bb, instr,
- GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi));
+ GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {}));
+ break;
+ }
+ case OpCode::Id::TEX_B: {
+ UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
+ "AOFFI is not implemented");
+
+ if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
+ LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
+ }
+
+ const TextureType texture_type{instr.tex_b.texture_type};
+ const bool is_array = instr.tex_b.array != 0;
+ const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
+ const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC);
+ const auto process_mode = instr.tex_b.GetTextureProcessMode();
+ WriteTexInstructionFloat(bb, instr,
+ GetTexCode(instr, texture_type, process_mode, depth_compare,
+ is_array, is_aoffi, {instr.gpr20}));
break;
}
case OpCode::Id::TEXS: {
@@ -134,6 +152,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
WriteTexsInstructionFloat(bb, instr, values);
break;
}
+ case OpCode::Id::TXQ_B:
+ is_bindless = true;
+ [[fallthrough]];
case OpCode::Id::TXQ: {
if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
@@ -143,7 +164,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
// Sadly, not all texture instructions specify the type of texture their sampler
// uses. This must be fixed at a later instance.
const auto& sampler =
- GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
+ is_bindless
+ ? GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false,
+ false)
+ : GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
u32 indexer = 0;
switch (instr.txq.query_type) {
@@ -154,7 +178,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
}
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
const Node value =
- Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
+ Operation(OperationCode::TextureQueryDimensions, meta,
+ GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
SetTemporal(bb, indexer++, value);
}
for (u32 i = 0; i < indexer; ++i) {
@@ -168,6 +193,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
}
break;
}
+ case OpCode::Id::TMML_B:
+ is_bindless = true;
+ [[fallthrough]];
case OpCode::Id::TMML: {
UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
"NDV is not implemented");
@@ -178,7 +206,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
auto texture_type = instr.tmml.texture_type.Value();
const bool is_array = instr.tmml.array != 0;
- const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
+ const auto& sampler = is_bindless
+ ? GetBindlessSampler(instr.gpr20, texture_type, is_array, false)
+ : GetSampler(instr.sampler, texture_type, is_array, false);
std::vector<Node> coords;
@@ -199,17 +229,19 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
coords.push_back(GetRegister(instr.gpr8.Value() + 1));
texture_type = TextureType::Texture2D;
}
-
+ u32 indexer = 0;
for (u32 element = 0; element < 2; ++element) {
+ if (!instr.tmml.IsComponentEnabled(element)) {
+ continue;
+ }
auto params = coords;
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
- SetTemporal(bb, element, value);
+ SetTemporal(bb, indexer++, value);
}
- for (u32 element = 0; element < 2; ++element) {
- SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
+ for (u32 i = 0; i < indexer; ++i) {
+ SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
}
-
break;
}
case OpCode::Id::TLDS: {
@@ -254,6 +286,34 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu
return *used_samplers.emplace(entry).first;
}
+const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type,
+ bool is_array, bool is_shadow) {
+ const Node sampler_register = GetRegister(reg);
+ const Node base_sampler =
+ TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
+ const auto cbuf = std::get_if<CbufNode>(base_sampler);
+ const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
+ ASSERT(cbuf_offset_imm != nullptr);
+ const auto cbuf_offset = cbuf_offset_imm->GetValue();
+ const auto cbuf_index = cbuf->GetIndex();
+ const u64 cbuf_key = (cbuf_index << 32) | cbuf_offset;
+
+ // If this sampler has already been used, return the existing mapping.
+ const auto itr =
+ std::find_if(used_samplers.begin(), used_samplers.end(),
+ [&](const Sampler& entry) { return entry.GetOffset() == cbuf_key; });
+ if (itr != used_samplers.end()) {
+ ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
+ itr->IsShadow() == is_shadow);
+ return *itr;
+ }
+
+ // Otherwise create a new mapping for this sampler
+ const std::size_t next_index = used_samplers.size();
+ const Sampler entry{cbuf_index, cbuf_offset, next_index, type, is_array, is_shadow};
+ return *used_samplers.emplace(entry).first;
+}
+
void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
u32 dest_elem = 0;
for (u32 elem = 0; elem < 4; ++elem) {
@@ -326,22 +386,27 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
TextureProcessMode process_mode, std::vector<Node> coords,
Node array, Node depth_compare, u32 bias_offset,
- std::vector<Node> aoffi) {
+ std::vector<Node> aoffi,
+ std::optional<Tegra::Shader::Register> bindless_reg) {
const bool is_array = array;
const bool is_shadow = depth_compare;
+ const bool is_bindless = bindless_reg.has_value();
UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
(texture_type == TextureType::TextureCube && is_array && is_shadow),
"This method is not supported.");
- const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);
+ const auto& sampler = is_bindless
+ ? GetBindlessSampler(*bindless_reg, texture_type, is_array, is_shadow)
+ : GetSampler(instr.sampler, texture_type, is_array, is_shadow);
const bool lod_needed = process_mode == TextureProcessMode::LZ ||
process_mode == TextureProcessMode::LL ||
process_mode == TextureProcessMode::LLA;
- // LOD selection (either via bias or explicit textureLod) not supported in GL for
- // sampler2DArrayShadow and samplerCubeArrayShadow.
+ // LOD selection (either via bias or explicit textureLod) not
+ // supported in GL for sampler2DArrayShadow and
+ // samplerCubeArrayShadow.
const bool gl_lod_supported =
!((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
(texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));
@@ -359,8 +424,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
lod = Immediate(0.0f);
break;
case TextureProcessMode::LB:
- // If present, lod or bias are always stored in the register indexed by the gpr20
- // field with an offset depending on the usage of the other registers
+ // If present, lod or bias are always stored in the register
+ // indexed by the gpr20 field with an offset depending on the
+ // usage of the other registers
bias = GetRegister(instr.gpr20.Value() + bias_offset);
break;
case TextureProcessMode::LL:
@@ -384,11 +450,18 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
TextureProcessMode process_mode, bool depth_compare, bool is_array,
- bool is_aoffi) {
+ bool is_aoffi, std::optional<Tegra::Shader::Register> bindless_reg) {
const bool lod_bias_enabled{
(process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)};
+ const bool is_bindless = bindless_reg.has_value();
+
u64 parameter_register = instr.gpr20.Value();
+ if (is_bindless) {
+ ++parameter_register;
+ }
+
+ const u32 bias_lod_offset = (is_bindless ? 1 : 0);
if (lod_bias_enabled) {
++parameter_register;
}
@@ -423,7 +496,8 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
dc = GetRegister(parameter_register++);
}
- return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi);
+ return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset,
+ aoffi, bindless_reg);
}
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
@@ -459,7 +533,8 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
dc = GetRegister(depth_register);
}
- return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {});
+ return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {},
+ {});
}
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 1afab08c0..57af8b10f 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -196,9 +196,23 @@ enum class ExitMethod {
class Sampler {
public:
+ // Use this constructor for bounded Samplers
explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
bool is_array, bool is_shadow)
- : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow} {}
+ : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow},
+ is_bindless{false} {}
+
+ // Use this constructor for bindless Samplers
+ explicit Sampler(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
+ Tegra::Shader::TextureType type, bool is_array, bool is_shadow)
+ : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type},
+ is_array{is_array}, is_shadow{is_shadow}, is_bindless{true} {}
+
+ // Use this only for serialization/deserialization
+ explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
+ bool is_array, bool is_shadow, bool is_bindless)
+ : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow},
+ is_bindless{is_bindless} {}
std::size_t GetOffset() const {
return offset;
@@ -220,6 +234,14 @@ public:
return is_shadow;
}
+ bool IsBindless() const {
+ return is_bindless;
+ }
+
+ std::pair<u32, u32> GetBindlessCBuf() const {
+ return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)};
+ }
+
bool operator<(const Sampler& rhs) const {
return std::tie(offset, index, type, is_array, is_shadow) <
std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_array, rhs.is_shadow);
@@ -231,8 +253,9 @@ private:
std::size_t offset{};
std::size_t index{}; ///< Value used to index into the generated GLSL sampler array.
Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
- bool is_array{}; ///< Whether the texture is being sampled as an array texture or not.
- bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not.
+ bool is_array{}; ///< Whether the texture is being sampled as an array texture or not.
+ bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not.
+ bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not.
};
class ConstBuffer {
@@ -735,6 +758,11 @@ private:
const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler,
Tegra::Shader::TextureType type, bool is_array, bool is_shadow);
+ // Accesses a texture sampler for a bindless texture.
+ const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg,
+ Tegra::Shader::TextureType type, bool is_array,
+ bool is_shadow);
+
/// Extracts a sequence of bits from a node
Node BitfieldExtract(Node value, u32 offset, u32 bits);
@@ -748,7 +776,8 @@ private:
Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
- bool is_array, bool is_aoffi);
+ bool is_array, bool is_aoffi,
+ std::optional<Tegra::Shader::Register> bindless_reg);
Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
@@ -768,7 +797,8 @@ private:
Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
- Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi);
+ Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi,
+ std::optional<Tegra::Shader::Register> bindless_reg);
Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
u64 byte_height);
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index cb82ecf3f..60cda0ca3 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -5,6 +5,8 @@
#include <memory>
#include "core/core.h"
#include "core/settings.h"
+#include "video_core/gpu_asynch.h"
+#include "video_core/gpu_synch.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/video_core.h"
@@ -16,6 +18,14 @@ std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_wind
return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system);
}
+std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system) {
+ if (Settings::values.use_asynchronous_gpu_emulation) {
+ return std::make_unique<VideoCommon::GPUAsynch>(system, system.Renderer());
+ }
+
+ return std::make_unique<VideoCommon::GPUSynch>(system, system.Renderer());
+}
+
u16 GetResolutionScaleFactor(const RendererBase& renderer) {
return static_cast<u16>(
Settings::values.resolution_factor
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index 3c583f195..b8e0ac372 100644
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -14,6 +14,10 @@ namespace Core::Frontend {
class EmuWindow;
}
+namespace Tegra {
+class GPU;
+}
+
namespace VideoCore {
class RendererBase;
@@ -27,6 +31,9 @@ class RendererBase;
std::unique_ptr<RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
Core::System& system);
+/// Creates an emulated GPU instance using the given system context.
+std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system);
+
u16 GetResolutionScaleFactor(const RendererBase& renderer);
} // namespace VideoCore
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index c29f2d2dc..7eed9fcf3 100644
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -91,8 +91,8 @@ void EmuThread::run() {
class GGLContext : public Core::Frontend::GraphicsContext {
public:
- explicit GGLContext(QOpenGLContext* shared_context) : surface() {
- context = std::make_unique<QOpenGLContext>(shared_context);
+ explicit GGLContext(QOpenGLContext* shared_context)
+ : context{std::make_unique<QOpenGLContext>(shared_context)} {
surface.setFormat(shared_context->format());
surface.create();
}
@@ -186,8 +186,7 @@ private:
};
GRenderWindow::GRenderWindow(QWidget* parent, EmuThread* emu_thread)
- : QWidget(parent), child(nullptr), context(nullptr), emu_thread(emu_thread) {
-
+ : QWidget(parent), emu_thread(emu_thread) {
setWindowTitle(QStringLiteral("yuzu %1 | %2-%3")
.arg(Common::g_build_name, Common::g_scm_branch, Common::g_scm_desc));
setAttribute(Qt::WA_AcceptTouchEvents);
diff --git a/src/yuzu/bootmanager.h b/src/yuzu/bootmanager.h
index 9608b959f..3df33aca1 100644
--- a/src/yuzu/bootmanager.h
+++ b/src/yuzu/bootmanager.h
@@ -10,7 +10,6 @@
#include <QImage>
#include <QThread>
#include <QWidget>
-#include "common/thread.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"