summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- CMakeLists.txt 2
-rw-r--r-- src/audio_core/audio_renderer.cpp 12
-rw-r--r-- src/audio_core/audio_renderer.h 5
-rw-r--r-- src/audio_core/cubeb_sink.cpp 6
-rw-r--r-- src/common/thread_queue_list.h 10
-rw-r--r-- src/core/core_cpu.cpp 3
-rw-r--r-- src/core/core_cpu.h 2
-rw-r--r-- src/core/core_timing.cpp 2
-rw-r--r-- src/core/file_sys/partition_filesystem.h 2
-rw-r--r-- src/core/file_sys/program_metadata.h 2
-rw-r--r-- src/core/file_sys/vfs.h 20
-rw-r--r-- src/core/file_sys/vfs_offset.h 3
-rw-r--r-- src/core/file_sys/vfs_vector.h 3
-rw-r--r-- src/core/hle/kernel/scheduler.cpp 2
-rw-r--r-- src/core/hle/kernel/scheduler.h 2
-rw-r--r-- src/core/hle/kernel/svc.cpp 3
-rw-r--r-- src/core/hle/kernel/thread.cpp 32
-rw-r--r-- src/core/hle/service/audio/audren_u.cpp 44
-rw-r--r-- src/core/hle/service/audio/audren_u.h 1
-rw-r--r-- src/core/hle/service/hid/hid.cpp 17
-rw-r--r-- src/core/hle/service/service.h 2
-rw-r--r-- src/core/loader/loader.cpp 2
-rw-r--r-- src/core/loader/loader.h 2
-rw-r--r-- src/video_core/engines/shader_bytecode.h 29
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 192
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h 23
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 95
-rw-r--r-- src/video_core/renderer_opengl/gl_state.cpp 15
-rw-r--r-- src/video_core/renderer_opengl/gl_state.h 6
-rw-r--r-- src/video_core/renderer_opengl/gl_stream_buffer.cpp 201
-rw-r--r-- src/video_core/renderer_opengl/gl_stream_buffer.h 42
31 files changed, 433 insertions, 349 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e7fea4fbf..ff8385e3a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -188,7 +188,7 @@ if (ENABLE_SDL2)
if (YUZU_USE_BUNDLED_SDL2)
# Detect toolchain and platform
if ((MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1920) AND ARCHITECTURE_x86_64)
- set(SDL2_VER "SDL2-2.0.5")
+ set(SDL2_VER "SDL2-2.0.8")
else()
message(FATAL_ERROR "No bundled SDL2 binaries for your toolchain. Disable YUZU_USE_BUNDLED_SDL2 and provide your own.")
endif()
diff --git a/src/audio_core/audio_renderer.cpp b/src/audio_core/audio_renderer.cpp
index 282f345c5..6ebed3fb0 100644
--- a/src/audio_core/audio_renderer.cpp
+++ b/src/audio_core/audio_renderer.cpp
@@ -26,6 +26,18 @@ AudioRenderer::AudioRenderer(AudioRendererParameter params,
QueueMixedBuffer(2);
}
+u32 AudioRenderer::GetSampleRate() const {
+ return worker_params.sample_rate;
+}
+
+u32 AudioRenderer::GetSampleCount() const {
+ return worker_params.sample_count;
+}
+
+u32 AudioRenderer::GetMixBufferCount() const {
+ return worker_params.mix_buffer_count;
+}
+
std::vector<u8> AudioRenderer::UpdateAudioRenderer(const std::vector<u8>& input_params) {
// Copy UpdateDataHeader struct
UpdateDataHeader config{};
diff --git a/src/audio_core/audio_renderer.h b/src/audio_core/audio_renderer.h
index 6950a4681..13c5d0adc 100644
--- a/src/audio_core/audio_renderer.h
+++ b/src/audio_core/audio_renderer.h
@@ -26,7 +26,7 @@ enum class PlayState : u8 {
struct AudioRendererParameter {
u32_le sample_rate;
u32_le sample_count;
- u32_le unknown_8;
+ u32_le mix_buffer_count;
u32_le unknown_c;
u32_le voice_count;
u32_le sink_count;
@@ -160,6 +160,9 @@ public:
std::vector<u8> UpdateAudioRenderer(const std::vector<u8>& input_params);
void QueueMixedBuffer(Buffer::Tag tag);
void ReleaseAndQueueBuffers();
+ u32 GetSampleRate() const;
+ u32 GetSampleCount() const;
+ u32 GetMixBufferCount() const;
private:
class VoiceState {
diff --git a/src/audio_core/cubeb_sink.cpp b/src/audio_core/cubeb_sink.cpp
index 1501ef1f4..5a1177d0c 100644
--- a/src/audio_core/cubeb_sink.cpp
+++ b/src/audio_core/cubeb_sink.cpp
@@ -4,6 +4,7 @@
#include <algorithm>
#include <cstring>
+#include <mutex>
#include "audio_core/cubeb_sink.h"
#include "audio_core/stream.h"
@@ -66,6 +67,8 @@ public:
return;
}
+ std::lock_guard lock{queue_mutex};
+
queue.reserve(queue.size() + samples.size() * GetNumChannels());
if (is_6_channel) {
@@ -94,6 +97,7 @@ private:
u32 num_channels{};
bool is_6_channel{};
+ std::mutex queue_mutex;
std::vector<s16> queue;
static long DataCallback(cubeb_stream* stream, void* user_data, const void* input_buffer,
@@ -153,6 +157,8 @@ long SinkStreamImpl::DataCallback(cubeb_stream* stream, void* user_data, const v
return {};
}
+ std::lock_guard lock{impl->queue_mutex};
+
const size_t frames_to_write{
std::min(impl->queue.size() / impl->GetNumChannels(), static_cast<size_t>(num_frames))};
diff --git a/src/common/thread_queue_list.h b/src/common/thread_queue_list.h
index 38a450d69..133122c5f 100644
--- a/src/common/thread_queue_list.h
+++ b/src/common/thread_queue_list.h
@@ -16,7 +16,7 @@ struct ThreadQueueList {
// (dynamically resizable) circular buffers to remove their overhead when
// inserting and popping.
- typedef unsigned int Priority;
+ using Priority = unsigned int;
// Number of priority levels. (Valid levels are [0..NUM_QUEUES).)
static const Priority NUM_QUEUES = N;
@@ -26,9 +26,9 @@ struct ThreadQueueList {
}
// Only for debugging, returns priority level.
- Priority contains(const T& uid) {
+ Priority contains(const T& uid) const {
for (Priority i = 0; i < NUM_QUEUES; ++i) {
- Queue& cur = queues[i];
+ const Queue& cur = queues[i];
if (std::find(cur.data.cbegin(), cur.data.cend(), uid) != cur.data.cend()) {
return i;
}
@@ -37,8 +37,8 @@ struct ThreadQueueList {
return -1;
}
- T get_first() {
- Queue* cur = first;
+ T get_first() const {
+ const Queue* cur = first;
while (cur != nullptr) {
if (!cur->data.empty()) {
return cur->data.front();
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp
index 3f1c70624..b042ee02b 100644
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -14,6 +14,7 @@
#include "core/core_timing.h"
#include "core/hle/kernel/scheduler.h"
#include "core/hle/kernel/thread.h"
+#include "core/hle/lock.h"
#include "core/settings.h"
namespace Core {
@@ -126,6 +127,8 @@ void Cpu::Reschedule() {
}
reschedule_pending = false;
+ // Lock the global kernel mutex when we manipulate the HLE state
+ std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
scheduler->Reschedule();
}
diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h
index 976952903..56cdae194 100644
--- a/src/core/core_cpu.h
+++ b/src/core/core_cpu.h
@@ -79,7 +79,7 @@ private:
std::shared_ptr<CpuBarrier> cpu_barrier;
std::shared_ptr<Kernel::Scheduler> scheduler;
- bool reschedule_pending{};
+ std::atomic<bool> reschedule_pending = false;
size_t core_index;
};
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index d3bb6f818..f977d1b32 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -135,11 +135,9 @@ void ClearPendingEvents() {
void ScheduleEvent(s64 cycles_into_future, const EventType* event_type, u64 userdata) {
ASSERT(event_type != nullptr);
s64 timeout = GetTicks() + cycles_into_future;
-
// If this event needs to be scheduled before the next advance(), force one early
if (!is_global_timer_sane)
ForceExceptionCheck(cycles_into_future);
-
event_queue.emplace_back(Event{timeout, event_fifo_id++, userdata, event_type});
std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
}
diff --git a/src/core/file_sys/partition_filesystem.h b/src/core/file_sys/partition_filesystem.h
index 7c7a75816..be7bc32a8 100644
--- a/src/core/file_sys/partition_filesystem.h
+++ b/src/core/file_sys/partition_filesystem.h
@@ -13,7 +13,7 @@
#include "core/file_sys/vfs.h"
namespace Loader {
-enum class ResultStatus;
+enum class ResultStatus : u16;
}
namespace FileSys {
diff --git a/src/core/file_sys/program_metadata.h b/src/core/file_sys/program_metadata.h
index 06a7315db..74a91052b 100644
--- a/src/core/file_sys/program_metadata.h
+++ b/src/core/file_sys/program_metadata.h
@@ -13,7 +13,7 @@
#include "partition_filesystem.h"
namespace Loader {
-enum class ResultStatus;
+enum class ResultStatus : u16;
}
namespace FileSys {
diff --git a/src/core/file_sys/vfs.h b/src/core/file_sys/vfs.h
index 141a053ce..78a63c59b 100644
--- a/src/core/file_sys/vfs.h
+++ b/src/core/file_sys/vfs.h
@@ -15,9 +15,9 @@
namespace FileSys {
-struct VfsFilesystem;
-struct VfsFile;
-struct VfsDirectory;
+class VfsDirectory;
+class VfsFile;
+class VfsFilesystem;
// Convenience typedefs to use Vfs* interfaces
using VirtualFilesystem = std::shared_ptr<VfsFilesystem>;
@@ -34,8 +34,9 @@ enum class VfsEntryType {
// A class representing an abstract filesystem. A default implementation given the root VirtualDir
// is provided for convenience, but if the Vfs implementation has any additional state or
// functionality, they will need to override.
-struct VfsFilesystem : NonCopyable {
- VfsFilesystem(VirtualDir root);
+class VfsFilesystem : NonCopyable {
+public:
+ explicit VfsFilesystem(VirtualDir root);
virtual ~VfsFilesystem();
// Gets the friendly name for the filesystem.
@@ -81,7 +82,8 @@ protected:
};
// A class representing a file in an abstract filesystem.
-struct VfsFile : NonCopyable {
+class VfsFile : NonCopyable {
+public:
virtual ~VfsFile();
// Retrieves the file name.
@@ -179,7 +181,8 @@ struct VfsFile : NonCopyable {
};
// A class representing a directory in an abstract filesystem.
-struct VfsDirectory : NonCopyable {
+class VfsDirectory : NonCopyable {
+public:
virtual ~VfsDirectory();
// Retrives the file located at path as if the current directory was root. Returns nullptr if
@@ -295,7 +298,8 @@ protected:
// A convenience partial-implementation of VfsDirectory that stubs out methods that should only work
// if writable. This is to avoid redundant empty methods everywhere.
-struct ReadOnlyVfsDirectory : public VfsDirectory {
+class ReadOnlyVfsDirectory : public VfsDirectory {
+public:
bool IsWritable() const override;
bool IsReadable() const override;
std::shared_ptr<VfsDirectory> CreateSubdirectory(std::string_view name) override;
diff --git a/src/core/file_sys/vfs_offset.h b/src/core/file_sys/vfs_offset.h
index 235970dc5..cb92d1570 100644
--- a/src/core/file_sys/vfs_offset.h
+++ b/src/core/file_sys/vfs_offset.h
@@ -15,7 +15,8 @@ namespace FileSys {
// Similar to seeking to an offset.
// If the file is writable, operations that would write past the end of the offset file will expand
// the size of this wrapper.
-struct OffsetVfsFile : public VfsFile {
+class OffsetVfsFile : public VfsFile {
+public:
OffsetVfsFile(std::shared_ptr<VfsFile> file, size_t size, size_t offset = 0,
std::string new_name = "", VirtualDir new_parent = nullptr);
diff --git a/src/core/file_sys/vfs_vector.h b/src/core/file_sys/vfs_vector.h
index ba469647b..b3b468233 100644
--- a/src/core/file_sys/vfs_vector.h
+++ b/src/core/file_sys/vfs_vector.h
@@ -10,7 +10,8 @@ namespace FileSys {
// An implementation of VfsDirectory that maintains two vectors for subdirectories and files.
// Vector data is supplied upon construction.
-struct VectorVfsDirectory : public VfsDirectory {
+class VectorVfsDirectory : public VfsDirectory {
+public:
explicit VectorVfsDirectory(std::vector<VirtualFile> files = {},
std::vector<VirtualDir> dirs = {}, VirtualDir parent = nullptr,
std::string name = "");
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index 94065c736..e770b9103 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -25,7 +25,7 @@ Scheduler::~Scheduler() {
}
}
-bool Scheduler::HaveReadyThreads() {
+bool Scheduler::HaveReadyThreads() const {
std::lock_guard<std::mutex> lock(scheduler_mutex);
return ready_queue.get_first() != nullptr;
}
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index 1a4ee8f36..6a61ef64e 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -21,7 +21,7 @@ public:
~Scheduler();
/// Returns whether there are any threads that are ready to run.
- bool HaveReadyThreads();
+ bool HaveReadyThreads() const;
/// Reschedules to the next available thread (call after current thread is suspended)
void Reschedule();
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 4ca481513..b24f409b3 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -705,8 +705,7 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
Handle owner_handle = static_cast<Handle>(mutex_val & Mutex::MutexOwnerMask);
auto owner = g_handle_table.Get<Thread>(owner_handle);
ASSERT(owner);
- ASSERT(thread->status != ThreadStatus::Running);
- thread->status = ThreadStatus::WaitMutex;
+ ASSERT(thread->status == ThreadStatus::WaitMutex);
thread->wakeup_callback = nullptr;
owner->AddMutexWaiter(thread);
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index b9022feae..a1a7867ce 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -23,6 +23,7 @@
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/thread.h"
+#include "core/hle/lock.h"
#include "core/hle/result.h"
#include "core/memory.h"
@@ -104,6 +105,10 @@ void ExitCurrentThread() {
*/
static void ThreadWakeupCallback(u64 thread_handle, int cycles_late) {
const auto proper_handle = static_cast<Handle>(thread_handle);
+
+ // Lock the global kernel mutex when we enter the kernel HLE.
+ std::lock_guard<std::recursive_mutex> lock(HLE::g_hle_lock);
+
SharedPtr<Thread> thread = wakeup_callback_handle_table.Get<Thread>(proper_handle);
if (thread == nullptr) {
LOG_CRITICAL(Kernel, "Callback fired for invalid thread {:08X}", proper_handle);
@@ -155,8 +160,10 @@ void Thread::WakeAfterDelay(s64 nanoseconds) {
if (nanoseconds == -1)
return;
- CoreTiming::ScheduleEvent(CoreTiming::nsToCycles(nanoseconds), ThreadWakeupEventType,
- callback_handle);
+ // This function might be called from any thread so we have to be cautious and use the
+ // thread-safe version of ScheduleEvent.
+ CoreTiming::ScheduleEventThreadsafe(CoreTiming::nsToCycles(nanoseconds), ThreadWakeupEventType,
+ callback_handle);
}
void Thread::CancelWakeupTimer() {
@@ -419,12 +426,33 @@ VAddr Thread::GetCommandBufferAddress() const {
}
void Thread::AddMutexWaiter(SharedPtr<Thread> thread) {
+ if (thread->lock_owner == this) {
+ // If the thread is already waiting for this thread to release the mutex, ensure that the
+ // waiters list is consistent and return without doing anything.
+ auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
+ ASSERT(itr != wait_mutex_threads.end());
+ return;
+ }
+
+ // A thread can't wait on two different mutexes at the same time.
+ ASSERT(thread->lock_owner == nullptr);
+
+ // Ensure that the thread is not already in the list of mutex waiters
+ auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
+ ASSERT(itr == wait_mutex_threads.end());
+
thread->lock_owner = this;
wait_mutex_threads.emplace_back(std::move(thread));
UpdatePriority();
}
void Thread::RemoveMutexWaiter(SharedPtr<Thread> thread) {
+ ASSERT(thread->lock_owner == this);
+
+ // Ensure that the thread is in the list of mutex waiters
+ auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
+ ASSERT(itr != wait_mutex_threads.end());
+
boost::remove_erase(wait_mutex_threads, thread);
thread->lock_owner = nullptr;
UpdatePriority();
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index f99304de5..9e75eb3a6 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -20,9 +20,9 @@ public:
explicit IAudioRenderer(AudioCore::AudioRendererParameter audren_params)
: ServiceFramework("IAudioRenderer") {
static const FunctionInfo functions[] = {
- {0, nullptr, "GetAudioRendererSampleRate"},
- {1, nullptr, "GetAudioRendererSampleCount"},
- {2, nullptr, "GetAudioRendererMixBufferCount"},
+ {0, &IAudioRenderer::GetAudioRendererSampleRate, "GetAudioRendererSampleRate"},
+ {1, &IAudioRenderer::GetAudioRendererSampleCount, "GetAudioRendererSampleCount"},
+ {2, &IAudioRenderer::GetAudioRendererMixBufferCount, "GetAudioRendererMixBufferCount"},
{3, nullptr, "GetAudioRendererState"},
{4, &IAudioRenderer::RequestUpdateAudioRenderer, "RequestUpdateAudioRenderer"},
{5, &IAudioRenderer::StartAudioRenderer, "StartAudioRenderer"},
@@ -45,6 +45,27 @@ private:
system_event->Signal();
}
+ void GetAudioRendererSampleRate(Kernel::HLERequestContext& ctx) {
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(RESULT_SUCCESS);
+ rb.Push<u32>(renderer->GetSampleRate());
+ LOG_DEBUG(Service_Audio, "called");
+ }
+
+ void GetAudioRendererSampleCount(Kernel::HLERequestContext& ctx) {
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(RESULT_SUCCESS);
+ rb.Push<u32>(renderer->GetSampleCount());
+ LOG_DEBUG(Service_Audio, "called");
+ }
+
+ void GetAudioRendererMixBufferCount(Kernel::HLERequestContext& ctx) {
+ IPC::ResponseBuilder rb{ctx, 3};
+ rb.Push(RESULT_SUCCESS);
+ rb.Push<u32>(renderer->GetMixBufferCount());
+ LOG_DEBUG(Service_Audio, "called");
+ }
+
void RequestUpdateAudioRenderer(Kernel::HLERequestContext& ctx) {
ctx.WriteBuffer(renderer->UpdateAudioRenderer(ctx.ReadBuffer()));
IPC::ResponseBuilder rb{ctx, 2};
@@ -169,7 +190,8 @@ AudRenU::AudRenU() : ServiceFramework("audren:u") {
{1, &AudRenU::GetAudioRendererWorkBufferSize, "GetAudioRendererWorkBufferSize"},
{2, &AudRenU::GetAudioDevice, "GetAudioDevice"},
{3, nullptr, "OpenAudioRendererAuto"},
- {4, nullptr, "GetAudioDeviceServiceWithRevisionInfo"},
+ {4, &AudRenU::GetAudioDeviceServiceWithRevisionInfo,
+ "GetAudioDeviceServiceWithRevisionInfo"},
};
RegisterHandlers(functions);
}
@@ -189,7 +211,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp{ctx};
auto params = rp.PopRaw<AudioCore::AudioRendererParameter>();
- u64 buffer_sz = Common::AlignUp(4 * params.unknown_8, 0x40);
+ u64 buffer_sz = Common::AlignUp(4 * params.mix_buffer_count, 0x40);
buffer_sz += params.unknown_c * 1024;
buffer_sz += 0x940 * (params.unknown_c + 1);
buffer_sz += 0x3F0 * params.voice_count;
@@ -197,7 +219,7 @@ void AudRenU::GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx) {
buffer_sz += Common::AlignUp(8 * params.voice_count, 0x10);
buffer_sz +=
Common::AlignUp((0x3C0 * (params.sink_count + params.unknown_c) + 4 * params.sample_count) *
- (params.unknown_8 + 6),
+ (params.mix_buffer_count + 6),
0x40);
if (IsFeatureSupported(AudioFeatures::Splitter, params.revision)) {
@@ -253,6 +275,16 @@ void AudRenU::GetAudioDevice(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_Audio, "called");
}
+void AudRenU::GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx) {
+ IPC::ResponseBuilder rb{ctx, 2, 0, 1};
+
+ rb.Push(RESULT_SUCCESS);
+ rb.PushIpcInterface<Audio::IAudioDevice>();
+
+ LOG_WARNING(Service_Audio, "(STUBBED) called"); // TODO(ogniK): Figure out what is different
+ // based on the current revision
+}
+
bool AudRenU::IsFeatureSupported(AudioFeatures feature, u32_le revision) const {
u32_be version_num = (revision - Common::MakeMagic('R', 'E', 'V', '0')); // Byte swap
switch (feature) {
diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h
index 14907f8ae..8600ac6e4 100644
--- a/src/core/hle/service/audio/audren_u.h
+++ b/src/core/hle/service/audio/audren_u.h
@@ -22,6 +22,7 @@ private:
void OpenAudioRenderer(Kernel::HLERequestContext& ctx);
void GetAudioRendererWorkBufferSize(Kernel::HLERequestContext& ctx);
void GetAudioDevice(Kernel::HLERequestContext& ctx);
+ void GetAudioDeviceServiceWithRevisionInfo(Kernel::HLERequestContext& ctx);
enum class AudioFeatures : u32 {
Splitter,
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index dcdfa0e19..970942d3f 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -291,6 +291,7 @@ private:
class Hid final : public ServiceFramework<Hid> {
public:
Hid() : ServiceFramework("hid") {
+ // clang-format off
static const FunctionInfo functions[] = {
{0, &Hid::CreateAppletResource, "CreateAppletResource"},
{1, &Hid::ActivateDebugPad, "ActivateDebugPad"},
@@ -333,15 +334,13 @@ public:
{102, &Hid::SetSupportedNpadIdType, "SetSupportedNpadIdType"},
{103, &Hid::ActivateNpad, "ActivateNpad"},
{104, nullptr, "DeactivateNpad"},
- {106, &Hid::AcquireNpadStyleSetUpdateEventHandle,
- "AcquireNpadStyleSetUpdateEventHandle"},
- {107, nullptr, "DisconnectNpad"},
+ {106, &Hid::AcquireNpadStyleSetUpdateEventHandle, "AcquireNpadStyleSetUpdateEventHandle"},
+ {107, &Hid::DisconnectNpad, "DisconnectNpad"},
{108, &Hid::GetPlayerLedPattern, "GetPlayerLedPattern"},
{109, nullptr, "ActivateNpadWithRevision"},
{120, &Hid::SetNpadJoyHoldType, "SetNpadJoyHoldType"},
{121, &Hid::GetNpadJoyHoldType, "GetNpadJoyHoldType"},
- {122, &Hid::SetNpadJoyAssignmentModeSingleByDefault,
- "SetNpadJoyAssignmentModeSingleByDefault"},
+ {122, &Hid::SetNpadJoyAssignmentModeSingleByDefault, "SetNpadJoyAssignmentModeSingleByDefault"},
{123, nullptr, "SetNpadJoyAssignmentModeSingleByDefault"},
{124, &Hid::SetNpadJoyAssignmentModeDual, "SetNpadJoyAssignmentModeDual"},
{125, &Hid::MergeSingleJoyAsDualJoy, "MergeSingleJoyAsDualJoy"},
@@ -398,6 +397,8 @@ public:
{1000, nullptr, "SetNpadCommunicationMode"},
{1001, nullptr, "GetNpadCommunicationMode"},
};
+ // clang-format on
+
RegisterHandlers(functions);
event = Kernel::Event::Create(Kernel::ResetType::OneShot, "hid:EventHandle");
@@ -496,6 +497,12 @@ private:
LOG_WARNING(Service_HID, "(STUBBED) called");
}
+ void DisconnectNpad(Kernel::HLERequestContext& ctx) {
+ IPC::ResponseBuilder rb{ctx, 2};
+ rb.Push(RESULT_SUCCESS);
+ LOG_WARNING(Service_HID, "(STUBBED) called");
+ }
+
void GetPlayerLedPattern(Kernel::HLERequestContext& ctx) {
IPC::ResponseBuilder rb{ctx, 2};
rb.Push(RESULT_SUCCESS);
diff --git a/src/core/hle/service/service.h b/src/core/hle/service/service.h
index 8a294c0f2..cd9c74f3d 100644
--- a/src/core/hle/service/service.h
+++ b/src/core/hle/service/service.h
@@ -23,7 +23,7 @@ class HLERequestContext;
} // namespace Kernel
namespace FileSys {
-struct VfsFilesystem;
+class VfsFilesystem;
}
namespace Service {
diff --git a/src/core/loader/loader.cpp b/src/core/loader/loader.cpp
index 2f5bfc67c..1f2f31535 100644
--- a/src/core/loader/loader.cpp
+++ b/src/core/loader/loader.cpp
@@ -126,7 +126,7 @@ constexpr std::array<const char*, 36> RESULT_MESSAGES{
};
std::string GetMessageForResultStatus(ResultStatus status) {
- return GetMessageForResultStatus(static_cast<size_t>(status));
+ return GetMessageForResultStatus(static_cast<u16>(status));
}
std::string GetMessageForResultStatus(u16 status) {
diff --git a/src/core/loader/loader.h b/src/core/loader/loader.h
index cfdadbee3..285363549 100644
--- a/src/core/loader/loader.h
+++ b/src/core/loader/loader.h
@@ -56,7 +56,7 @@ FileType GuessFromFilename(const std::string& name);
std::string GetFileTypeString(FileType type);
/// Return type for functions in Loader namespace
-enum class ResultStatus {
+enum class ResultStatus : u16 {
Success,
ErrorAlreadyLoaded,
ErrorNotImplemented,
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 9f64b248b..2526ebf28 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -200,6 +200,14 @@ enum class IMinMaxExchange : u64 {
XHi = 3,
};
+enum class XmadMode : u64 {
+ None = 0,
+ CLo = 1,
+ CHi = 2,
+ CSfu = 3,
+ CBcc = 4,
+};
+
enum class FlowCondition : u64 {
Always = 0xF,
Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for?
@@ -457,6 +465,18 @@ union Instruction {
} bra;
union {
+ BitField<20, 16, u64> imm20_16;
+ BitField<36, 1, u64> product_shift_left;
+ BitField<37, 1, u64> merge_37;
+ BitField<48, 1, u64> sign_a;
+ BitField<49, 1, u64> sign_b;
+ BitField<50, 3, XmadMode> mode;
+ BitField<52, 1, u64> high_b;
+ BitField<53, 1, u64> high_a;
+ BitField<56, 1, u64> merge_56;
+ } xmad;
+
+ union {
BitField<20, 14, u64> offset;
BitField<34, 5, u64> index;
} cbuf34;
@@ -593,6 +613,7 @@ public:
IntegerSetPredicate,
PredicateSetPredicate,
Conversion,
+ Xmad,
Unknown,
};
@@ -782,10 +803,10 @@ private:
INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"),
INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"),
INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
- INST("0011011-00------", Id::XMAD_IMM, Type::Arithmetic, "XMAD_IMM"),
- INST("0100111---------", Id::XMAD_CR, Type::Arithmetic, "XMAD_CR"),
- INST("010100010-------", Id::XMAD_RC, Type::Arithmetic, "XMAD_RC"),
- INST("0101101100------", Id::XMAD_RR, Type::Arithmetic, "XMAD_RR"),
+ INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"),
+ INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"),
+ INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"),
+ INST("0101101100------", Id::XMAD_RR, Type::Xmad, "XMAD_RR"),
};
#undef INST
std::stable_sort(table.begin(), table.end(), [](const auto& a, const auto& b) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 38a7b1413..52a649e2f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -36,30 +36,21 @@ MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
-RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) : emu_window{window} {
+RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window)
+ : emu_window{window}, stream_buffer(GL_ARRAY_BUFFER, STREAM_BUFFER_SIZE) {
// Create sampler objects
for (size_t i = 0; i < texture_samplers.size(); ++i) {
texture_samplers[i].Create();
state.texture_units[i].sampler = texture_samplers[i].sampler.handle;
}
- // Create SSBOs
- for (size_t stage = 0; stage < ssbos.size(); ++stage) {
- for (size_t buffer = 0; buffer < ssbos[stage].size(); ++buffer) {
- ssbos[stage][buffer].Create();
- state.draw.const_buffers[stage][buffer].ssbo = ssbos[stage][buffer].handle;
- }
- }
-
GLint ext_num;
glGetIntegerv(GL_NUM_EXTENSIONS, &ext_num);
for (GLint i = 0; i < ext_num; i++) {
const std::string_view extension{
reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i))};
- if (extension == "GL_ARB_buffer_storage") {
- has_ARB_buffer_storage = true;
- } else if (extension == "GL_ARB_direct_state_access") {
+ if (extension == "GL_ARB_direct_state_access") {
has_ARB_direct_state_access = true;
} else if (extension == "GL_ARB_separate_shader_objects") {
has_ARB_separate_shader_objects = true;
@@ -86,47 +77,31 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window) : emu_wind
hw_vao.Create();
- stream_buffer = OGLStreamBuffer::MakeBuffer(has_ARB_buffer_storage, GL_ARRAY_BUFFER);
- stream_buffer->Create(STREAM_BUFFER_SIZE, STREAM_BUFFER_SIZE / 2);
- state.draw.vertex_buffer = stream_buffer->GetHandle();
+ state.draw.vertex_buffer = stream_buffer.GetHandle();
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
state.draw.shader_program = 0;
state.draw.vertex_array = hw_vao.handle;
state.Apply();
- glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer->GetHandle());
-
- for (unsigned index = 0; index < uniform_buffers.size(); ++index) {
- auto& buffer = uniform_buffers[index];
- buffer.Create();
- glBindBuffer(GL_UNIFORM_BUFFER, buffer.handle);
- glBufferData(GL_UNIFORM_BUFFER, sizeof(GLShader::MaxwellUniformData), nullptr,
- GL_STREAM_COPY);
- glBindBufferBase(GL_UNIFORM_BUFFER, index, buffer.handle);
- }
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer.GetHandle());
glEnable(GL_BLEND);
+ glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
+
LOG_CRITICAL(Render_OpenGL, "Sync fixed function OpenGL state here!");
}
-RasterizerOpenGL::~RasterizerOpenGL() {
- if (stream_buffer != nullptr) {
- state.draw.vertex_buffer = stream_buffer->GetHandle();
- state.Apply();
- stream_buffer->Release();
- }
-}
+RasterizerOpenGL::~RasterizerOpenGL() {}
std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
GLintptr buffer_offset) {
MICROPROFILE_SCOPE(OpenGL_VAO);
const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
- const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager;
state.draw.vertex_array = hw_vao.handle;
- state.draw.vertex_buffer = stream_buffer->GetHandle();
+ state.draw.vertex_buffer = stream_buffer.GetHandle();
state.Apply();
// Upload all guest vertex arrays sequentially to our buffer
@@ -141,16 +116,15 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
ASSERT(end > start);
u64 size = end - start + 1;
- // Copy vertex array data
- Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size);
+ GLintptr vertex_buffer_offset;
+ std::tie(array_ptr, buffer_offset, vertex_buffer_offset) =
+ UploadMemory(array_ptr, buffer_offset, start, size);
// Bind the vertex array to the buffer at the current offset.
- glBindVertexBuffer(index, stream_buffer->GetHandle(), buffer_offset, vertex_array.stride);
+ glBindVertexBuffer(index, stream_buffer.GetHandle(), vertex_buffer_offset,
+ vertex_array.stride);
ASSERT_MSG(vertex_array.divisor == 0, "Vertex buffer divisor unimplemented");
-
- array_ptr += size;
- buffer_offset += size;
}
// Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
@@ -201,22 +175,12 @@ static GLShader::ProgramCode GetShaderProgramCode(Maxwell::ShaderProgram program
return program_code;
}
-void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
- // Helper function for uploading uniform data
- const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) {
- if (has_ARB_direct_state_access) {
- glCopyNamedBufferSubData(stream_buffer->GetHandle(), handle, offset, 0, size);
- } else {
- glBindBuffer(GL_COPY_WRITE_BUFFER, handle);
- glCopyBufferSubData(GL_ARRAY_BUFFER, GL_COPY_WRITE_BUFFER, offset, 0, size);
- }
- };
-
+std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
// Next available bindpoints to use when uploading the const buffers and textures to the GLSL
// shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
- u32 current_constbuffer_bindpoint = static_cast<u32>(uniform_buffers.size());
+ u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
u32 current_texture_bindpoint = 0;
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
@@ -228,22 +192,21 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
continue;
}
+ std::tie(buffer_ptr, buffer_offset) =
+ AlignBuffer(buffer_ptr, buffer_offset, static_cast<size_t>(uniform_buffer_alignment));
+
const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
GLShader::MaxwellUniformData ubo{};
ubo.SetFromRegs(gpu.state.shader_stages[stage]);
std::memcpy(buffer_ptr, &ubo, sizeof(ubo));
- // Flush the buffer so that the GPU can see the data we just wrote.
- glFlushMappedBufferRange(GL_ARRAY_BUFFER, buffer_offset, sizeof(ubo));
-
- // Upload uniform data as one UBO per stage
- const GLintptr ubo_offset = buffer_offset;
- copy_buffer(uniform_buffers[stage].handle, ubo_offset,
- sizeof(GLShader::MaxwellUniformData));
+ // Bind the buffer
+ glBindBufferRange(GL_UNIFORM_BUFFER, stage, stream_buffer.GetHandle(), buffer_offset,
+ sizeof(ubo));
- buffer_ptr += sizeof(GLShader::MaxwellUniformData);
- buffer_offset += sizeof(GLShader::MaxwellUniformData);
+ buffer_ptr += sizeof(ubo);
+ buffer_offset += sizeof(ubo);
GLShader::ShaderSetup setup{GetShaderProgramCode(program)};
GLShader::ShaderEntries shader_resources;
@@ -282,9 +245,9 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
static_cast<Maxwell::ShaderStage>(stage));
// Configure the const buffers for this shader stage.
- current_constbuffer_bindpoint =
- SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program,
- current_constbuffer_bindpoint, shader_resources.const_buffer_entries);
+ std::tie(buffer_ptr, buffer_offset, current_constbuffer_bindpoint) = SetupConstBuffers(
+ buffer_ptr, buffer_offset, static_cast<Maxwell::ShaderStage>(stage), gl_stage_program,
+ current_constbuffer_bindpoint, shader_resources.const_buffer_entries);
// Configure the textures for this shader stage.
current_texture_bindpoint =
@@ -299,6 +262,8 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
}
shader_program_manager->UseTrivialGeometryShader();
+
+ return {buffer_ptr, buffer_offset};
}
size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@@ -432,6 +397,31 @@ void RasterizerOpenGL::Clear() {
}
}
+std::pair<u8*, GLintptr> RasterizerOpenGL::AlignBuffer(u8* buffer_ptr, GLintptr buffer_offset,
+ size_t alignment) {
+ // Align the offset, not the mapped pointer
+ GLintptr offset_aligned =
+ static_cast<GLintptr>(Common::AlignUp(static_cast<size_t>(buffer_offset), alignment));
+ return {buffer_ptr + (offset_aligned - buffer_offset), offset_aligned};
+}
+
+std::tuple<u8*, GLintptr, GLintptr> RasterizerOpenGL::UploadMemory(u8* buffer_ptr,
+ GLintptr buffer_offset,
+ Tegra::GPUVAddr gpu_addr,
+ size_t size, size_t alignment) {
+ std::tie(buffer_ptr, buffer_offset) = AlignBuffer(buffer_ptr, buffer_offset, alignment);
+ GLintptr uploaded_offset = buffer_offset;
+
+ const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager;
+ const boost::optional<VAddr> cpu_addr{memory_manager->GpuToCpuAddress(gpu_addr)};
+ Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
+
+ buffer_ptr += size;
+ buffer_offset += size;
+
+ return {buffer_ptr, buffer_offset, uploaded_offset};
+}
+
void RasterizerOpenGL::DrawArrays() {
if (accelerate_draw == AccelDraw::Disabled)
return;
@@ -456,7 +446,7 @@ void RasterizerOpenGL::DrawArrays() {
const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()};
const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count};
- state.draw.vertex_buffer = stream_buffer->GetHandle();
+ state.draw.vertex_buffer = stream_buffer.GetHandle();
state.Apply();
size_t buffer_size = CalculateVertexArraysSize();
@@ -466,41 +456,31 @@ void RasterizerOpenGL::DrawArrays() {
}
// Uniform space for the 5 shader stages
- buffer_size = Common::AlignUp<size_t>(buffer_size, 4) +
- sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage;
+ buffer_size =
+ Common::AlignUp<size_t>(buffer_size, 4) +
+ (sizeof(GLShader::MaxwellUniformData) + uniform_buffer_alignment) * Maxwell::MaxShaderStage;
+
+ // Add space for at least 18 constant buffers
+ buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
u8* buffer_ptr;
GLintptr buffer_offset;
- std::tie(buffer_ptr, buffer_offset) =
- stream_buffer->Map(static_cast<GLsizeiptr>(buffer_size), 4);
+ std::tie(buffer_ptr, buffer_offset, std::ignore) =
+ stream_buffer.Map(static_cast<GLsizeiptr>(buffer_size), 4);
+ u8* buffer_ptr_base = buffer_ptr;
- u8* offseted_buffer;
- std::tie(offseted_buffer, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset);
-
- offseted_buffer =
- reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4));
- buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4);
+ std::tie(buffer_ptr, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset);
// If indexed mode, copy the index buffer
GLintptr index_buffer_offset = 0;
if (is_indexed) {
- const auto& memory_manager = Core::System::GetInstance().GPU().memory_manager;
- const boost::optional<VAddr> index_data_addr{
- memory_manager->GpuToCpuAddress(regs.index_array.StartAddress())};
- Memory::ReadBlock(*index_data_addr, offseted_buffer, index_buffer_size);
-
- index_buffer_offset = buffer_offset;
- offseted_buffer += index_buffer_size;
- buffer_offset += index_buffer_size;
+ std::tie(buffer_ptr, buffer_offset, index_buffer_offset) = UploadMemory(
+ buffer_ptr, buffer_offset, regs.index_array.StartAddress(), index_buffer_size);
}
- offseted_buffer =
- reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4));
- buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4);
-
- SetupShaders(offseted_buffer, buffer_offset);
+ std::tie(buffer_ptr, buffer_offset) = SetupShaders(buffer_ptr, buffer_offset);
- stream_buffer->Unmap();
+ stream_buffer.Unmap(buffer_ptr - buffer_ptr_base);
shader_program_manager->ApplyTo(state);
state.Apply();
@@ -647,36 +627,23 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr
}
}
-u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint program,
- u32 current_bindpoint,
- const std::vector<GLShader::ConstBufferEntry>& entries) {
+std::tuple<u8*, GLintptr, u32> RasterizerOpenGL::SetupConstBuffers(
+ u8* buffer_ptr, GLintptr buffer_offset, Maxwell::ShaderStage stage, GLuint program,
+ u32 current_bindpoint, const std::vector<GLShader::ConstBufferEntry>& entries) {
const auto& gpu = Core::System::GetInstance().GPU();
const auto& maxwell3d = gpu.Maxwell3D();
- // Reset all buffer draw state for this stage.
- for (auto& buffer : state.draw.const_buffers[static_cast<size_t>(stage)]) {
- buffer.bindpoint = 0;
- buffer.enabled = false;
- }
-
// Upload only the enabled buffers from the 16 constbuffers of each shader stage
const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<size_t>(stage)];
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& used_buffer = entries[bindpoint];
const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()];
- auto& buffer_draw_state =
- state.draw.const_buffers[static_cast<size_t>(stage)][used_buffer.GetIndex()];
if (!buffer.enabled) {
continue;
}
- buffer_draw_state.enabled = true;
- buffer_draw_state.bindpoint = current_bindpoint + bindpoint;
-
- boost::optional<VAddr> addr = gpu.memory_manager->GpuToCpuAddress(buffer.address);
-
size_t size = 0;
if (used_buffer.IsIndirect()) {
@@ -698,25 +665,26 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr
size = Common::AlignUp(size, sizeof(GLvec4));
ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big");
- std::vector<u8> data(size);
- Memory::ReadBlock(*addr, data.data(), data.size());
+ GLintptr const_buffer_offset;
+ std::tie(buffer_ptr, buffer_offset, const_buffer_offset) =
+ UploadMemory(buffer_ptr, buffer_offset, buffer.address, size,
+ static_cast<size_t>(uniform_buffer_alignment));
- glBindBuffer(GL_UNIFORM_BUFFER, buffer_draw_state.ssbo);
- glBufferData(GL_UNIFORM_BUFFER, data.size(), data.data(), GL_DYNAMIC_DRAW);
- glBindBuffer(GL_UNIFORM_BUFFER, 0);
+ glBindBufferRange(GL_UNIFORM_BUFFER, current_bindpoint + bindpoint,
+ stream_buffer.GetHandle(), const_buffer_offset, size);
// Now configure the bindpoint of the buffer inside the shader
const std::string buffer_name = used_buffer.GetName();
const GLuint index =
glGetProgramResourceIndex(program, GL_UNIFORM_BLOCK, buffer_name.c_str());
if (index != GL_INVALID_INDEX) {
- glUniformBlockBinding(program, index, buffer_draw_state.bindpoint);
+ glUniformBlockBinding(program, index, current_bindpoint + bindpoint);
}
}
state.Apply();
- return current_bindpoint + static_cast<u32>(entries.size());
+ return {buffer_ptr, buffer_offset, current_bindpoint + static_cast<u32>(entries.size())};
}
u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program, u32 current_unit,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index bd01dc0ae..74307f626 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -7,6 +7,7 @@
#include <array>
#include <cstddef>
#include <memory>
+#include <tuple>
#include <utility>
#include <vector>
#include <glad/glad.h>
@@ -100,9 +101,10 @@ private:
* @param entries Vector describing the buffers that are actually used in the guest shader.
* @returns The next available bindpoint for use in the next shader stage.
*/
- u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, GLuint program,
- u32 current_bindpoint,
- const std::vector<GLShader::ConstBufferEntry>& entries);
+ std::tuple<u8*, GLintptr, u32> SetupConstBuffers(
+ u8* buffer_ptr, GLintptr buffer_offset, Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
+ GLuint program, u32 current_bindpoint,
+ const std::vector<GLShader::ConstBufferEntry>& entries);
/*
* Configures the current textures to use for the draw command.
@@ -139,7 +141,6 @@ private:
/// Syncs the blend state to match the guest state
void SyncBlendState();
- bool has_ARB_buffer_storage = false;
bool has_ARB_direct_state_access = false;
bool has_ARB_separate_shader_objects = false;
bool has_ARB_vertex_attrib_binding = false;
@@ -155,22 +156,24 @@ private:
OGLVertexArray hw_vao;
std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers;
- std::array<std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers>,
- Tegra::Engines::Maxwell3D::Regs::MaxShaderStage>
- ssbos;
static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
- std::unique_ptr<OGLStreamBuffer> stream_buffer;
+ OGLStreamBuffer stream_buffer;
OGLBuffer uniform_buffer;
OGLFramebuffer framebuffer;
+ GLint uniform_buffer_alignment;
size_t CalculateVertexArraysSize() const;
std::pair<u8*, GLintptr> SetupVertexArrays(u8* array_ptr, GLintptr buffer_offset);
- std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers;
+ std::pair<u8*, GLintptr> SetupShaders(u8* buffer_ptr, GLintptr buffer_offset);
- void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset);
+ std::pair<u8*, GLintptr> AlignBuffer(u8* buffer_ptr, GLintptr buffer_offset, size_t alignment);
+
+ std::tuple<u8*, GLintptr, GLintptr> UploadMemory(u8* buffer_ptr, GLintptr buffer_offset,
+ Tegra::GPUVAddr gpu_addr, size_t size,
+ size_t alignment = 4);
enum class AccelDraw { Disabled, Arrays, Indexed };
AccelDraw accelerate_draw = AccelDraw::Disabled;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 7e038ac86..6834d7085 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -376,6 +376,8 @@ public:
return value;
} else if (type == GLSLRegister::Type::Integer) {
return "floatBitsToInt(" + value + ')';
+ } else if (type == GLSLRegister::Type::UnsignedInteger) {
+ return "floatBitsToUint(" + value + ')';
} else {
UNREACHABLE();
}
@@ -1630,6 +1632,99 @@ private:
}
break;
}
+ case OpCode::Type::Xmad: {
+ ASSERT_MSG(!instr.xmad.sign_a, "Unimplemented");
+ ASSERT_MSG(!instr.xmad.sign_b, "Unimplemented");
+
+ std::string op_a{regs.GetRegisterAsInteger(instr.gpr8, 0, instr.xmad.sign_a)};
+ std::string op_b;
+ std::string op_c;
+
+ // TODO(bunnei): Needs to be fixed once op_a or op_b is signed
+ ASSERT_MSG(instr.xmad.sign_a == instr.xmad.sign_b, "Unimplemented");
+ const bool is_signed{instr.xmad.sign_a == 1};
+
+ bool is_merge{};
+ switch (opcode->GetId()) {
+ case OpCode::Id::XMAD_CR: {
+ is_merge = instr.xmad.merge_56;
+ op_b += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
+ instr.xmad.sign_b ? GLSLRegister::Type::Integer
+ : GLSLRegister::Type::UnsignedInteger);
+ op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed);
+ break;
+ }
+ case OpCode::Id::XMAD_RR: {
+ is_merge = instr.xmad.merge_37;
+ op_b += regs.GetRegisterAsInteger(instr.gpr20, 0, instr.xmad.sign_b);
+ op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed);
+ break;
+ }
+ case OpCode::Id::XMAD_RC: {
+ op_b += regs.GetRegisterAsInteger(instr.gpr39, 0, instr.xmad.sign_b);
+ op_c += regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
+ is_signed ? GLSLRegister::Type::Integer
+ : GLSLRegister::Type::UnsignedInteger);
+ break;
+ }
+ case OpCode::Id::XMAD_IMM: {
+ is_merge = instr.xmad.merge_37;
+ op_b += std::to_string(instr.xmad.imm20_16);
+ op_c += regs.GetRegisterAsInteger(instr.gpr39, 0, is_signed);
+ break;
+ }
+ default: {
+ LOG_CRITICAL(HW_GPU, "Unhandled XMAD instruction: {}", opcode->GetName());
+ UNREACHABLE();
+ }
+ }
+
+ // TODO(bunnei): Ensure this is right with signed operands
+ if (instr.xmad.high_a) {
+ op_a = "((" + op_a + ") >> 16)";
+ } else {
+ op_a = "((" + op_a + ") & 0xFFFF)";
+ }
+
+ std::string src2 = '(' + op_b + ')'; // Preserve original source 2
+ if (instr.xmad.high_b) {
+ op_b = '(' + src2 + " >> 16)";
+ } else {
+ op_b = '(' + src2 + " & 0xFFFF)";
+ }
+
+ std::string product = '(' + op_a + " * " + op_b + ')';
+ if (instr.xmad.product_shift_left) {
+ product = '(' + product + " << 16)";
+ }
+
+ switch (instr.xmad.mode) {
+ case Tegra::Shader::XmadMode::None:
+ break;
+ case Tegra::Shader::XmadMode::CLo:
+ op_c = "((" + op_c + ") & 0xFFFF)";
+ break;
+ case Tegra::Shader::XmadMode::CHi:
+ op_c = "((" + op_c + ") >> 16)";
+ break;
+ case Tegra::Shader::XmadMode::CBcc:
+ op_c = "((" + op_c + ") + (" + src2 + "<< 16))";
+ break;
+ default: {
+ LOG_CRITICAL(HW_GPU, "Unhandled XMAD mode: {}",
+ static_cast<u32>(instr.xmad.mode.Value()));
+ UNREACHABLE();
+ }
+ }
+
+ std::string sum{'(' + product + " + " + op_c + ')'};
+ if (is_merge) {
+ sum = "((" + sum + " & 0xFFFF) | (" + src2 + "<< 16))";
+ }
+
+ regs.SetRegisterToInteger(instr.gpr0, is_signed, 0, sum, 1, 1);
+ break;
+ }
default: {
switch (opcode->GetId()) {
case OpCode::Id::EXIT: {
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 68bacd4c5..1d1975179 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -203,21 +203,6 @@ void OpenGLState::Apply() const {
}
}
- // Constbuffers
- for (std::size_t stage = 0; stage < draw.const_buffers.size(); ++stage) {
- for (std::size_t buffer_id = 0; buffer_id < draw.const_buffers[stage].size(); ++buffer_id) {
- const auto& current = cur_state.draw.const_buffers[stage][buffer_id];
- const auto& new_state = draw.const_buffers[stage][buffer_id];
-
- if (current.enabled != new_state.enabled || current.bindpoint != new_state.bindpoint ||
- current.ssbo != new_state.ssbo) {
- if (new_state.enabled) {
- glBindBufferBase(GL_UNIFORM_BUFFER, new_state.bindpoint, new_state.ssbo);
- }
- }
- }
- }
-
// Framebuffer
if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 5c7b636e4..bdb02ba25 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -119,12 +119,6 @@ public:
GLuint uniform_buffer; // GL_UNIFORM_BUFFER_BINDING
GLuint shader_program; // GL_CURRENT_PROGRAM
GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING
- struct ConstBufferConfig {
- bool enabled = false;
- GLuint bindpoint;
- GLuint ssbo;
- };
- std::array<std::array<ConstBufferConfig, Regs::MaxConstBuffers>, 5> const_buffers;
} draw;
struct {
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index a2713e9f0..03a8ed8b7 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -9,174 +9,91 @@
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
-class OrphanBuffer : public OGLStreamBuffer {
-public:
- explicit OrphanBuffer(GLenum target) : OGLStreamBuffer(target) {}
- ~OrphanBuffer() override;
-
-private:
- void Create(size_t size, size_t sync_subdivide) override;
- void Release() override;
-
- std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override;
- void Unmap() override;
-
- std::vector<u8> data;
-};
-
-class StorageBuffer : public OGLStreamBuffer {
-public:
- explicit StorageBuffer(GLenum target) : OGLStreamBuffer(target) {}
- ~StorageBuffer() override;
-
-private:
- void Create(size_t size, size_t sync_subdivide) override;
- void Release() override;
-
- std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) override;
- void Unmap() override;
-
- struct Fence {
- OGLSync sync;
- size_t offset;
- };
- std::deque<Fence> head;
- std::deque<Fence> tail;
-
- u8* mapped_ptr;
-};
-
-OGLStreamBuffer::OGLStreamBuffer(GLenum target) {
- gl_target = target;
-}
-
-GLuint OGLStreamBuffer::GetHandle() const {
- return gl_buffer.handle;
-}
+OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent)
+ : gl_target(target), buffer_size(size) {
+ gl_buffer.Create();
+ glBindBuffer(gl_target, gl_buffer.handle);
-std::unique_ptr<OGLStreamBuffer> OGLStreamBuffer::MakeBuffer(bool storage_buffer, GLenum target) {
- if (storage_buffer) {
- return std::make_unique<StorageBuffer>(target);
+ GLsizeiptr allocate_size = size;
+ if (target == GL_ARRAY_BUFFER) {
+ // On AMD GPUs there is a strange crash in indexed drawing. The crash happens when the buffer
+ // read position is near the end and is an out-of-bounds access to the vertex buffer. This is
+ // probably a bug in the driver and is related to the usage of vec3<byte> attributes in the
+ // vertex array. Doubling the allocation size for the vertex buffer seems to avoid the
+ // crash.
+ allocate_size *= 2;
}
- return std::make_unique<OrphanBuffer>(target);
-}
-OrphanBuffer::~OrphanBuffer() {
- Release();
+ if (GLAD_GL_ARB_buffer_storage) {
+ persistent = true;
+ coherent = prefer_coherent;
+ GLbitfield flags =
+ GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0);
+ glBufferStorage(gl_target, allocate_size, nullptr, flags);
+ mapped_ptr = static_cast<u8*>(glMapBufferRange(
+ gl_target, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT)));
+ } else {
+ glBufferData(gl_target, allocate_size, nullptr, GL_STREAM_DRAW);
+ }
}
-void OrphanBuffer::Create(size_t size, size_t /*sync_subdivide*/) {
- buffer_pos = 0;
- buffer_size = size;
- data.resize(buffer_size);
-
- if (gl_buffer.handle == 0) {
- gl_buffer.Create();
+OGLStreamBuffer::~OGLStreamBuffer() {
+ if (persistent) {
glBindBuffer(gl_target, gl_buffer.handle);
+ glUnmapBuffer(gl_target);
}
-
- glBufferData(gl_target, static_cast<GLsizeiptr>(buffer_size), nullptr, GL_STREAM_DRAW);
-}
-
-void OrphanBuffer::Release() {
gl_buffer.Release();
}
-std::pair<u8*, GLintptr> OrphanBuffer::Map(size_t size, size_t alignment) {
- buffer_pos = Common::AlignUp(buffer_pos, alignment);
-
- if (buffer_pos + size > buffer_size) {
- Create(std::max(buffer_size, size), 0);
- }
-
- mapped_size = size;
- return std::make_pair(&data[buffer_pos], static_cast<GLintptr>(buffer_pos));
-}
-
-void OrphanBuffer::Unmap() {
- glBufferSubData(gl_target, static_cast<GLintptr>(buffer_pos),
- static_cast<GLsizeiptr>(mapped_size), &data[buffer_pos]);
- buffer_pos += mapped_size;
-}
-
-StorageBuffer::~StorageBuffer() {
- Release();
+GLuint OGLStreamBuffer::GetHandle() const {
+ return gl_buffer.handle;
}
-void StorageBuffer::Create(size_t size, size_t sync_subdivide) {
- if (gl_buffer.handle != 0)
- return;
-
- buffer_pos = 0;
- buffer_size = size;
- buffer_sync_subdivide = std::max<size_t>(sync_subdivide, 1);
-
- gl_buffer.Create();
- glBindBuffer(gl_target, gl_buffer.handle);
-
- glBufferStorage(gl_target, static_cast<GLsizeiptr>(buffer_size), nullptr,
- GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT);
- mapped_ptr = reinterpret_cast<u8*>(
- glMapBufferRange(gl_target, 0, static_cast<GLsizeiptr>(buffer_size),
- GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_FLUSH_EXPLICIT_BIT));
+GLsizeiptr OGLStreamBuffer::GetSize() const {
+ return buffer_size;
}
-void StorageBuffer::Release() {
- if (gl_buffer.handle == 0)
- return;
-
- glUnmapBuffer(gl_target);
-
- gl_buffer.Release();
- head.clear();
- tail.clear();
-}
-
-std::pair<u8*, GLintptr> StorageBuffer::Map(size_t size, size_t alignment) {
+std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
ASSERT(size <= buffer_size);
+ ASSERT(alignment <= buffer_size);
+ mapped_size = size;
- OGLSync sync;
-
- buffer_pos = Common::AlignUp(buffer_pos, alignment);
- size_t effective_offset = Common::AlignDown(buffer_pos, buffer_sync_subdivide);
-
- if (!head.empty() &&
- (effective_offset > head.back().offset || buffer_pos + size > buffer_size)) {
- ASSERT(head.back().sync.handle == 0);
- head.back().sync.Create();
+ if (alignment > 0) {
+ buffer_pos = Common::AlignUp<size_t>(buffer_pos, alignment);
}
+ bool invalidate = false;
if (buffer_pos + size > buffer_size) {
- if (!tail.empty()) {
- std::swap(sync, tail.back().sync);
- tail.clear();
- }
- std::swap(tail, head);
buffer_pos = 0;
- effective_offset = 0;
- }
+ invalidate = true;
- while (!tail.empty() && buffer_pos + size > tail.front().offset) {
- std::swap(sync, tail.front().sync);
- tail.pop_front();
+ if (persistent) {
+ glUnmapBuffer(gl_target);
+ }
}
- if (sync.handle != 0) {
- glClientWaitSync(sync.handle, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
- sync.Release();
+ if (invalidate | !persistent) {
+ GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) |
+ (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) |
+ (invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
+ mapped_ptr = static_cast<u8*>(
+ glMapBufferRange(gl_target, buffer_pos, buffer_size - buffer_pos, flags));
+ mapped_offset = buffer_pos;
}
- if (head.empty() || effective_offset > head.back().offset) {
- head.emplace_back();
- head.back().offset = effective_offset;
+ return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate);
+}
+
+void OGLStreamBuffer::Unmap(GLsizeiptr size) {
+ ASSERT(size <= mapped_size);
+
+ if (!coherent && size > 0) {
+ glFlushMappedBufferRange(gl_target, buffer_pos - mapped_offset, size);
}
- mapped_size = size;
- return std::make_pair(&mapped_ptr[buffer_pos], static_cast<GLintptr>(buffer_pos));
-}
+ if (!persistent) {
+ glUnmapBuffer(gl_target);
+ }
-void StorageBuffer::Unmap() {
- glFlushMappedBufferRange(gl_target, static_cast<GLintptr>(buffer_pos),
- static_cast<GLsizeiptr>(mapped_size));
- buffer_pos += mapped_size;
+ buffer_pos += size;
}
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index e78dc5784..45592daaf 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -2,35 +2,41 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#pragma once
-
-#include <memory>
+#include <tuple>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
class OGLStreamBuffer : private NonCopyable {
public:
- explicit OGLStreamBuffer(GLenum target);
- virtual ~OGLStreamBuffer() = default;
-
-public:
- static std::unique_ptr<OGLStreamBuffer> MakeBuffer(bool storage_buffer, GLenum target);
-
- virtual void Create(size_t size, size_t sync_subdivide) = 0;
- virtual void Release() {}
+ explicit OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coherent = false);
+ ~OGLStreamBuffer();
GLuint GetHandle() const;
+ GLsizeiptr GetSize() const;
+
+ /*
+ * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
+ * and the optional alignment requirement.
+ * If the chunk does not fit in the remaining space, the whole buffer is invalidated and writing restarts at offset 0, which invalidates old chunks.
+ * The return values are the pointer to the new chunk, the offset within the buffer,
+ * and the invalidation flag for previous chunks.
+ * The actual used size must be specified on unmapping the chunk.
+ */
+ std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0);
- virtual std::pair<u8*, GLintptr> Map(size_t size, size_t alignment) = 0;
- virtual void Unmap() = 0;
+ void Unmap(GLsizeiptr size);
-protected:
+private:
OGLBuffer gl_buffer;
GLenum gl_target;
- size_t buffer_pos = 0;
- size_t buffer_size = 0;
- size_t buffer_sync_subdivide = 0;
- size_t mapped_size = 0;
+ bool coherent = false;
+ bool persistent = false;
+
+ GLintptr buffer_pos = 0;
+ GLsizeiptr buffer_size = 0;
+ GLintptr mapped_offset = 0;
+ GLsizeiptr mapped_size = 0;
+ u8* mapped_ptr = nullptr;
};