summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/audio_core/audio_core.cpp 19
-rw-r--r-- src/audio_core/audio_core.h 13
-rw-r--r-- src/audio_core/hle/dsp.cpp 14
-rw-r--r-- src/audio_core/hle/dsp.h 21
-rw-r--r-- src/core/core.cpp 5
-rw-r--r-- src/core/hle/kernel/memory.cpp 112
-rw-r--r-- src/core/hle/kernel/memory.h 10
-rw-r--r-- src/core/hle/kernel/process.cpp 23
-rw-r--r-- src/core/hle/kernel/process.h 2
-rw-r--r-- src/core/memory.cpp 8
-rw-r--r-- src/core/memory.h 10
-rw-r--r-- src/video_core/command_processor.cpp 212
-rw-r--r-- src/video_core/shader/shader.h 7
-rw-r--r-- src/video_core/shader/shader_interpreter.cpp 2
-rw-r--r-- src/video_core/shader/shader_jit_x64.cpp 2
-rw-r--r-- src/video_core/shader/shader_jit_x64_compiler.cpp 4
-rw-r--r-- src/video_core/shader/shader_jit_x64_compiler.h 14
17 files changed, 328 insertions, 150 deletions
diff --git a/src/audio_core/audio_core.cpp b/src/audio_core/audio_core.cpp
index 84f9c03a7..9c2e6ed88 100644
--- a/src/audio_core/audio_core.cpp
+++ b/src/audio_core/audio_core.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <array>
#include <memory>
#include <string>
#include "audio_core/audio_core.h"
@@ -10,8 +11,8 @@
#include "audio_core/null_sink.h"
#include "audio_core/sink.h"
#include "audio_core/sink_details.h"
+#include "common/common_types.h"
#include "core/core_timing.h"
-#include "core/hle/kernel/vm_manager.h"
#include "core/hle/service/dsp_dsp.h"
namespace AudioCore {
@@ -39,20 +40,8 @@ void Init() {
CoreTiming::ScheduleEvent(audio_frame_ticks, tick_event);
}
-void AddAddressSpace(Kernel::VMManager& address_space) {
- auto r0_vma = address_space
- .MapBackingMemory(DSP::HLE::region0_base,
- reinterpret_cast<u8*>(&DSP::HLE::g_regions[0]),
- sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO)
- .MoveFrom();
- address_space.Reprotect(r0_vma, Kernel::VMAPermission::ReadWrite);
-
- auto r1_vma = address_space
- .MapBackingMemory(DSP::HLE::region1_base,
- reinterpret_cast<u8*>(&DSP::HLE::g_regions[1]),
- sizeof(DSP::HLE::SharedMemory), Kernel::MemoryState::IO)
- .MoveFrom();
- address_space.Reprotect(r1_vma, Kernel::VMAPermission::ReadWrite);
+std::array<u8, Memory::DSP_RAM_SIZE>& GetDspMemory() { // Exposes the byte array that backs emulated DSP memory.
+ return DSP::HLE::g_dsp_memory.raw_memory; // raw_memory is the flat byte member of the DspMemory union
}
void SelectSink(std::string sink_id) {
diff --git a/src/audio_core/audio_core.h b/src/audio_core/audio_core.h
index 0edf6dd15..ab323ce1f 100644
--- a/src/audio_core/audio_core.h
+++ b/src/audio_core/audio_core.h
@@ -4,11 +4,10 @@
#pragma once
+#include <array>
#include <string>
-
-namespace Kernel {
-class VMManager;
-}
+#include "common/common_types.h"
+#include "core/memory.h"
namespace AudioCore {
@@ -17,8 +16,8 @@ constexpr int native_sample_rate = 32728; ///< 32kHz
/// Initialise Audio Core
void Init();
-/// Add DSP address spaces to a Process.
-void AddAddressSpace(Kernel::VMManager& vm_manager);
+/// Returns a reference to the array backing DSP memory
+std::array<u8, Memory::DSP_RAM_SIZE>& GetDspMemory();
/// Select the sink to use based on sink id.
void SelectSink(std::string sink_id);
@@ -29,4 +28,4 @@ void EnableStretching(bool enable);
/// Shutdown Audio Core
void Shutdown();
-} // namespace
+} // namespace AudioCore
diff --git a/src/audio_core/hle/dsp.cpp b/src/audio_core/hle/dsp.cpp
index 31421fdc6..260b182ed 100644
--- a/src/audio_core/hle/dsp.cpp
+++ b/src/audio_core/hle/dsp.cpp
@@ -16,31 +16,33 @@ namespace HLE {
// Region management
-std::array<SharedMemory, 2> g_regions;
+DspMemory g_dsp_memory;
static size_t CurrentRegionIndex() {
// The region with the higher frame counter is chosen unless there is wraparound.
// This function only returns a 0 or 1. When both counters are equal, 1 is returned.
+ u16 frame_counter_0 = g_dsp_memory.region_0.frame_counter; // read each counter once into a local
+ u16 frame_counter_1 = g_dsp_memory.region_1.frame_counter;
- if (g_regions[0].frame_counter == 0xFFFFu && g_regions[1].frame_counter != 0xFFFEu) {
+ if (frame_counter_0 == 0xFFFFu && frame_counter_1 != 0xFFFEu) {
// Wraparound has occurred (presumably region 1's counter has wrapped past 0xFFFF).
return 1;
}
- if (g_regions[1].frame_counter == 0xFFFFu && g_regions[0].frame_counter != 0xFFFEu) {
+ if (frame_counter_1 == 0xFFFFu && frame_counter_0 != 0xFFFEu) {
// Wraparound has occurred (presumably region 0's counter has wrapped past 0xFFFF).
return 0;
}
- return (g_regions[0].frame_counter > g_regions[1].frame_counter) ? 0 : 1;
+ return (frame_counter_0 > frame_counter_1) ? 0 : 1;
}
static SharedMemory& ReadRegion() { // Returns the region selected by CurrentRegionIndex().
- return g_regions[CurrentRegionIndex()];
+ return CurrentRegionIndex() == 0 ? g_dsp_memory.region_0 : g_dsp_memory.region_1;
}
static SharedMemory& WriteRegion() { // Returns the region NOT selected by CurrentRegionIndex().
- return g_regions[1 - CurrentRegionIndex()];
+ return CurrentRegionIndex() != 0 ? g_dsp_memory.region_0 : g_dsp_memory.region_1;
}
// Audio processing and mixing
diff --git a/src/audio_core/hle/dsp.h b/src/audio_core/hle/dsp.h
index 0a0f60ac1..94ce48863 100644
--- a/src/audio_core/hle/dsp.h
+++ b/src/audio_core/hle/dsp.h
@@ -31,8 +31,8 @@ namespace HLE {
// double-buffer. The frame counter is located as the very last u16 of each region and is
// incremented each audio tick.
-constexpr VAddr region0_base = 0x1FF50000;
-constexpr VAddr region1_base = 0x1FF70000;
+constexpr u32 region0_offset = 0x50000;
+constexpr u32 region1_offset = 0x70000;
/**
* The DSP is native 16-bit. The DSP also appears to be big-endian. When reading 32-bit numbers from
@@ -512,7 +512,22 @@ struct SharedMemory {
};
ASSERT_DSP_STRUCT(SharedMemory, 0x8000);
-extern std::array<SharedMemory, 2> g_regions;
+union DspMemory { // Two overlapping views of the same storage: a flat byte array, or padding plus the two shared regions.
+ std::array<u8, 0x80000> raw_memory; // flat byte view of the whole union
+ struct {
+ u8 unused_0[0x50000]; // padding so region_0 lands at region0_offset (checked by the static_assert that follows the union)
+ SharedMemory region_0;
+ u8 unused_1[0x18000]; // padding so region_1 lands at region1_offset (checked by the static_assert that follows the union)
+ SharedMemory region_1;
+ u8 unused_2[0x8000]; // tail padding; presumably brings the struct to 0x80000 bytes given SharedMemory is 0x8000
+ };
+};
+static_assert(offsetof(DspMemory, region_0) == region0_offset,
+ "DSP region 0 is at the wrong offset");
+static_assert(offsetof(DspMemory, region_1) == region1_offset,
+ "DSP region 1 is at the wrong offset");
+
+extern DspMemory g_dsp_memory;
// Structures must have an offset that is a multiple of two.
static_assert(offsetof(SharedMemory, frame_counter) % 2 == 0,
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 140ff6451..881f1e93c 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -13,11 +13,11 @@
#include "core/core_timing.h"
#include "core/gdbstub/gdbstub.h"
#include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/memory.h"
#include "core/hle/kernel/thread.h"
#include "core/hle/service/service.h"
#include "core/hw/hw.h"
#include "core/loader/loader.h"
+#include "core/memory_setup.h"
#include "core/settings.h"
#include "video_core/video_core.h"
@@ -123,7 +123,8 @@ void System::Reschedule() {
}
System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) {
- Memory::Init();
+ Memory::InitMemoryMap();
+ LOG_DEBUG(HW_Memory, "initialized OK");
if (Settings::values.use_cpu_jit) {
cpu_core = std::make_unique<ARM_Dynarmic>(USER32MODE);
diff --git a/src/core/hle/kernel/memory.cpp b/src/core/hle/kernel/memory.cpp
index 33c165197..8250a90b5 100644
--- a/src/core/hle/kernel/memory.cpp
+++ b/src/core/hle/kernel/memory.cpp
@@ -2,11 +2,13 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <cinttypes>
#include <map>
#include <memory>
#include <utility>
#include <vector>
#include "audio_core/audio_core.h"
+#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "core/hle/config_mem.h"
@@ -92,52 +94,96 @@ MemoryRegionInfo* GetMemoryRegion(MemoryRegion region) {
UNREACHABLE();
}
}
-}
-
-namespace Memory {
-namespace {
+std::array<u8, Memory::VRAM_SIZE> vram;
+std::array<u8, Memory::N3DS_EXTRA_RAM_SIZE> n3ds_extra_ram;
+
+void HandleSpecialMapping(VMManager& address_space, const AddressMapping& mapping) { // Maps `mapping` into `address_space`, backed by the VRAM, DSP RAM or N3DS extra RAM array; logs and returns without mapping on overflow, unknown ranges and IO ranges.
+ using namespace Memory;
+
+ struct MemoryArea {
+ VAddr vaddr_base;
+ PAddr paddr_base;
+ u32 size;
+ };
+
+ // The order of entries in this array is important. The VRAM and IO VAddr ranges overlap, and
+ // VRAM must be tried first.
+ static constexpr MemoryArea memory_areas[] = {
+ {VRAM_VADDR, VRAM_PADDR, VRAM_SIZE},
+ {IO_AREA_VADDR, IO_AREA_PADDR, IO_AREA_SIZE},
+ {DSP_RAM_VADDR, DSP_RAM_PADDR, DSP_RAM_SIZE},
+ {N3DS_EXTRA_RAM_VADDR, N3DS_EXTRA_RAM_PADDR, N3DS_EXTRA_RAM_SIZE - 0x20000}, // NOTE(review): the last 0x20000 bytes are excluded; the reason is not visible here
+ };
+
+ VAddr mapping_limit = mapping.address + mapping.size; // exclusive end address; a wrapped sum is caught by the check below
+ if (mapping_limit < mapping.address) {
+ LOG_CRITICAL(Loader, "Mapping size overflowed: address=0x%08" PRIX32 " size=0x%" PRIX32,
+ mapping.address, mapping.size);
+ return;
+ }
-struct MemoryArea {
- u32 base;
- u32 size;
- const char* name;
-};
+ auto area = // first table entry that fully contains [mapping.address, mapping_limit)
+ std::find_if(std::begin(memory_areas), std::end(memory_areas), [&](const auto& area) { // NOTE(review): std::find_if needs <algorithm>; the include hunk of this file adds only <cinttypes> and common/assert.h - confirm it is included
+ return mapping.address >= area.vaddr_base &&
+ mapping_limit <= area.vaddr_base + area.size;
+ });
+ if (area == std::end(memory_areas)) {
+ LOG_ERROR(Loader, "Unhandled special mapping: address=0x%08" PRIX32 " size=0x%" PRIX32
+ " read_only=%d unk_flag=%d",
+ mapping.address, mapping.size, mapping.read_only, mapping.unk_flag);
+ return;
+ }
-// We don't declare the IO regions in here since its handled by other means.
-static MemoryArea memory_areas[] = {
- {VRAM_VADDR, VRAM_SIZE, "VRAM"}, // Video memory (VRAM)
-};
-}
+ u32 offset_into_region = mapping.address - area->vaddr_base; // byte offset of the mapping inside the matched area
+ if (area->paddr_base == IO_AREA_PADDR) {
+ LOG_ERROR(Loader, "MMIO mappings are not supported yet. phys_addr=0x%08" PRIX32,
+ area->paddr_base + offset_into_region);
+ return;
+ }
-void Init() {
- InitMemoryMap();
- LOG_DEBUG(HW_Memory, "initialized OK");
-}
+ // TODO(yuriks): Use GetPhysicalPointer when that becomes independent of the virtual
+ // mappings.
+ u8* target_pointer = nullptr; // start of the host array that backs the matched area
+ switch (area->paddr_base) {
+ case VRAM_PADDR:
+ target_pointer = vram.data();
+ break;
+ case DSP_RAM_PADDR:
+ target_pointer = AudioCore::GetDspMemory().data();
+ break;
+ case N3DS_EXTRA_RAM_PADDR:
+ target_pointer = n3ds_extra_ram.data();
+ break;
+ default:
+ UNREACHABLE(); // the IO area returned earlier, so only the three cases above can reach the switch
+ }
-void InitLegacyAddressSpace(Kernel::VMManager& address_space) {
- using namespace Kernel;
+ // TODO(yuriks): This flag seems to have some other effect, but it's unknown what
+ MemoryState memory_state = mapping.unk_flag ? MemoryState::Static : MemoryState::IO;
- for (MemoryArea& area : memory_areas) {
- auto block = std::make_shared<std::vector<u8>>(area.size);
- address_space
- .MapMemoryBlock(area.base, std::move(block), 0, area.size, MemoryState::Private)
- .Unwrap();
- }
+ auto vma = address_space // NOTE(review): ParseKernelCaps (process.cpp hunk) can produce mapping.size == 0; confirm MapBackingMemory accepts a zero-size mapping
+ .MapBackingMemory(mapping.address, target_pointer + offset_into_region,
+ mapping.size, memory_state)
+ .MoveFrom();
+ address_space.Reprotect(vma,
+ mapping.read_only ? VMAPermission::Read : VMAPermission::ReadWrite);
+}
+void MapSharedPages(VMManager& address_space) { // Maps the config-memory and shared-page blocks into `address_space` as MemoryState::Shared, then reprotects both as read-only.
auto cfg_mem_vma = address_space
- .MapBackingMemory(CONFIG_MEMORY_VADDR, (u8*)&ConfigMem::config_mem,
- CONFIG_MEMORY_SIZE, MemoryState::Shared)
+ .MapBackingMemory(Memory::CONFIG_MEMORY_VADDR,
+ reinterpret_cast<u8*>(&ConfigMem::config_mem),
+ Memory::CONFIG_MEMORY_SIZE, MemoryState::Shared)
.MoveFrom();
address_space.Reprotect(cfg_mem_vma, VMAPermission::Read);
auto shared_page_vma = address_space
- .MapBackingMemory(SHARED_PAGE_VADDR, (u8*)&SharedPage::shared_page,
- SHARED_PAGE_SIZE, MemoryState::Shared)
+ .MapBackingMemory(Memory::SHARED_PAGE_VADDR,
+ reinterpret_cast<u8*>(&SharedPage::shared_page),
+ Memory::SHARED_PAGE_SIZE, MemoryState::Shared)
.MoveFrom();
address_space.Reprotect(shared_page_vma, VMAPermission::Read);
-
- AudioCore::AddAddressSpace(address_space);
}
-} // namespace
+} // namespace Kernel
diff --git a/src/core/hle/kernel/memory.h b/src/core/hle/kernel/memory.h
index 4e1856a41..08c1a9989 100644
--- a/src/core/hle/kernel/memory.h
+++ b/src/core/hle/kernel/memory.h
@@ -23,11 +23,7 @@ struct MemoryRegionInfo {
void MemoryInit(u32 mem_type);
void MemoryShutdown();
MemoryRegionInfo* GetMemoryRegion(MemoryRegion region);
-}
-namespace Memory {
-
-void Init();
-void InitLegacyAddressSpace(Kernel::VMManager& address_space);
-
-} // namespace
+void HandleSpecialMapping(VMManager& address_space, const AddressMapping& mapping);
+void MapSharedPages(VMManager& address_space);
+} // namespace Kernel
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index ba80fe7f8..32cb25fb7 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -35,7 +35,6 @@ SharedPtr<Process> Process::Create(SharedPtr<CodeSet> code_set) {
process->codeset = std::move(code_set);
process->flags.raw = 0;
process->flags.memory_region.Assign(MemoryRegion::APPLICATION);
- Memory::InitLegacyAddressSpace(process->vm_manager);
return process;
}
@@ -78,8 +77,15 @@ void Process::ParseKernelCaps(const u32* kernel_caps, size_t len) {
AddressMapping mapping;
mapping.address = descriptor << 12;
- mapping.size = (end_desc << 12) - mapping.address;
- mapping.writable = (descriptor & (1 << 20)) != 0;
+ VAddr end_address = end_desc << 12;
+
+ if (mapping.address < end_address) {
+ mapping.size = end_address - mapping.address;
+ } else {
+ mapping.size = 0;
+ }
+
+ mapping.read_only = (descriptor & (1 << 20)) != 0;
mapping.unk_flag = (end_desc & (1 << 20)) != 0;
address_mappings.push_back(mapping);
@@ -88,8 +94,10 @@ void Process::ParseKernelCaps(const u32* kernel_caps, size_t len) {
AddressMapping mapping;
mapping.address = descriptor << 12;
mapping.size = Memory::PAGE_SIZE;
- mapping.writable = true; // TODO: Not sure if correct
+ mapping.read_only = false;
mapping.unk_flag = false;
+
+ address_mappings.push_back(mapping);
} else if ((type & 0xFE0) == 0xFC0) { // 0x01FF
// Kernel version
kernel_version = descriptor & 0xFFFF;
@@ -131,6 +139,12 @@ void Process::Run(s32 main_thread_priority, u32 stack_size) {
misc_memory_used += stack_size;
memory_region->used += stack_size;
+ // Map special address mappings
+ MapSharedPages(vm_manager);
+ for (const auto& mapping : address_mappings) {
+ HandleSpecialMapping(vm_manager, mapping);
+ }
+
vm_manager.LogLayout(Log::Level::Debug);
Kernel::SetupMainThread(codeset->entrypoint, main_thread_priority);
}
@@ -138,6 +152,7 @@ void Process::Run(s32 main_thread_priority, u32 stack_size) {
VAddr Process::GetLinearHeapAreaAddress() const { // Kernel versions below 0x22C use LINEAR_HEAP_VADDR; all others use NEW_LINEAR_HEAP_VADDR.
return kernel_version < 0x22C ? Memory::LINEAR_HEAP_VADDR : Memory::NEW_LINEAR_HEAP_VADDR;
}
+
VAddr Process::GetLinearHeapBase() const { // Linear heap area address plus the base of this process's memory region.
return GetLinearHeapAreaAddress() + memory_region->base;
}
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index b566950b0..b52211d2a 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -20,7 +20,7 @@ struct AddressMapping {
// Address and size must be page-aligned
VAddr address;
u32 size;
- bool writable;
+ bool read_only;
bool unk_flag;
};
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 65e4bba85..b8438e490 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -672,12 +672,14 @@ PAddr VirtualToPhysicalAddress(const VAddr addr) {
return addr - VRAM_VADDR + VRAM_PADDR;
} else if (addr >= LINEAR_HEAP_VADDR && addr < LINEAR_HEAP_VADDR_END) {
return addr - LINEAR_HEAP_VADDR + FCRAM_PADDR;
+ } else if (addr >= NEW_LINEAR_HEAP_VADDR && addr < NEW_LINEAR_HEAP_VADDR_END) {
+ return addr - NEW_LINEAR_HEAP_VADDR + FCRAM_PADDR;
} else if (addr >= DSP_RAM_VADDR && addr < DSP_RAM_VADDR_END) {
return addr - DSP_RAM_VADDR + DSP_RAM_PADDR;
} else if (addr >= IO_AREA_VADDR && addr < IO_AREA_VADDR_END) {
return addr - IO_AREA_VADDR + IO_AREA_PADDR;
- } else if (addr >= NEW_LINEAR_HEAP_VADDR && addr < NEW_LINEAR_HEAP_VADDR_END) {
- return addr - NEW_LINEAR_HEAP_VADDR + FCRAM_PADDR;
+ } else if (addr >= N3DS_EXTRA_RAM_VADDR && addr < N3DS_EXTRA_RAM_VADDR_END) {
+ return addr - N3DS_EXTRA_RAM_VADDR + N3DS_EXTRA_RAM_PADDR;
}
LOG_ERROR(HW_Memory, "Unknown virtual address @ 0x%08X", addr);
@@ -696,6 +698,8 @@ VAddr PhysicalToVirtualAddress(const PAddr addr) {
return addr - DSP_RAM_PADDR + DSP_RAM_VADDR;
} else if (addr >= IO_AREA_PADDR && addr < IO_AREA_PADDR_END) {
return addr - IO_AREA_PADDR + IO_AREA_VADDR;
+ } else if (addr >= N3DS_EXTRA_RAM_PADDR && addr < N3DS_EXTRA_RAM_PADDR_END) {
+ return addr - N3DS_EXTRA_RAM_PADDR + N3DS_EXTRA_RAM_VADDR;
}
LOG_ERROR(HW_Memory, "Unknown physical address @ 0x%08X", addr);
diff --git a/src/core/memory.h b/src/core/memory.h
index 903b58a22..802aa465e 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -37,6 +37,12 @@ enum : PAddr {
VRAM_SIZE = 0x00600000, ///< VRAM size (6MB)
VRAM_PADDR_END = VRAM_PADDR + VRAM_SIZE,
+ /// New 3DS additional memory. Supposedly faster than regular FCRAM. Part of it can be used by
+ /// applications and system modules if mapped via the ExHeader.
+ N3DS_EXTRA_RAM_PADDR = 0x1F000000,
+ N3DS_EXTRA_RAM_SIZE = 0x00400000, ///< New 3DS additional memory size (4MB)
+ N3DS_EXTRA_RAM_PADDR_END = N3DS_EXTRA_RAM_PADDR + N3DS_EXTRA_RAM_SIZE,
+
/// DSP memory
DSP_RAM_PADDR = 0x1FF00000,
DSP_RAM_SIZE = 0x00080000, ///< DSP memory size (512KB)
@@ -81,6 +87,10 @@ enum : VAddr {
LINEAR_HEAP_SIZE = 0x08000000,
LINEAR_HEAP_VADDR_END = LINEAR_HEAP_VADDR + LINEAR_HEAP_SIZE,
+ /// Maps 1:1 to New 3DS additional memory
+ N3DS_EXTRA_RAM_VADDR = 0x1E800000,
+ N3DS_EXTRA_RAM_VADDR_END = N3DS_EXTRA_RAM_VADDR + N3DS_EXTRA_RAM_SIZE,
+
/// Maps 1:1 to the IO register area.
IO_AREA_VADDR = 0x1EC00000,
IO_AREA_VADDR_END = IO_AREA_VADDR + IO_AREA_SIZE,
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 2e32ff905..9a09f81dc 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -32,12 +32,13 @@ namespace Pica {
namespace CommandProcessor {
-static int float_regs_counter = 0;
+static int vs_float_regs_counter = 0;
+static u32 vs_uniform_write_buffer[4];
-static u32 uniform_write_buffer[4];
+static int gs_float_regs_counter = 0;
+static u32 gs_uniform_write_buffer[4];
static int default_attr_counter = 0;
-
static u32 default_attr_write_buffer[3];
// Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF
@@ -48,6 +49,97 @@ static const u32 expand_bits_to_bytes[] = {
MICROPROFILE_DEFINE(GPU_Drawing, "GPU", "Drawing", MP_RGB(50, 50, 240));
+/// Returns a human-readable label for `setup`, chosen by comparing its address with the global
+/// vertex-shader and geometry-shader setups. Any other object is labelled "unknown shader".
+static const char* GetShaderSetupTypeName(Shader::ShaderSetup& setup) {
+    const Shader::ShaderSetup* const target = &setup;
+    if (target == &g_state.vs) {
+        return "vertex shader";
+    }
+    return target == &g_state.gs ? "geometry shader" : "unknown shader";
+}
+
+/// Unpacks `value` into the boolean uniforms of `setup`: bit n of `value` becomes uniforms.b[n].
+static void WriteUniformBoolReg(Shader::ShaderSetup& setup, u32 value) {
+    auto& bool_uniforms = setup.uniforms.b;
+    for (unsigned bit = 0; bit < bool_uniforms.size(); ++bit) {
+        const u32 mask = 1 << bit;
+        bool_uniforms[bit] = (value & mask) != 0;
+    }
+}
+
+/// Stores `values` as integer uniform number `index` of `setup` and traces the write.
+/// `index` must be smaller than the number of integer uniforms; this is enforced with ASSERT.
+static void WriteUniformIntReg(Shader::ShaderSetup& setup, unsigned index,
+                               const Math::Vec4<u8>& values) {
+    ASSERT(index < setup.uniforms.i.size());
+
+    auto& destination = setup.uniforms.i[index];
+    destination = values;
+
+    LOG_TRACE(HW_GPU, "Set %s integer uniform %d to %02x %02x %02x %02x",
+              GetShaderSetupTypeName(setup), index, values.x, values.y, values.z, values.w);
+}
+
+/**
+ * Handles one 32-bit write to a shader unit's float-uniform data registers.
+ *
+ * Words are staged in `uniform_write_buffer`. Once a full vector has arrived (four words in
+ * float32 mode, three words in packed float24 mode) it is decoded into
+ * `setup.uniforms.f[config.uniform_setup.index]` and the index register is advanced by one.
+ * If the index is 96 or higher, an error is logged, the staged vector is dropped and the index
+ * is left unchanged.
+ *
+ * @param config Register block of the shader unit; its `uniform_setup` is read and updated.
+ * @param setup Shader state that receives the decoded uniform.
+ * @param float_regs_counter Number of words currently staged; reset to 0 on a full vector.
+ * @param uniform_write_buffer Staging buffer for the words of the vector being written.
+ * @param value The word that was just written.
+ */
+static void WriteUniformFloatReg(ShaderRegs& config, Shader::ShaderSetup& setup,
+                                 int& float_regs_counter, u32 uniform_write_buffer[4], u32 value) {
+    auto& uniform_setup = config.uniform_setup;
+
+    // TODO: Does actual hardware indeed keep an intermediate buffer or does
+    // it directly write the values?
+    uniform_write_buffer[float_regs_counter++] = value;
+
+    // Uniforms are written in a packed format such that four float24 values are encoded in
+    // three 32-bit numbers. We write to internal memory once a full such vector is
+    // written.
+    const bool is_float32 = uniform_setup.IsFloat32();
+    if (float_regs_counter < (is_float32 ? 4 : 3)) {
+        return;
+    }
+    float_regs_counter = 0;
+
+    if (uniform_setup.index >= 96) {
+        LOG_ERROR(HW_GPU, "Invalid %s float uniform index %d", GetShaderSetupTypeName(setup),
+                  static_cast<int>(uniform_setup.index));
+        return;
+    }
+
+    // Bind the destination only after the index has been validated, so that an out-of-range
+    // index never reaches the array subscript.
+    auto& uniform = setup.uniforms.f[uniform_setup.index];
+
+    // NOTE: The destination component order indeed is "backwards"
+    if (is_float32) {
+        // NOTE(review): the pointer cast below reinterprets a u32 as a float; a memcpy-based
+        // conversion would avoid relying on that, but needs an include this block cannot add.
+        for (auto i : {0, 1, 2, 3})
+            uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i]));
+    } else {
+        // TODO: Untested
+        uniform.w = float24::FromRaw(uniform_write_buffer[0] >> 8);
+        uniform.z = float24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) |
+                                     ((uniform_write_buffer[1] >> 16) & 0xFFFF));
+        uniform.y = float24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) |
+                                     ((uniform_write_buffer[2] >> 24) & 0xFF));
+        uniform.x = float24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF);
+    }
+
+    LOG_TRACE(HW_GPU, "Set %s float uniform %x to (%f %f %f %f)", GetShaderSetupTypeName(setup),
+              static_cast<int>(uniform_setup.index), uniform.x.ToFloat32(),
+              uniform.y.ToFloat32(), uniform.z.ToFloat32(), uniform.w.ToFloat32());
+
+    // TODO: Verify that this actually modifies the register!
+    uniform_setup.index.Assign(uniform_setup.index + 1);
+}
+
+/// Writes one word of shader program code at the unit's current program offset and then
+/// advances that offset. An offset at or beyond `max_program_code_length` is logged as an error
+/// and the write is dropped without advancing.
+static void WriteProgramCode(ShaderRegs& config, Shader::ShaderSetup& setup,
+                             unsigned max_program_code_length, u32 value) {
+    auto& write_offset = config.program.offset;
+    if (write_offset >= max_program_code_length) {
+        LOG_ERROR(HW_GPU, "Invalid %s program offset %d", GetShaderSetupTypeName(setup),
+                  (int)write_offset);
+        return;
+    }
+
+    setup.program_code[write_offset] = value;
+    write_offset++;
+}
+
+/// Writes one swizzle-pattern word at the unit's current swizzle offset and then advances that
+/// offset. An offset at or beyond the size of `setup.swizzle_data` is logged as an error and the
+/// write is dropped without advancing.
+static void WriteSwizzlePatterns(ShaderRegs& config, Shader::ShaderSetup& setup, u32 value) {
+    auto& write_offset = config.swizzle_patterns.offset;
+    if (write_offset >= setup.swizzle_data.size()) {
+        LOG_ERROR(HW_GPU, "Invalid %s swizzle pattern offset %d", GetShaderSetupTypeName(setup),
+                  (int)write_offset);
+        return;
+    }
+
+    setup.swizzle_data[write_offset] = value;
+    write_offset++;
+}
+
static void WritePicaReg(u32 id, u32 value, u32 mask) {
auto& regs = g_state.regs;
@@ -330,21 +422,70 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
break;
}
- case PICA_REG_INDEX(vs.bool_uniforms):
- for (unsigned i = 0; i < 16; ++i)
- g_state.vs.uniforms.b[i] = (regs.vs.bool_uniforms.Value() & (1 << i)) != 0;
+ case PICA_REG_INDEX(gs.bool_uniforms):
+ WriteUniformBoolReg(g_state.gs, value);
+ break;
+ case PICA_REG_INDEX_WORKAROUND(gs.int_uniforms[0], 0x281):
+ case PICA_REG_INDEX_WORKAROUND(gs.int_uniforms[1], 0x282):
+ case PICA_REG_INDEX_WORKAROUND(gs.int_uniforms[2], 0x283):
+ case PICA_REG_INDEX_WORKAROUND(gs.int_uniforms[3], 0x284): {
+ unsigned index = (id - PICA_REG_INDEX_WORKAROUND(gs.int_uniforms[0], 0x281));
+ auto values = regs.gs.int_uniforms[index];
+ WriteUniformIntReg(g_state.gs, index,
+ Math::Vec4<u8>(values.x, values.y, values.z, values.w));
+ break;
+ }
+
+ case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[0], 0x291):
+ case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[1], 0x292):
+ case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[2], 0x293):
+ case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[3], 0x294):
+ case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[4], 0x295):
+ case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[5], 0x296):
+ case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[6], 0x297):
+ case PICA_REG_INDEX_WORKAROUND(gs.uniform_setup.set_value[7], 0x298): {
+ WriteUniformFloatReg(g_state.regs.gs, g_state.gs, gs_float_regs_counter,
+ gs_uniform_write_buffer, value);
+ break;
+ }
+
+ case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[0], 0x29c):
+ case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[1], 0x29d):
+ case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[2], 0x29e):
+ case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[3], 0x29f):
+ case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[4], 0x2a0):
+ case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[5], 0x2a1):
+ case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[6], 0x2a2):
+ case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[7], 0x2a3): {
+ WriteProgramCode(g_state.regs.gs, g_state.gs, 4096, value);
+ break;
+ }
+
+ case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[0], 0x2a6):
+ case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[1], 0x2a7):
+ case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[2], 0x2a8):
+ case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[3], 0x2a9):
+ case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[4], 0x2aa):
+ case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[5], 0x2ab):
+ case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[6], 0x2ac):
+ case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[7], 0x2ad): {
+ WriteSwizzlePatterns(g_state.regs.gs, g_state.gs, value);
+ break;
+ }
+
+ case PICA_REG_INDEX(vs.bool_uniforms):
+ WriteUniformBoolReg(g_state.vs, value);
break;
case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1):
case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[1], 0x2b2):
case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[2], 0x2b3):
case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[3], 0x2b4): {
- int index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1));
+ unsigned index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1));
auto values = regs.vs.int_uniforms[index];
- g_state.vs.uniforms.i[index] = Math::Vec4<u8>(values.x, values.y, values.z, values.w);
- LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x", index, values.x.Value(),
- values.y.Value(), values.z.Value(), values.w.Value());
+ WriteUniformIntReg(g_state.vs, index,
+ Math::Vec4<u8>(values.x, values.y, values.z, values.w));
break;
}
@@ -356,51 +497,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[5], 0x2c6):
case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[6], 0x2c7):
case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[7], 0x2c8): {
- auto& uniform_setup = regs.vs.uniform_setup;
-
- // TODO: Does actual hardware indeed keep an intermediate buffer or does
- // it directly write the values?
- uniform_write_buffer[float_regs_counter++] = value;
-
- // Uniforms are written in a packed format such that four float24 values are encoded in
- // three 32-bit numbers. We write to internal memory once a full such vector is
- // written.
- if ((float_regs_counter >= 4 && uniform_setup.IsFloat32()) ||
- (float_regs_counter >= 3 && !uniform_setup.IsFloat32())) {
- float_regs_counter = 0;
-
- auto& uniform = g_state.vs.uniforms.f[uniform_setup.index];
-
- if (uniform_setup.index > 95) {
- LOG_ERROR(HW_GPU, "Invalid VS uniform index %d", (int)uniform_setup.index);
- break;
- }
-
- // NOTE: The destination component order indeed is "backwards"
- if (uniform_setup.IsFloat32()) {
- for (auto i : {0, 1, 2, 3})
- uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i]));
- } else {
- // TODO: Untested
- uniform.w = float24::FromRaw(uniform_write_buffer[0] >> 8);
- uniform.z = float24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) |
- ((uniform_write_buffer[1] >> 16) & 0xFFFF));
- uniform.y = float24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) |
- ((uniform_write_buffer[2] >> 24) & 0xFF));
- uniform.x = float24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF);
- }
-
- LOG_TRACE(HW_GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index,
- uniform.x.ToFloat32(), uniform.y.ToFloat32(), uniform.z.ToFloat32(),
- uniform.w.ToFloat32());
-
- // TODO: Verify that this actually modifies the register!
- uniform_setup.index.Assign(uniform_setup.index + 1);
- }
+ WriteUniformFloatReg(g_state.regs.vs, g_state.vs, vs_float_regs_counter,
+ vs_uniform_write_buffer, value);
break;
}
- // Load shader program code
case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[0], 0x2cc):
case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[1], 0x2cd):
case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[2], 0x2ce):
@@ -409,12 +510,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[5], 0x2d1):
case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[6], 0x2d2):
case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[7], 0x2d3): {
- g_state.vs.program_code[regs.vs.program.offset] = value;
- regs.vs.program.offset++;
+ WriteProgramCode(g_state.regs.vs, g_state.vs, 512, value);
break;
}
- // Load swizzle pattern data
case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[0], 0x2d6):
case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[1], 0x2d7):
case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[2], 0x2d8):
@@ -423,8 +522,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[5], 0x2db):
case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[6], 0x2dc):
case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[7], 0x2dd): {
- g_state.vs.swizzle_data[regs.vs.swizzle_patterns.offset] = value;
- regs.vs.swizzle_patterns.offset++;
+ WriteSwizzlePatterns(g_state.regs.vs, g_state.vs, value);
break;
}
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 38ea717ab..e156f6aef 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -24,6 +24,9 @@ namespace Pica {
namespace Shader {
+constexpr unsigned MAX_PROGRAM_CODE_LENGTH = 4096;
+constexpr unsigned MAX_SWIZZLE_DATA_LENGTH = 4096;
+
struct AttributeBuffer {
alignas(16) Math::Vec4<float24> attr[16];
};
@@ -144,8 +147,8 @@ struct ShaderSetup {
return offsetof(ShaderSetup, uniforms.i) + index * sizeof(Math::Vec4<u8>);
}
- std::array<u32, 1024> program_code;
- std::array<u32, 1024> swizzle_data;
+ std::array<u32, MAX_PROGRAM_CODE_LENGTH> program_code;
+ std::array<u32, MAX_SWIZZLE_DATA_LENGTH> swizzle_data;
/// Data private to ShaderEngines
struct EngineData {
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index f4d1c46c5..aa1cec81f 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -653,7 +653,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData
}
void InterpreterEngine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) { // Asserts that entry_point is within the program code array, then records it in setup.engine_data.
- ASSERT(entry_point < 1024);
+ ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH);
setup.engine_data.entry_point = entry_point;
}
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index 0ee0dd9ef..73c21871c 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -15,7 +15,7 @@ JitX64Engine::JitX64Engine() = default;
JitX64Engine::~JitX64Engine() = default;
void JitX64Engine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) {
- ASSERT(entry_point < 1024);
+ ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH);
setup.engine_data.entry_point = entry_point;
u64 code_hash = Common::ComputeHash64(&setup.program_code, sizeof(setup.program_code));
diff --git a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp
index 2dbc8b147..5d9b6448c 100644
--- a/src/video_core/shader/shader_jit_x64_compiler.cpp
+++ b/src/video_core/shader/shader_jit_x64_compiler.cpp
@@ -834,8 +834,8 @@ void JitShader::FindReturnOffsets() {
std::sort(return_offsets.begin(), return_offsets.end());
}
-void JitShader::Compile(const std::array<u32, 1024>* program_code_,
- const std::array<u32, 1024>* swizzle_data_) {
+void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_code_,
+ const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>* swizzle_data_) {
program_code = program_code_;
swizzle_data = swizzle_data_;
diff --git a/src/video_core/shader/shader_jit_x64_compiler.h b/src/video_core/shader/shader_jit_x64_compiler.h
index f27675560..31af0ca48 100644
--- a/src/video_core/shader/shader_jit_x64_compiler.h
+++ b/src/video_core/shader/shader_jit_x64_compiler.h
@@ -22,8 +22,8 @@ namespace Pica {
namespace Shader {
-/// Memory allocated for each compiled shader (64Kb)
-constexpr size_t MAX_SHADER_SIZE = 1024 * 64;
+/// Memory allocated for each compiled shader
+constexpr size_t MAX_SHADER_SIZE = MAX_PROGRAM_CODE_LENGTH * 64;
/**
* This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64
@@ -37,8 +37,8 @@ public:
program(&setup, &state, instruction_labels[offset].getAddress());
}
- void Compile(const std::array<u32, 1024>* program_code,
- const std::array<u32, 1024>* swizzle_data);
+ void Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_code,
+ const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>* swizzle_data);
void Compile_ADD(Instruction instr);
void Compile_DP3(Instruction instr);
@@ -104,11 +104,11 @@ private:
*/
void FindReturnOffsets();
- const std::array<u32, 1024>* program_code = nullptr;
- const std::array<u32, 1024>* swizzle_data = nullptr;
+ const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_code = nullptr;
+ const std::array<u32, MAX_SWIZZLE_DATA_LENGTH>* swizzle_data = nullptr;
/// Mapping of Pica VS instructions to pointers in the emitted code
- std::array<Xbyak::Label, 1024> instruction_labels;
+ std::array<Xbyak::Label, MAX_PROGRAM_CODE_LENGTH> instruction_labels;
/// Offsets in code where a return needs to be inserted
std::vector<unsigned> return_offsets;