diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/core/core.cpp | 4 | ||||
-rw-r--r-- | src/core/hle/kernel/session.h | 10 | ||||
-rw-r--r-- | src/core/hle/kernel/thread.cpp | 11 | ||||
-rw-r--r-- | src/core/hle/kernel/thread.h | 8 | ||||
-rw-r--r-- | src/video_core/pica.h | 67 | ||||
-rw-r--r-- | src/video_core/rasterizer.cpp | 49 | ||||
-rw-r--r-- | src/video_core/vertex_shader.cpp | 20 |
7 files changed, 143 insertions, 26 deletions
diff --git a/src/core/core.cpp b/src/core/core.cpp index bb2ed7a92..b5c258230 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -61,10 +61,6 @@ int Init() { g_sys_core = new ARM_DynCom(USER32MODE); g_app_core = new ARM_DynCom(USER32MODE); - // TODO: Whenever TLS is implemented, this should contain - // the address of the 0x200-byte TLS - g_app_core->SetCP15Register(CP15_THREAD_URO, Memory::TLS_AREA_VADDR); - LOG_DEBUG(Core, "Initialized OK"); return 0; } diff --git a/src/core/hle/kernel/session.h b/src/core/hle/kernel/session.h index 0fd18148a..8c3886ffd 100644 --- a/src/core/hle/kernel/session.h +++ b/src/core/hle/kernel/session.h @@ -5,6 +5,7 @@ #pragma once #include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/thread.h" #include "core/mem_map.h" namespace Kernel { @@ -12,12 +13,15 @@ namespace Kernel { static const int kCommandHeaderOffset = 0x80; ///< Offset into command buffer of header /** - * Returns a pointer to the command buffer in kernel memory + * Returns a pointer to the command buffer in the current thread's TLS + * TODO(Subv): This is not entirely correct, the command buffer should be copied from + * the thread's TLS to an intermediate buffer in kernel memory, and then copied again to + * the service handler process' memory. * @param offset Optional offset into command buffer * @return Pointer to command buffer */ -inline static u32* GetCommandBuffer(const int offset=0) { - return (u32*)Memory::GetPointer(Memory::TLS_AREA_VADDR + kCommandHeaderOffset + offset); +inline static u32* GetCommandBuffer(const int offset = 0) { + return (u32*)Memory::GetPointer(GetCurrentThread()->GetTLSAddress() + kCommandHeaderOffset + offset); } /** diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 0a3fd7cb1..5de8f9a73 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -197,6 +197,7 @@ static void SwitchContext(Thread* new_thread) { new_thread->current_priority = new_thread->nominal_priority; Core::g_app_core->LoadContext(new_thread->context); + Core::g_app_core->SetCP15Register(CP15_THREAD_URO, new_thread->GetTLSAddress()); } else { current_thread = nullptr; } @@ -402,6 +403,12 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point, thread->name = std::move(name); thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom(); + VAddr tls_address = Memory::TLS_AREA_VADDR + (thread->thread_id - 1) * 0x200; + + ASSERT_MSG(tls_address < Memory::TLS_AREA_VADDR_END, "Too many threads"); + + thread->tls_address = tls_address; + // TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used // to initialize the context Core::g_app_core->ResetContext(thread->context, stack_top, entry_point, arg); @@ -495,6 +502,10 @@ void Thread::SetWaitSynchronizationOutput(s32 output) { context.cpu_registers[1] = output; } +VAddr Thread::GetTLSAddress() const { + return tls_address; +} + //////////////////////////////////////////////////////////////////////////////////////////////////// void ThreadingInit() { diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 9958b16e6..6891c8c2f 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -135,6 +135,12 @@ public: */ void Stop(); + /* + * Returns the Thread Local Storage address of the current thread + * @returns VAddr of the thread's TLS + */ + VAddr GetTLSAddress() const; + Core::ThreadContext context; u32 thread_id; @@ -150,6 +156,8 @@ public: s32 processor_id; + VAddr tls_address; ///< Address of the Thread Local Storage of the thread + /// Mutexes currently held by this thread, which will be released when it exits. boost::container::flat_set<SharedPtr<Mutex>> held_mutexes; diff --git a/src/video_core/pica.h b/src/video_core/pica.h index e4a91058c..5e169ff69 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -226,7 +226,8 @@ struct Regs { Texture1 = 0x4, Texture2 = 0x5, Texture3 = 0x6, - // 0x7-0xc = primary color?? + + PreviousBuffer = 0xd, Constant = 0xe, Previous = 0xf, }; @@ -299,7 +300,18 @@ struct Regs { BitField<24, 8, u32> const_a; }; - INSERT_PADDING_WORDS(0x1); + union { + BitField< 0, 2, u32> color_scale; + BitField<16, 2, u32> alpha_scale; + }; + + inline unsigned GetColorMultiplier() const { + return (color_scale < 3) ? (1 << color_scale) : 1; + } + + inline unsigned GetAlphaMultiplier() const { + return (alpha_scale < 3) ? (1 << alpha_scale) : 1; + } }; TevStageConfig tev_stage0; @@ -309,11 +321,36 @@ struct Regs { TevStageConfig tev_stage2; INSERT_PADDING_WORDS(0x3); TevStageConfig tev_stage3; - INSERT_PADDING_WORDS(0x13); + INSERT_PADDING_WORDS(0x3); + + union { + // Tev stages 0-3 write their output to the combiner buffer if the corresponding bit in + // these masks are set + BitField< 8, 4, u32> update_mask_rgb; + BitField<12, 4, u32> update_mask_a; + + bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { + return (stage_index < 4) && (update_mask_rgb & (1 << stage_index)); + } + + bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const { + return (stage_index < 4) && (update_mask_a & (1 << stage_index)); + } + } tev_combiner_buffer_input; + + INSERT_PADDING_WORDS(0xf); TevStageConfig tev_stage4; INSERT_PADDING_WORDS(0x3); TevStageConfig tev_stage5; - INSERT_PADDING_WORDS(0x3); + + union { + BitField< 0, 8, u32> r; + BitField< 8, 8, u32> g; + BitField<16, 8, u32> b; + BitField<24, 8, u32> a; + } tev_combiner_buffer_color; + + INSERT_PADDING_WORDS(0x2); const std::array<Regs::TevStageConfig,6> GetTevStages() const { return { tev_stage0, tev_stage1, @@ -426,9 +463,7 @@ struct Regs { D24S8 = 3 }; - /* - * Returns the number of bytes in the specified depth format - */ + // Returns the number of bytes in the specified depth format static u32 BytesPerDepthPixel(DepthFormat format) { switch (format) { case DepthFormat::D16: @@ -443,6 +478,20 @@ struct Regs { } } + // Returns the number of bits per depth component of the specified depth format + static u32 DepthBitsPerPixel(DepthFormat format) { + switch (format) { + case DepthFormat::D16: + return 16; + case DepthFormat::D24: + case DepthFormat::D24S8: + return 24; + default: + LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format); + UNIMPLEMENTED(); + } + } + struct { // Components are laid out in reverse byte order, most significant bits first. enum ColorFormat : u32 { @@ -784,8 +833,10 @@ struct Regs { ADD_FIELD(tev_stage1); ADD_FIELD(tev_stage2); ADD_FIELD(tev_stage3); + ADD_FIELD(tev_combiner_buffer_input); ADD_FIELD(tev_stage4); ADD_FIELD(tev_stage5); + ADD_FIELD(tev_combiner_buffer_color); ADD_FIELD(output_merger); ADD_FIELD(framebuffer); ADD_FIELD(vertex_attributes); @@ -859,8 +910,10 @@ ASSERT_REG_POSITION(tev_stage0, 0xc0); ASSERT_REG_POSITION(tev_stage1, 0xc8); ASSERT_REG_POSITION(tev_stage2, 0xd0); ASSERT_REG_POSITION(tev_stage3, 0xd8); +ASSERT_REG_POSITION(tev_combiner_buffer_input, 0xe0); ASSERT_REG_POSITION(tev_stage4, 0xf0); ASSERT_REG_POSITION(tev_stage5, 0xf8); +ASSERT_REG_POSITION(tev_combiner_buffer_color, 0xfd); ASSERT_REG_POSITION(output_merger, 0x100); ASSERT_REG_POSITION(framebuffer, 0x110); ASSERT_REG_POSITION(vertex_attributes, 0x200); diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 3b3fef484..46a326bb4 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -90,7 +90,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) { UNIMPLEMENTED(); } - return {}; + return {0, 0, 0, 0}; } static u32 GetDepth(int x, int y) { @@ -376,7 +376,13 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, // with some basic arithmetic. Alpha combiners can be configured separately but work // analogously. Math::Vec4<u8> combiner_output; - for (const auto& tev_stage : tev_stages) { + Math::Vec4<u8> combiner_buffer = { + registers.tev_combiner_buffer_color.r, registers.tev_combiner_buffer_color.g, + registers.tev_combiner_buffer_color.b, registers.tev_combiner_buffer_color.a + }; + + for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { + const auto& tev_stage = tev_stages[tev_stage_index]; using Source = Regs::TevStageConfig::Source; using ColorModifier = Regs::TevStageConfig::ColorModifier; using AlphaModifier = Regs::TevStageConfig::AlphaModifier; @@ -398,6 +404,9 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, case Source::Texture2: return texture_color[2]; + case Source::PreviousBuffer: + return combiner_buffer; + case Source::Constant: return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a}; @@ -407,7 +416,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, default: LOG_ERROR(HW_GPU, "Unknown color combiner source %d\n", (int)source); UNIMPLEMENTED(); - return {}; + return {0, 0, 0, 0}; } }; @@ -490,6 +499,16 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, return result.Cast<u8>(); } + case Operation::AddSigned: + { + // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct + auto result = input[0].Cast<int>() + input[1].Cast<int>() - Math::MakeVec<int>(128, 128, 128); + result.r() = MathUtil::Clamp<int>(result.r(), 0, 255); + result.g() = MathUtil::Clamp<int>(result.g(), 0, 255); + result.b() = MathUtil::Clamp<int>(result.b(), 0, 255); + return result.Cast<u8>(); + } + case Operation::Lerp: return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>(); @@ -524,7 +543,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, default: LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op); UNIMPLEMENTED(); - return {}; + return {0, 0, 0}; } }; @@ -578,7 +597,20 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, }; auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result); - combiner_output = Math::MakeVec(color_output, alpha_output); + combiner_output[0] = std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier()); + combiner_output[1] = std::min((unsigned)255, color_output.g() * tev_stage.GetColorMultiplier()); + combiner_output[2] = std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier()); + combiner_output[3] = std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier()); + + if (registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index)) { + combiner_buffer.r() = combiner_output.r(); + combiner_buffer.g() = combiner_output.g(); + combiner_buffer.b() = combiner_output.b(); + } + + if (registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)) { + combiner_buffer.a() = combiner_output.a(); + } } if (registers.output_merger.alpha_test.enable) { @@ -624,9 +656,10 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, // TODO: Does depth indeed only get written even if depth testing is enabled? if (registers.output_merger.depth_test_enable) { - u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 + - v1.screenpos[2].ToFloat32() * w1 + - v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); + unsigned num_bits = Pica::Regs::DepthBitsPerPixel(registers.framebuffer.depth_format); + u32 z = (u32)((v0.screenpos[2].ToFloat32() * w0 + + v1.screenpos[2].ToFloat32() * w1 + + v2.screenpos[2].ToFloat32() * w2) * ((1 << num_bits) - 1) / wsum); u32 ref_z = GetDepth(x >> 4, y >> 4); bool pass = false; diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 51f4e58bf..885b7de59 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -235,6 +235,15 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } + case OpCode::Id::FLR: + for (int i = 0; i < 4; ++i) { + if (!swizzle.DestComponentEnabled(i)) + continue; + + dest[i] = float24::FromFloat32(std::floor(src1[i].ToFloat32())); + } + break; + case OpCode::Id::MAX: for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) @@ -366,12 +375,15 @@ static void ProcessShaderCode(VertexShaderState& state) { case OpCode::Type::MultiplyAdd: { - if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) { + if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) || + (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) { const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.mad.operand_desc_id]; - const float24* src1_ = LookupSourceRegister(instr.mad.src1); - const float24* src2_ = LookupSourceRegister(instr.mad.src2); - const float24* src3_ = LookupSourceRegister(instr.mad.src3); + bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI); + + const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted)); + const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted)); + const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted)); const bool negate_src1 = ((bool)swizzle.negate_src1 != false); const bool negate_src2 = ((bool)swizzle.negate_src2 != false); |