diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/video_core/color.h | 215 | ||||
-rw-r--r-- | src/video_core/command_processor.cpp | 78 | ||||
-rw-r--r-- | src/video_core/debug_utils/debug_utils.cpp | 6 | ||||
-rw-r--r-- | src/video_core/debug_utils/debug_utils.h | 3 | ||||
-rw-r--r-- | src/video_core/math.h | 640 | ||||
-rw-r--r-- | src/video_core/pica.h | 93 | ||||
-rw-r--r-- | src/video_core/rasterizer.cpp | 80 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/generated/gl_3_2_core.c | 16 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 169 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 3 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 2 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shaders.h | 24 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_state.cpp | 49 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_state.h | 11 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/pica_to_gl.h | 31 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 10 | ||||
-rw-r--r-- | src/video_core/vertex_shader.cpp | 33 |
18 files changed, 438 insertions, 1027 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 0258a3255..5c7f4ae18 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -29,11 +29,9 @@ set(HEADERS renderer_opengl/pica_to_gl.h renderer_opengl/renderer_opengl.h clipper.h - color.h command_processor.h gpu_debugger.h hwrasterizer_base.h - math.h pica.h primitive_assembly.h rasterizer.h diff --git a/src/video_core/color.h b/src/video_core/color.h deleted file mode 100644 index 4d2026eb0..000000000 --- a/src/video_core/color.h +++ /dev/null @@ -1,215 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include "common/common_types.h" -#include "common/swap.h" - -#include "video_core/math.h" - -namespace Color { - -/// Convert a 1-bit color component to 8 bit -inline u8 Convert1To8(u8 value) { - return value * 255; -} - -/// Convert a 4-bit color component to 8 bit -inline u8 Convert4To8(u8 value) { - return (value << 4) | value; -} - -/// Convert a 5-bit color component to 8 bit -inline u8 Convert5To8(u8 value) { - return (value << 3) | (value >> 2); -} - -/// Convert a 6-bit color component to 8 bit -inline u8 Convert6To8(u8 value) { - return (value << 2) | (value >> 4); -} - -/// Convert a 8-bit color component to 1 bit -inline u8 Convert8To1(u8 value) { - return value >> 7; -} - -/// Convert a 8-bit color component to 4 bit -inline u8 Convert8To4(u8 value) { - return value >> 4; -} - -/// Convert a 8-bit color component to 5 bit -inline u8 Convert8To5(u8 value) { - return value >> 3; -} - -/// Convert a 8-bit color component to 6 bit -inline u8 Convert8To6(u8 value) { - return value >> 2; -} - -/** - * Decode a color stored in RGBA8 format - * @param bytes Pointer to encoded source color - * @return Result color decoded as Math::Vec4<u8> - */ -inline const Math::Vec4<u8> DecodeRGBA8(const u8* bytes) { - return { bytes[3], bytes[2], bytes[1], bytes[0] }; -} - -/** - * Decode a color stored in RGB8 format - * @param bytes Pointer to encoded source color - * @return Result color decoded as Math::Vec4<u8> - */ -inline const Math::Vec4<u8> DecodeRGB8(const u8* bytes) { - return { bytes[2], bytes[1], bytes[0], 255 }; -} - -/** - * Decode a color stored in RGB565 format - * @param bytes Pointer to encoded source color - * @return Result color decoded as Math::Vec4<u8> - */ -inline const Math::Vec4<u8> DecodeRGB565(const u8* bytes) { - const u16_le pixel = *reinterpret_cast<const u16_le*>(bytes); - return { Convert5To8((pixel >> 11) & 0x1F), Convert6To8((pixel >> 5) & 0x3F), - Convert5To8(pixel & 0x1F), 255 }; -} - -/** - * Decode a color stored in RGB5A1 format - * @param bytes Pointer to encoded source color - * @return Result color decoded as Math::Vec4<u8> - */ -inline const Math::Vec4<u8> DecodeRGB5A1(const u8* bytes) { - const u16_le pixel = *reinterpret_cast<const u16_le*>(bytes); - return { Convert5To8((pixel >> 11) & 0x1F), Convert5To8((pixel >> 6) & 0x1F), - Convert5To8((pixel >> 1) & 0x1F), Convert1To8(pixel & 0x1) }; -} - -/** - * Decode a color stored in RGBA4 format - * @param bytes Pointer to encoded source color - * @return Result color decoded as Math::Vec4<u8> - */ -inline const Math::Vec4<u8> DecodeRGBA4(const u8* bytes) { - const u16_le pixel = *reinterpret_cast<const u16_le*>(bytes); - return { Convert4To8((pixel >> 12) & 0xF), Convert4To8((pixel >> 8) & 0xF), - Convert4To8((pixel >> 4) & 0xF), Convert4To8(pixel & 0xF) }; -} - -/** - * Decode a depth value stored in D16 format - * @param bytes Pointer to encoded source value - * @return Depth value as an u32 - */ -inline u32 DecodeD16(const u8* bytes) { - return *reinterpret_cast<const u16_le*>(bytes); -} - -/** - * Decode a depth value stored in D24 format - * @param bytes Pointer to encoded source value - * @return Depth value as an u32 - */ -inline u32 DecodeD24(const u8* bytes) { - return (bytes[2] << 16) | (bytes[1] << 8) | bytes[0]; -} - -/** - * Decode a depth value and a stencil value stored in D24S8 format - * @param bytes Pointer to encoded source values - * @return Resulting values stored as a Math::Vec2 - */ -inline const Math::Vec2<u32> DecodeD24S8(const u8* bytes) { - return { static_cast<u32>((bytes[2] << 16) | (bytes[1] << 8) | bytes[0]), bytes[3] }; -} - -/** - * Encode a color as RGBA8 format - * @param color Source color to encode - * @param bytes Destination pointer to store encoded color - */ -inline void EncodeRGBA8(const Math::Vec4<u8>& color, u8* bytes) { - bytes[3] = color.r(); - bytes[2] = color.g(); - bytes[1] = color.b(); - bytes[0] = color.a(); -} - -/** - * Encode a color as RGB8 format - * @param color Source color to encode - * @param bytes Destination pointer to store encoded color - */ -inline void EncodeRGB8(const Math::Vec4<u8>& color, u8* bytes) { - bytes[2] = color.r(); - bytes[1] = color.g(); - bytes[0] = color.b(); -} - -/** - * Encode a color as RGB565 format - * @param color Source color to encode - * @param bytes Destination pointer to store encoded color - */ -inline void EncodeRGB565(const Math::Vec4<u8>& color, u8* bytes) { - *reinterpret_cast<u16_le*>(bytes) = (Convert8To5(color.r()) << 11) | - (Convert8To6(color.g()) << 5) | Convert8To5(color.b()); -} - -/** - * Encode a color as RGB5A1 format - * @param color Source color to encode - * @param bytes Destination pointer to store encoded color - */ -inline void EncodeRGB5A1(const Math::Vec4<u8>& color, u8* bytes) { - *reinterpret_cast<u16_le*>(bytes) = (Convert8To5(color.r()) << 11) | - (Convert8To5(color.g()) << 6) | (Convert8To5(color.b()) << 1) | Convert8To1(color.a()); -} - -/** - * Encode a color as RGBA4 format - * @param color Source color to encode - * @param bytes Destination pointer to store encoded color - */ -inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) { - *reinterpret_cast<u16_le*>(bytes) = (Convert8To4(color.r()) << 12) | - (Convert8To4(color.g()) << 8) | (Convert8To4(color.b()) << 4) | Convert8To4(color.a()); -} - -/** - * Encode a 16 bit depth value as D16 format - * @param value 16 bit source depth value to encode - * @param bytes Pointer where to store the encoded value - */ -inline void EncodeD16(u32 value, u8* bytes) { - *reinterpret_cast<u16_le*>(bytes) = value & 0xFFFF; -} - -/** - * Encode a 24 bit depth value as D24 format - * @param value 24 bit source depth value to encode - * @param bytes Pointer where to store the encoded value - */ -inline void EncodeD24(u32 value, u8* bytes) { - bytes[0] = value & 0xFF; - bytes[1] = (value >> 8) & 0xFF; - bytes[2] = (value >> 16) & 0xFF; -} - -/** - * Encode a 24 bit depth and 8 bit stencil values as D24S8 format - * @param depth 24 bit source depth value to encode - * @param stencil 8 bit source stencil value to encode - * @param bytes Pointer where to store the encoded value - */ -inline void EncodeD24S8(u32 depth, u8 stencil, u8* bytes) { - *reinterpret_cast<u32_le*>(bytes) = (stencil << 24) | depth; -} - -} // namespace diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 100d8c7c1..b46fadd9f 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -56,7 +56,17 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { // Trigger IRQ case PICA_REG_INDEX(trigger_irq): GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::P3D); - return; + break; + + case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[0], 0x23c): + case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[1], 0x23d): + { + unsigned index = id - PICA_REG_INDEX(command_buffer.trigger[0]); + u32* head_ptr = (u32*)Memory::GetPhysicalPointer(regs.command_buffer.GetPhysicalAddress(index)); + g_state.cmd_list.head_ptr = g_state.cmd_list.current_ptr = head_ptr; + g_state.cmd_list.length = regs.command_buffer.GetSize(index) / sizeof(u32); + break; + } // It seems like these trigger vertex rendering case PICA_REG_INDEX(trigger_draw): @@ -136,7 +146,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); } - + // Load per-vertex data from the loader arrays for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { const u8* srcdata = Memory::GetPhysicalPointer(vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]); @@ -193,7 +203,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { const Pica::VertexShader::OutputVertex& v2) { VideoCore::g_renderer->hw_rasterizer->AddTriangle(v0, v1, v2); }; - + primitive_assembler.SubmitVertex(output, AddHWTriangle); } else { // Send to triangle clipper @@ -282,7 +292,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { } break; } - + // Load default vertex input attributes case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[0], 0x233): case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[1], 0x234): @@ -306,7 +316,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { } Math::Vec4<float24>& attribute = g_state.vs.default_attributes[setup.index]; - + // NOTE: The destination component order indeed is "backwards" attribute.w = float24::FromRawFloat24(default_attr_write_buffer[0] >> 8); attribute.z = float24::FromRawFloat24(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); @@ -363,38 +373,34 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { g_debug_context->OnEvent(DebugContext::Event::CommandProcessed, reinterpret_cast<void*>(&id)); } -static std::ptrdiff_t ExecuteCommandBlock(const u32* first_command_word) { - const CommandHeader& header = *(const CommandHeader*)(&first_command_word[1]); - - u32* read_pointer = (u32*)first_command_word; - - const u32 write_mask = ((header.parameter_mask & 0x1) ? (0xFFu << 0) : 0u) | - ((header.parameter_mask & 0x2) ? (0xFFu << 8) : 0u) | - ((header.parameter_mask & 0x4) ? (0xFFu << 16) : 0u) | - ((header.parameter_mask & 0x8) ? (0xFFu << 24) : 0u); - - WritePicaReg(header.cmd_id, *read_pointer, write_mask); - read_pointer += 2; - - for (unsigned int i = 1; i < 1+header.extra_data_length; ++i) { - u32 cmd = header.cmd_id + ((header.group_commands) ? i : 0); - WritePicaReg(cmd, *read_pointer, write_mask); - ++read_pointer; - } - - // align read pointer to 8 bytes - if ((first_command_word - read_pointer) % 2) - ++read_pointer; - - return read_pointer - first_command_word; -} - void ProcessCommandList(const u32* list, u32 size) { - u32* read_pointer = (u32*)list; - u32 list_length = size / sizeof(u32); - - while (read_pointer < list + list_length) { - read_pointer += ExecuteCommandBlock(read_pointer); + g_state.cmd_list.head_ptr = g_state.cmd_list.current_ptr = list; + g_state.cmd_list.length = size / sizeof(u32); + + while (g_state.cmd_list.current_ptr < g_state.cmd_list.head_ptr + g_state.cmd_list.length) { + // Expand a 4-bit mask to 4-byte mask, e.g. 0b0101 -> 0x00FF00FF + static const u32 expand_bits_to_bytes[] = { + 0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff, + 0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff, + 0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff, + 0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff + }; + + // Align read pointer to 8 bytes + if ((g_state.cmd_list.head_ptr - g_state.cmd_list.current_ptr) % 2 != 0) + ++g_state.cmd_list.current_ptr; + + u32 value = *g_state.cmd_list.current_ptr++; + const CommandHeader header = { *g_state.cmd_list.current_ptr++ }; + const u32 write_mask = expand_bits_to_bytes[header.parameter_mask]; + u32 cmd = header.cmd_id; + + WritePicaReg(cmd, value, write_mask); + + for (unsigned i = 0; i < header.extra_data_length; ++i) { + u32 cmd = header.cmd_id + (header.group_commands ? i + 1 : 0); + WritePicaReg(cmd, *g_state.cmd_list.current_ptr++, write_mask); + } } } diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 7987b922c..7b8ab72b6 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -17,11 +17,11 @@ #include <nihstro/shader_binary.h> #include "common/assert.h" +#include "common/color.h" #include "common/file_util.h" #include "common/math_util.h" +#include "common/vector_math.h" -#include "video_core/color.h" -#include "video_core/math.h" #include "video_core/pica.h" #include "video_core/utils.h" #include "video_core/video_core.h" @@ -319,7 +319,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture // TODO(neobrain): Fix code design to unify vertical block offsets! source += coarse_y * info.stride; } - + // TODO: Assert that width/height are multiples of block dimensions switch (info.format) { diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h index f361a5385..7926d64ec 100644 --- a/src/video_core/debug_utils/debug_utils.h +++ b/src/video_core/debug_utils/debug_utils.h @@ -12,7 +12,8 @@ #include <mutex> #include <vector> -#include "video_core/math.h" +#include "common/vector_math.h" + #include "video_core/pica.h" namespace Pica { diff --git a/src/video_core/math.h b/src/video_core/math.h deleted file mode 100644 index f9a822658..000000000 --- a/src/video_core/math.h +++ /dev/null @@ -1,640 +0,0 @@ -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - - -// Copyright 2014 Tony Wasserka -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of the owner nor the names of its contributors may -// be used to endorse or promote products derived from this software -// without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#pragma once - -#include <cmath> - -namespace Math { - -template<typename T> class Vec2; -template<typename T> class Vec3; -template<typename T> class Vec4; - -template<typename T> -static inline Vec2<T> MakeVec(const T& x, const T& y); -template<typename T> -static inline Vec3<T> MakeVec(const T& x, const T& y, const T& z); -template<typename T> -static inline Vec4<T> MakeVec(const T& x, const T& y, const T& z, const T& w); - - -template<typename T> -class Vec2 { -public: - T x; - T y; - - T* AsArray() { return &x; } - - Vec2() = default; - Vec2(const T a[2]) : x(a[0]), y(a[1]) {} - Vec2(const T& _x, const T& _y) : x(_x), y(_y) {} - - template<typename T2> - Vec2<T2> Cast() const { - return Vec2<T2>((T2)x, (T2)y); - } - - static Vec2 AssignToAll(const T& f) - { - return Vec2<T>(f, f); - } - - void Write(T a[2]) - { - a[0] = x; a[1] = y; - } - - Vec2<decltype(T{}+T{})> operator +(const Vec2& other) const - { - return MakeVec(x+other.x, y+other.y); - } - void operator += (const Vec2 &other) - { - x+=other.x; y+=other.y; - } - Vec2<decltype(T{}-T{})> operator -(const Vec2& other) const - { - return MakeVec(x-other.x, y-other.y); - } - void operator -= (const Vec2& other) - { - x-=other.x; y-=other.y; - } - Vec2<decltype(-T{})> operator -() const - { - return MakeVec(-x,-y); - } - Vec2<decltype(T{}*T{})> operator * (const Vec2& other) const - { - return MakeVec(x*other.x, y*other.y); - } - template<typename V> - Vec2<decltype(T{}*V{})> operator * (const V& f) const - { - return MakeVec(x*f,y*f); - } - template<typename V> - void operator *= (const V& f) - { - x*=f; y*=f; - } - template<typename V> - Vec2<decltype(T{}/V{})> operator / (const V& f) const - { - return MakeVec(x/f,y/f); - } - template<typename V> - void operator /= (const V& f) - { - *this = *this / f; - } - - T Length2() const - { - return x*x + y*y; - } - - // Only implemented for T=float - float Length() const; - void SetLength(const float l); - Vec2 WithLength(const float l) const; - float Distance2To(Vec2 &other); - Vec2 Normalized() const; - float Normalize(); // returns the previous length, which is often useful - - T& operator [] (int i) //allow vector[1] = 3 (vector.y=3) - { - return *((&x) + i); - } - T operator [] (const int i) const - { - return *((&x) + i); - } - - void SetZero() - { - x=0; y=0; - } - - // Common aliases: UV (texel coordinates), ST (texture coordinates) - T& u() { return x; } - T& v() { return y; } - T& s() { return x; } - T& t() { return y; } - - const T& u() const { return x; } - const T& v() const { return y; } - const T& s() const { return x; } - const T& t() const { return y; } - - // swizzlers - create a subvector of specific components - const Vec2 yx() const { return Vec2(y, x); } - const Vec2 vu() const { return Vec2(y, x); } - const Vec2 ts() const { return Vec2(y, x); } -}; - -template<typename T, typename V> -Vec2<T> operator * (const V& f, const Vec2<T>& vec) -{ - return Vec2<T>(f*vec.x,f*vec.y); -} - -typedef Vec2<float> Vec2f; - -template<typename T> -class Vec3 -{ -public: - T x; - T y; - T z; - - T* AsArray() { return &x; } - - Vec3() = default; - Vec3(const T a[3]) : x(a[0]), y(a[1]), z(a[2]) {} - Vec3(const T& _x, const T& _y, const T& _z) : x(_x), y(_y), z(_z) {} - - template<typename T2> - Vec3<T2> Cast() const { - return MakeVec<T2>((T2)x, (T2)y, (T2)z); - } - - // Only implemented for T=int and T=float - static Vec3 FromRGB(unsigned int rgb); - unsigned int ToRGB() const; // alpha bits set to zero - - static Vec3 AssignToAll(const T& f) - { - return MakeVec(f, f, f); - } - - void Write(T a[3]) - { - a[0] = x; a[1] = y; a[2] = z; - } - - Vec3<decltype(T{}+T{})> operator +(const Vec3 &other) const - { - return MakeVec(x+other.x, y+other.y, z+other.z); - } - void operator += (const Vec3 &other) - { - x+=other.x; y+=other.y; z+=other.z; - } - Vec3<decltype(T{}-T{})> operator -(const Vec3 &other) const - { - return MakeVec(x-other.x, y-other.y, z-other.z); - } - void operator -= (const Vec3 &other) - { - x-=other.x; y-=other.y; z-=other.z; - } - Vec3<decltype(-T{})> operator -() const - { - return MakeVec(-x,-y,-z); - } - Vec3<decltype(T{}*T{})> operator * (const Vec3 &other) const - { - return MakeVec(x*other.x, y*other.y, z*other.z); - } - template<typename V> - Vec3<decltype(T{}*V{})> operator * (const V& f) const - { - return MakeVec(x*f,y*f,z*f); - } - template<typename V> - void operator *= (const V& f) - { - x*=f; y*=f; z*=f; - } - template<typename V> - Vec3<decltype(T{}/V{})> operator / (const V& f) const - { - return MakeVec(x/f,y/f,z/f); - } - template<typename V> - void operator /= (const V& f) - { - *this = *this / f; - } - - T Length2() const - { - return x*x + y*y + z*z; - } - - // Only implemented for T=float - float Length() const; - void SetLength(const float l); - Vec3 WithLength(const float l) const; - float Distance2To(Vec3 &other); - Vec3 Normalized() const; - float Normalize(); // returns the previous length, which is often useful - - T& operator [] (int i) //allow vector[2] = 3 (vector.z=3) - { - return *((&x) + i); - } - T operator [] (const int i) const - { - return *((&x) + i); - } - - void SetZero() - { - x=0; y=0; z=0; - } - - // Common aliases: UVW (texel coordinates), RGB (colors), STQ (texture coordinates) - T& u() { return x; } - T& v() { return y; } - T& w() { return z; } - - T& r() { return x; } - T& g() { return y; } - T& b() { return z; } - - T& s() { return x; } - T& t() { return y; } - T& q() { return z; } - - const T& u() const { return x; } - const T& v() const { return y; } - const T& w() const { return z; } - - const T& r() const { return x; } - const T& g() const { return y; } - const T& b() const { return z; } - - const T& s() const { return x; } - const T& t() const { return y; } - const T& q() const { return z; } - - // swizzlers - create a subvector of specific components - // e.g. Vec2 uv() { return Vec2(x,y); } - // _DEFINE_SWIZZLER2 defines a single such function, DEFINE_SWIZZLER2 defines all of them for all component names (x<->r) and permutations (xy<->yx) -#define _DEFINE_SWIZZLER2(a, b, name) const Vec2<T> name() const { return Vec2<T>(a, b); } -#define DEFINE_SWIZZLER2(a, b, a2, b2, a3, b3, a4, b4) \ - _DEFINE_SWIZZLER2(a, b, a##b); \ - _DEFINE_SWIZZLER2(a, b, a2##b2); \ - _DEFINE_SWIZZLER2(a, b, a3##b3); \ - _DEFINE_SWIZZLER2(a, b, a4##b4); \ - _DEFINE_SWIZZLER2(b, a, b##a); \ - _DEFINE_SWIZZLER2(b, a, b2##a2); \ - _DEFINE_SWIZZLER2(b, a, b3##a3); \ - _DEFINE_SWIZZLER2(b, a, b4##a4) - - DEFINE_SWIZZLER2(x, y, r, g, u, v, s, t); - DEFINE_SWIZZLER2(x, z, r, b, u, w, s, q); - DEFINE_SWIZZLER2(y, z, g, b, v, w, t, q); -#undef DEFINE_SWIZZLER2 -#undef _DEFINE_SWIZZLER2 -}; - -template<typename T, typename V> -Vec3<T> operator * (const V& f, const Vec3<T>& vec) -{ - return Vec3<T>(f*vec.x,f*vec.y,f*vec.z); -} - -template<> -inline float Vec3<float>::Length() const { - return std::sqrt(x * x + y * y + z * z); -} - -template<> -inline Vec3<float> Vec3<float>::Normalized() const { - return *this / Length(); -} - - -typedef Vec3<float> Vec3f; - -template<typename T> -class Vec4 -{ -public: - T x; - T y; - T z; - T w; - - T* AsArray() { return &x; } - - Vec4() = default; - Vec4(const T a[4]) : x(a[0]), y(a[1]), z(a[2]), w(a[3]) {} - Vec4(const T& _x, const T& _y, const T& _z, const T& _w) : x(_x), y(_y), z(_z), w(_w) {} - - template<typename T2> - Vec4<T2> Cast() const { - return Vec4<T2>((T2)x, (T2)y, (T2)z, (T2)w); - } - - // Only implemented for T=int and T=float - static Vec4 FromRGBA(unsigned int rgba); - unsigned int ToRGBA() const; - - static Vec4 AssignToAll(const T& f) { - return Vec4<T>(f, f, f, f); - } - - void Write(T a[4]) - { - a[0] = x; a[1] = y; a[2] = z; a[3] = w; - } - - Vec4<decltype(T{}+T{})> operator +(const Vec4& other) const - { - return MakeVec(x+other.x, y+other.y, z+other.z, w+other.w); - } - void operator += (const Vec4& other) - { - x+=other.x; y+=other.y; z+=other.z; w+=other.w; - } - Vec4<decltype(T{}-T{})> operator -(const Vec4 &other) const - { - return MakeVec(x-other.x, y-other.y, z-other.z, w-other.w); - } - void operator -= (const Vec4 &other) - { - x-=other.x; y-=other.y; z-=other.z; w-=other.w; - } - Vec4<decltype(-T{})> operator -() const - { - return MakeVec(-x,-y,-z,-w); - } - Vec4<decltype(T{}*T{})> operator * (const Vec4 &other) const - { - return MakeVec(x*other.x, y*other.y, z*other.z, w*other.w); - } - template<typename V> - Vec4<decltype(T{}*V{})> operator * (const V& f) const - { - return MakeVec(x*f,y*f,z*f,w*f); - } - template<typename V> - void operator *= (const V& f) - { - x*=f; y*=f; z*=f; w*=f; - } - template<typename V> - Vec4<decltype(T{}/V{})> operator / (const V& f) const - { - return MakeVec(x/f,y/f,z/f,w/f); - } - template<typename V> - void operator /= (const V& f) - { - *this = *this / f; - } - - T Length2() const - { - return x*x + y*y + z*z + w*w; - } - - // Only implemented for T=float - float Length() const; - void SetLength(const float l); - Vec4 WithLength(const float l) const; - float Distance2To(Vec4 &other); - Vec4 Normalized() const; - float Normalize(); // returns the previous length, which is often useful - - T& operator [] (int i) //allow vector[2] = 3 (vector.z=3) - { - return *((&x) + i); - } - T operator [] (const int i) const - { - return *((&x) + i); - } - - void SetZero() - { - x=0; y=0; z=0; - } - - // Common alias: RGBA (colors) - T& r() { return x; } - T& g() { return y; } - T& b() { return z; } - T& a() { return w; } - - const T& r() const { return x; } - const T& g() const { return y; } - const T& b() const { return z; } - const T& a() const { return w; } - - // Swizzlers - Create a subvector of specific components - // e.g. Vec2 uv() { return Vec2(x,y); } - - // _DEFINE_SWIZZLER2 defines a single such function - // DEFINE_SWIZZLER2_COMP1 defines one-component functions for all component names (x<->r) - // DEFINE_SWIZZLER2_COMP2 defines two component functions for all component names (x<->r) and permutations (xy<->yx) -#define _DEFINE_SWIZZLER2(a, b, name) const Vec2<T> name() const { return Vec2<T>(a, b); } -#define DEFINE_SWIZZLER2_COMP1(a, a2) \ - _DEFINE_SWIZZLER2(a, a, a##a); \ - _DEFINE_SWIZZLER2(a, a, a2##a2) -#define DEFINE_SWIZZLER2_COMP2(a, b, a2, b2) \ - _DEFINE_SWIZZLER2(a, b, a##b); \ - _DEFINE_SWIZZLER2(a, b, a2##b2); \ - _DEFINE_SWIZZLER2(b, a, b##a); \ - _DEFINE_SWIZZLER2(b, a, b2##a2) - - DEFINE_SWIZZLER2_COMP2(x, y, r, g); - DEFINE_SWIZZLER2_COMP2(x, z, r, b); - DEFINE_SWIZZLER2_COMP2(x, w, r, a); - DEFINE_SWIZZLER2_COMP2(y, z, g, b); - DEFINE_SWIZZLER2_COMP2(y, w, g, a); - DEFINE_SWIZZLER2_COMP2(z, w, b, a); - DEFINE_SWIZZLER2_COMP1(x, r); - DEFINE_SWIZZLER2_COMP1(y, g); - DEFINE_SWIZZLER2_COMP1(z, b); - DEFINE_SWIZZLER2_COMP1(w, a); -#undef DEFINE_SWIZZLER2_COMP1 -#undef DEFINE_SWIZZLER2_COMP2 -#undef _DEFINE_SWIZZLER2 - -#define _DEFINE_SWIZZLER3(a, b, c, name) const Vec3<T> name() const { return Vec3<T>(a, b, c); } -#define DEFINE_SWIZZLER3_COMP1(a, a2) \ - _DEFINE_SWIZZLER3(a, a, a, a##a##a); \ - _DEFINE_SWIZZLER3(a, a, a, a2##a2##a2) -#define DEFINE_SWIZZLER3_COMP3(a, b, c, a2, b2, c2) \ - _DEFINE_SWIZZLER3(a, b, c, a##b##c); \ - _DEFINE_SWIZZLER3(a, c, b, a##c##b); \ - _DEFINE_SWIZZLER3(b, a, c, b##a##c); \ - _DEFINE_SWIZZLER3(b, c, a, b##c##a); \ - _DEFINE_SWIZZLER3(c, a, b, c##a##b); \ - _DEFINE_SWIZZLER3(c, b, a, c##b##a); \ - _DEFINE_SWIZZLER3(a, b, c, a2##b2##c2); \ - _DEFINE_SWIZZLER3(a, c, b, a2##c2##b2); \ - _DEFINE_SWIZZLER3(b, a, c, b2##a2##c2); \ - _DEFINE_SWIZZLER3(b, c, a, b2##c2##a2); \ - _DEFINE_SWIZZLER3(c, a, b, c2##a2##b2); \ - _DEFINE_SWIZZLER3(c, b, a, c2##b2##a2) - - DEFINE_SWIZZLER3_COMP3(x, y, z, r, g, b); - DEFINE_SWIZZLER3_COMP3(x, y, w, r, g, a); - DEFINE_SWIZZLER3_COMP3(x, z, w, r, b, a); - DEFINE_SWIZZLER3_COMP3(y, z, w, g, b, a); - DEFINE_SWIZZLER3_COMP1(x, r); - DEFINE_SWIZZLER3_COMP1(y, g); - DEFINE_SWIZZLER3_COMP1(z, b); - DEFINE_SWIZZLER3_COMP1(w, a); -#undef DEFINE_SWIZZLER3_COMP1 -#undef DEFINE_SWIZZLER3_COMP3 -#undef _DEFINE_SWIZZLER3 -}; - - -template<typename T, typename V> -Vec4<decltype(V{}*T{})> operator * (const V& f, const Vec4<T>& vec) -{ - return MakeVec(f*vec.x,f*vec.y,f*vec.z,f*vec.w); -} - -typedef Vec4<float> Vec4f; - - -template<typename T> -static inline decltype(T{}*T{}+T{}*T{}) Dot(const Vec2<T>& a, const Vec2<T>& b) -{ - return a.x*b.x + a.y*b.y; -} - -template<typename T> -static inline decltype(T{}*T{}+T{}*T{}) Dot(const Vec3<T>& a, const Vec3<T>& b) -{ - return a.x*b.x + a.y*b.y + a.z*b.z; -} - -template<typename T> -static inline decltype(T{}*T{}+T{}*T{}) Dot(const Vec4<T>& a, const Vec4<T>& b) -{ - return a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w; -} - -template<typename T> -static inline Vec3<decltype(T{}*T{}-T{}*T{})> Cross(const Vec3<T>& a, const Vec3<T>& b) -{ - return MakeVec(a.y*b.z-a.z*b.y, a.z*b.x-a.x*b.z, a.x*b.y-a.y*b.x); -} - -// linear interpolation via float: 0.0=begin, 1.0=end -template<typename X> -static inline decltype(X{}*float{}+X{}*float{}) Lerp(const X& begin, const X& end, const float t) -{ - return begin*(1.f-t) + end*t; -} - -// linear interpolation via int: 0=begin, base=end -template<typename X, int base> -static inline decltype((X{}*int{}+X{}*int{}) / base) LerpInt(const X& begin, const X& end, const int t) -{ - return (begin*(base-t) + end*t) / base; -} - -// Utility vector factories -template<typename T> -static inline Vec2<T> MakeVec(const T& x, const T& y) -{ - return Vec2<T>{x, y}; -} - -template<typename T> -static inline Vec3<T> MakeVec(const T& x, const T& y, const T& z) -{ - return Vec3<T>{x, y, z}; -} - -template<typename T> -static inline Vec4<T> MakeVec(const T& x, const T& y, const Vec2<T>& zw) -{ - return MakeVec(x, y, zw[0], zw[1]); -} - -template<typename T> -static inline Vec3<T> MakeVec(const Vec2<T>& xy, const T& z) -{ - return MakeVec(xy[0], xy[1], z); -} - -template<typename T> -static inline Vec3<T> MakeVec(const T& x, const Vec2<T>& yz) -{ - return MakeVec(x, yz[0], yz[1]); -} - -template<typename T> -static inline Vec4<T> MakeVec(const T& x, const T& y, const T& z, const T& w) -{ - return Vec4<T>{x, y, z, w}; -} - -template<typename T> -static inline Vec4<T> MakeVec(const Vec2<T>& xy, const T& z, const T& w) -{ - return MakeVec(xy[0], xy[1], z, w); -} - -template<typename T> -static inline Vec4<T> MakeVec(const T& x, const Vec2<T>& yz, const T& w) -{ - return MakeVec(x, yz[0], yz[1], w); -} - -// NOTE: This has priority over "Vec2<Vec2<T>> MakeVec(const Vec2<T>& x, const Vec2<T>& y)". -// Even if someone wanted to use an odd object like Vec2<Vec2<T>>, the compiler would error -// out soon enough due to misuse of the returned structure. -template<typename T> -static inline Vec4<T> MakeVec(const Vec2<T>& xy, const Vec2<T>& zw) -{ - return MakeVec(xy[0], xy[1], zw[0], zw[1]); -} - -template<typename T> -static inline Vec4<T> MakeVec(const Vec3<T>& xyz, const T& w) -{ - return MakeVec(xyz[0], xyz[1], xyz[2], w); -} - -template<typename T> -static inline Vec4<T> MakeVec(const T& x, const Vec3<T>& yzw) -{ - return MakeVec(x, yzw[0], yzw[1], yzw[2]); -} - - -} // namespace diff --git a/src/video_core/pica.h b/src/video_core/pica.h index b67dce1a9..9628a7589 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -15,8 +15,7 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "common/logging/log.h" - -#include "math.h" +#include "common/vector_math.h" namespace Pica { @@ -162,6 +161,25 @@ struct Regs { ETC1A4 = 13, // compressed }; + enum class LogicOp : u32 { + Clear = 0, + And = 1, + AndReverse = 2, + Copy = 3, + Set = 4, + CopyInverted = 5, + NoOp = 6, + Invert = 7, + Nand = 8, + Or = 9, + Nor = 10, + Xor = 11, + Equiv = 12, + AndInverted = 13, + OrReverse = 14, + OrInverted = 15, + }; + static unsigned NibblesPerPixel(TextureFormat format) { switch (format) { case TextureFormat::RGBA8: @@ -221,6 +239,7 @@ struct Regs { enum class Source : u32 { PrimaryColor = 0x0, PrimaryFragmentColor = 0x1, + SecondaryFragmentColor = 0x2, Texture0 = 0x3, Texture1 = 0x4, @@ -337,7 +356,7 @@ struct Regs { return (stage_index < 4) && (update_mask_a & (1 << stage_index)); } } tev_combiner_buffer_input; - + INSERT_PADDING_WORDS(0xf); TevStageConfig tev_stage4; INSERT_PADDING_WORDS(0x3); @@ -353,9 +372,9 @@ struct Regs { INSERT_PADDING_WORDS(0x2); const std::array<Regs::TevStageConfig,6> GetTevStages() const { - return { tev_stage0, tev_stage1, - tev_stage2, tev_stage3, - tev_stage4, tev_stage5 }; + return {{ tev_stage0, tev_stage1, + tev_stage2, tev_stage3, + tev_stage4, tev_stage5 }}; }; enum class BlendEquation : u32 { @@ -413,12 +432,8 @@ struct Regs { } alpha_blending; union { - enum Op { - Set = 4, - }; - - BitField<0, 4, Op> op; - } logic_op; + BitField<0, 4, LogicOp> logic_op; + }; union { BitField< 0, 8, u32> r; @@ -703,12 +718,38 @@ struct Regs { struct { // Index of the current default attribute u32 index; - + // Writing to these registers sets the "current" default attribute. u32 set_value[3]; } vs_default_attributes_setup; - - INSERT_PADDING_WORDS(0x28); + + INSERT_PADDING_WORDS(0x2); + + struct { + // There are two channels that can be used to configure the next command buffer, which + // can be then executed by writing to the "trigger" registers. There are two reasons why a + // game might use this feature: + // 1) With this, an arbitrary number of additional command buffers may be executed in + // sequence without requiring any intervention of the CPU after the initial one is + // kicked off. + // 2) Games can configure these registers to provide a command list subroutine mechanism. + + BitField< 0, 20, u32> size[2]; ///< Size (in bytes / 8) of each channel's command buffer + BitField< 0, 28, u32> addr[2]; ///< Physical address / 8 of each channel's command buffer + u32 trigger[2]; ///< Triggers execution of the channel's command buffer when written to + + unsigned GetSize(unsigned index) const { + ASSERT(index < 2); + return 8 * size[index]; + } + + PAddr GetPhysicalAddress(unsigned index) const { + ASSERT(index < 2); + return (PAddr)(8 * addr[index]); + } + } command_buffer; + + INSERT_PADDING_WORDS(0x20); enum class TriangleTopology : u32 { List = 0, @@ -861,6 +902,7 @@ struct Regs { ADD_FIELD(trigger_draw); ADD_FIELD(trigger_draw_indexed); ADD_FIELD(vs_default_attributes_setup); + ADD_FIELD(command_buffer); ADD_FIELD(triangle_topology); ADD_FIELD(vs_bool_uniforms); ADD_FIELD(vs_int_uniforms); @@ -938,6 +980,7 @@ ASSERT_REG_POSITION(num_vertices, 0x228); ASSERT_REG_POSITION(trigger_draw, 0x22e); ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f); ASSERT_REG_POSITION(vs_default_attributes_setup, 0x232); +ASSERT_REG_POSITION(command_buffer, 0x238); ASSERT_REG_POSITION(triangle_topology, 0x25e); ASSERT_REG_POSITION(vs_bool_uniforms, 0x2b0); ASSERT_REG_POSITION(vs_int_uniforms, 0x2b1); @@ -1053,21 +1096,12 @@ private: float value; }; -union CommandHeader { - CommandHeader(u32 h) : hex(h) {} - - u32 hex; - - BitField< 0, 16, u32> cmd_id; - BitField<16, 4, u32> parameter_mask; - BitField<20, 11, u32> extra_data_length; - BitField<31, 1, u32> group_commands; -}; - /// Struct used to describe current Pica state struct State { + /// Pica registers Regs regs; + /// Vertex shader memory struct { struct { Math::Vec4<float24> f[96]; @@ -1080,6 +1114,13 @@ struct State { std::array<u32, 1024> program_code; std::array<u32, 1024> swizzle_data; } vs; + + /// Current Pica command list + struct { + const u32* head_ptr; + const u32* current_ptr; + u32 length; + } cmd_list; }; /// Initialize Pica state diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 767ff4205..59d156ee7 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -4,6 +4,7 @@ #include <algorithm> +#include "common/color.h" #include "common/common_types.h" #include "common/math_util.h" #include "common/profiler.h" @@ -13,7 +14,6 @@ #include "debug_utils/debug_utils.h" #include "math.h" -#include "color.h" #include "pica.h" #include "rasterizer.h" #include "vertex_shader.h" @@ -104,7 +104,7 @@ static u32 GetDepth(int x, int y) { u8* depth_buffer = Memory::GetPhysicalPointer(addr); y = framebuffer.height - y; - + const u32 coarse_y = y & ~7; u32 bytes_per_pixel = Regs::BytesPerDepthPixel(framebuffer.depth_format); u32 stride = framebuffer.width * bytes_per_pixel; @@ -402,11 +402,16 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, auto GetSource = [&](Source source) -> Math::Vec4<u8> { switch (source) { - // TODO: What's the difference between these two? case Source::PrimaryColor: + + // HACK: Until we implement fragment lighting, use primary_color case Source::PrimaryFragmentColor: return primary_color; + // HACK: Until we implement fragment lighting, use zero + case Source::SecondaryFragmentColor: + return {0, 0, 0, 0}; + case Source::Texture0: return texture_color[0]; @@ -570,6 +575,13 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, case Operation::Add: return std::min(255, input[0] + input[1]); + case Operation::AddSigned: + { + // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct + auto result = static_cast<int>(input[0]) + static_cast<int>(input[1]) - 128; + return static_cast<u8>(MathUtil::Clamp<int>(result, 0, 255)); + } + case Operation::Lerp: return (input[0] * input[2] + input[1] * (255 - input[2])) / 255; @@ -808,10 +820,9 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, } }; - using BlendEquation = Regs::BlendEquation; static auto EvaluateBlendEquation = [](const Math::Vec4<u8>& src, const Math::Vec4<u8>& srcfactor, const Math::Vec4<u8>& dest, const Math::Vec4<u8>& destfactor, - BlendEquation equation) { + Regs::BlendEquation equation) { Math::Vec4<int> result; auto src_result = (src * srcfactor).Cast<int>(); @@ -866,8 +877,63 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb); blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a(); } else { - LOG_CRITICAL(HW_GPU, "logic op: %x", output_merger.logic_op); - UNIMPLEMENTED(); + static auto LogicOp = [](u8 src, u8 dest, Regs::LogicOp op) -> u8 { + switch (op) { + case Regs::LogicOp::Clear: + return 0; + + case Regs::LogicOp::And: + return src & dest; + + case Regs::LogicOp::AndReverse: + return src & ~dest; + + case Regs::LogicOp::Copy: + return src; + + case Regs::LogicOp::Set: + return 255; + + case Regs::LogicOp::CopyInverted: + return ~src; + + case Regs::LogicOp::NoOp: + return dest; + + case Regs::LogicOp::Invert: + return ~dest; + + case Regs::LogicOp::Nand: + return ~(src & dest); + + case Regs::LogicOp::Or: + return src | dest; + + case Regs::LogicOp::Nor: + return ~(src | dest); + + case Regs::LogicOp::Xor: + return src ^ dest; + + case Regs::LogicOp::Equiv: + return ~(src ^ dest); + + case Regs::LogicOp::AndInverted: + return ~src & dest; + + case Regs::LogicOp::OrReverse: + return src | ~dest; + + case Regs::LogicOp::OrInverted: + return ~src | dest; + } + }; + + blend_output = Math::MakeVec( + LogicOp(combiner_output.r(), dest.r(), output_merger.logic_op), + LogicOp(combiner_output.g(), dest.g(), output_merger.logic_op), + LogicOp(combiner_output.b(), dest.b(), output_merger.logic_op), + LogicOp(combiner_output.a(), dest.a(), output_merger.logic_op)); } const Math::Vec4<u8> result = { diff --git a/src/video_core/renderer_opengl/generated/gl_3_2_core.c b/src/video_core/renderer_opengl/generated/gl_3_2_core.c index ef29972d7..95fd29c0a 100644 --- a/src/video_core/renderer_opengl/generated/gl_3_2_core.c +++ b/src/video_core/renderer_opengl/generated/gl_3_2_core.c @@ -62,9 +62,9 @@ static int TestPointer(const PROC pTest) ptrdiff_t iTest; if(!pTest) return 0; iTest = (ptrdiff_t)pTest; - + if(iTest == 1 || iTest == 2 || iTest == 3 || iTest == -1) return 0; - + return 1; } @@ -79,7 +79,7 @@ static PROC WinGetProcAddress(const char *name) glMod = GetModuleHandleA("OpenGL32.dll"); return (PROC)GetProcAddress(glMod, (LPCSTR)name); } - + #define IntGetProcAddress(name) WinGetProcAddress(name) #else #if defined(__APPLE__) @@ -1083,7 +1083,7 @@ static ogl_StrToExtMap *FindExtEntry(const char *extensionName) if(strcmp(extensionName, currLoc->extensionName) == 0) return currLoc; } - + return NULL; } @@ -1135,15 +1135,15 @@ int ogl_LoadFunctions() { int numFailed = 0; ClearExtensionVars(); - + _ptrc_glGetIntegerv = (void (CODEGEN_FUNCPTR *)(GLenum, GLint *))IntGetProcAddress("glGetIntegerv"); if(!_ptrc_glGetIntegerv) return ogl_LOAD_FAILED; _ptrc_glGetStringi = (const GLubyte * (CODEGEN_FUNCPTR *)(GLenum, GLuint))IntGetProcAddress("glGetStringi"); if(!_ptrc_glGetStringi) return ogl_LOAD_FAILED; - + ProcExtsFromExtList(); numFailed = Load_Version_3_2(); - + if(numFailed == 0) return ogl_LOAD_SUCCEEDED; else @@ -1177,7 +1177,7 @@ int ogl_IsVersionGEQ(int majorVersion, int minorVersion) { if(g_major_version == 0) GetGLVersion(); - + if(majorVersion > g_major_version) return 1; if(majorVersion < g_major_version) return 0; if(minorVersion >= g_minor_version) return 1; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 4b7d099a5..518f79331 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -2,10 +2,11 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "common/color.h" + #include "core/settings.h" #include "core/hw/gpu.h" -#include "video_core/color.h" #include "video_core/pica.h" #include "video_core/utils.h" #include "video_core/renderer_opengl/gl_rasterizer.h" @@ -93,14 +94,27 @@ void RasterizerOpenGL::InitObjects() { // Create textures for OGL framebuffer that will be rendered to, initially 1x1 to succeed in framebuffer creation fb_color_texture.texture.Create(); ReconfigureColorTexture(fb_color_texture, Pica::Regs::ColorFormat::RGBA8, 1, 1); + + state.texture_units[0].enabled_2d = true; + state.texture_units[0].texture_2d = fb_color_texture.texture.handle; + state.Apply(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + state.texture_units[0].texture_2d = 0; + state.Apply(); + fb_depth_texture.texture.Create(); ReconfigureDepthTexture(fb_depth_texture, Pica::Regs::DepthFormat::D16, 1, 1); + + state.texture_units[0].enabled_2d = true; + state.texture_units[0].texture_2d = fb_depth_texture.texture.handle; + state.Apply(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); @@ -109,14 +123,13 @@ void RasterizerOpenGL::InitObjects() { glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_FUNC, GL_LEQUAL); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_COMPARE_MODE, GL_NONE); + state.texture_units[0].texture_2d = 0; + state.Apply(); + // Configure OpenGL framebuffer framebuffer.Create(); state.draw.framebuffer = framebuffer.handle; - - // Unbind texture to allow binding to framebuffer - state.texture_units[0].enabled_2d = true; - state.texture_units[0].texture_2d = 0; state.Apply(); glActiveTexture(GL_TEXTURE0); @@ -135,6 +148,7 @@ void RasterizerOpenGL::Reset() { SyncBlendFuncs(); SyncBlendColor(); SyncAlphaTest(); + SyncLogicOp(); SyncStencilTest(); SyncDepthTest(); @@ -203,7 +217,19 @@ void RasterizerOpenGL::DrawTriangles() { vertex_batch.clear(); - // TODO: Flush the resource cache at the current depth and color framebuffer addresses for render-to-texture + // Flush the resource cache at the current depth and color framebuffer addresses for render-to-texture + const auto& regs = Pica::g_state.regs; + + PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); + u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(regs.framebuffer.color_format) + * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); + + PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); + u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(regs.framebuffer.depth_format) + * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); + + res_cache.NotifyFlush(cur_fb_color_addr, cur_fb_color_size); + res_cache.NotifyFlush(cur_fb_depth_addr, cur_fb_depth_size); } void RasterizerOpenGL::CommitFramebuffer() { @@ -249,6 +275,11 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { SyncDepthTest(); break; + // Logic op + case PICA_REG_INDEX(output_merger.logic_op): + SyncLogicOp(); + break; + // TEV stage 0 case PICA_REG_INDEX(tev_stage0.color_source1): SyncTevSources(0, regs.tev_stage0); @@ -350,7 +381,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { case PICA_REG_INDEX(tev_stage5.color_scale): SyncTevMultipliers(5, regs.tev_stage5); break; - + // TEV combiner buffer color case PICA_REG_INDEX(tev_combiner_buffer_color): SyncCombinerColor(); @@ -465,6 +496,9 @@ void RasterizerOpenGL::ReconfigureColorTexture(TextureInfo& texture, Pica::Regs: glActiveTexture(GL_TEXTURE0); glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0, texture.gl_format, texture.gl_type, nullptr); + + state.texture_units[0].texture_2d = 0; + state.Apply(); } void RasterizerOpenGL::ReconfigureDepthTexture(DepthTextureInfo& texture, Pica::Regs::DepthFormat format, u32 width, u32 height) { @@ -484,7 +518,7 @@ void RasterizerOpenGL::ReconfigureDepthTexture(DepthTextureInfo& texture, Pica:: case Pica::Regs::DepthFormat::D24: internal_format = GL_DEPTH_COMPONENT24; texture.gl_format = GL_DEPTH_COMPONENT; - texture.gl_type = GL_UNSIGNED_INT_24_8; + texture.gl_type = GL_UNSIGNED_INT; break; case Pica::Regs::DepthFormat::D24S8: @@ -506,6 +540,9 @@ void RasterizerOpenGL::ReconfigureDepthTexture(DepthTextureInfo& texture, Pica:: glActiveTexture(GL_TEXTURE0); glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0, texture.gl_format, texture.gl_type, nullptr); + + state.texture_units[0].texture_2d = 0; + state.Apply(); } void RasterizerOpenGL::SyncFramebuffer() { @@ -633,6 +670,10 @@ void RasterizerOpenGL::SyncAlphaTest() { glUniform1f(uniform_alphatest_ref, regs.output_merger.alpha_test.ref / 255.0f); } +void RasterizerOpenGL::SyncLogicOp() { + state.logic_op = PicaToGL::LogicOp(Pica::g_state.regs.output_merger.logic_op); +} + void RasterizerOpenGL::SyncStencilTest() { // TODO: Implement stencil test, mask, and op } @@ -641,6 +682,10 @@ void RasterizerOpenGL::SyncDepthTest() { const auto& regs = Pica::g_state.regs; state.depth.test_enabled = (regs.output_merger.depth_test_enable == 1); state.depth.test_func = PicaToGL::CompareFunc(regs.output_merger.depth_test_func); + state.color_mask.red_enabled = regs.output_merger.red_enable; + state.color_mask.green_enabled = regs.output_merger.green_enable; + state.color_mask.blue_enabled = regs.output_merger.blue_enable; + state.color_mask.alpha_enabled = regs.output_merger.alpha_enable; state.depth.write_mask = regs.output_merger.depth_write_enable ? GL_TRUE : GL_FALSE; } @@ -748,10 +793,10 @@ void RasterizerOpenGL::ReloadColorBuffer() { for (int x = 0; x < fb_color_texture.width; ++x) { const u32 coarse_y = y & ~7; u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_color_texture.width * bytes_per_pixel; - u32 gl_px_idx = x * bytes_per_pixel + y * fb_color_texture.width * bytes_per_pixel; + u32 gl_pixel_index = (x + y * fb_color_texture.width) * bytes_per_pixel; u8* pixel = color_buffer + dst_offset; - memcpy(&temp_fb_color_buffer[gl_px_idx], pixel, bytes_per_pixel); + memcpy(&temp_fb_color_buffer[gl_pixel_index], pixel, bytes_per_pixel); } } @@ -762,6 +807,9 @@ void RasterizerOpenGL::ReloadColorBuffer() { glActiveTexture(GL_TEXTURE0); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_color_texture.width, fb_color_texture.height, fb_color_texture.gl_format, fb_color_texture.gl_type, temp_fb_color_buffer.get()); + + state.texture_units[0].texture_2d = 0; + state.Apply(); } void RasterizerOpenGL::ReloadDepthBuffer() { @@ -779,29 +827,29 @@ void RasterizerOpenGL::ReloadDepthBuffer() { std::unique_ptr<u8[]> temp_fb_depth_buffer(new u8[fb_depth_texture.width * fb_depth_texture.height * gl_bpp]); - for (int y = 0; y < fb_depth_texture.height; ++y) { - for (int x = 0; x < fb_depth_texture.width; ++x) { - const u32 coarse_y = y & ~7; - u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel; - u32 gl_px_idx = x + y * fb_depth_texture.width; - - switch (fb_depth_texture.format) { - case Pica::Regs::DepthFormat::D16: - ((u16*)temp_fb_depth_buffer.get())[gl_px_idx] = Color::DecodeD16(depth_buffer + dst_offset); - break; - case Pica::Regs::DepthFormat::D24: - ((u32*)temp_fb_depth_buffer.get())[gl_px_idx] = Color::DecodeD24(depth_buffer + dst_offset); - break; - case Pica::Regs::DepthFormat::D24S8: - { - Math::Vec2<u32> depth_stencil = Color::DecodeD24S8(depth_buffer + dst_offset); - ((u32*)temp_fb_depth_buffer.get())[gl_px_idx] = (depth_stencil.x << 8) | depth_stencil.y; - break; + u8* temp_fb_depth_data = bytes_per_pixel == 3 ? (temp_fb_depth_buffer.get() + 1) : temp_fb_depth_buffer.get(); + + if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) { + for (int y = 0; y < fb_depth_texture.height; ++y) { + for (int x = 0; x < fb_depth_texture.width; ++x) { + const u32 coarse_y = y & ~7; + u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel; + u32 gl_pixel_index = (x + y * fb_depth_texture.width); + + u8* pixel = depth_buffer + dst_offset; + u32 depth_stencil = *(u32*)pixel; + ((u32*)temp_fb_depth_data)[gl_pixel_index] = (depth_stencil << 8) | (depth_stencil >> 24); } - default: - LOG_CRITICAL(Render_OpenGL, "Unknown memory framebuffer depth format %x", fb_depth_texture.format); - UNIMPLEMENTED(); - break; + } + } else { + for (int y = 0; y < fb_depth_texture.height; ++y) { + for (int x = 0; x < fb_depth_texture.width; ++x) { + const u32 coarse_y = y & ~7; + u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel; + u32 gl_pixel_index = (x + y * fb_depth_texture.width) * gl_bpp; + + u8* pixel = depth_buffer + dst_offset; + memcpy(&temp_fb_depth_data[gl_pixel_index], pixel, bytes_per_pixel); } } } @@ -813,6 +861,9 @@ void RasterizerOpenGL::ReloadDepthBuffer() { glActiveTexture(GL_TEXTURE0); glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, fb_depth_texture.width, fb_depth_texture.height, fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_fb_depth_buffer.get()); + + state.texture_units[0].texture_2d = 0; + state.Apply(); } void RasterizerOpenGL::CommitColorBuffer() { @@ -831,15 +882,18 @@ void RasterizerOpenGL::CommitColorBuffer() { glActiveTexture(GL_TEXTURE0); glGetTexImage(GL_TEXTURE_2D, 0, fb_color_texture.gl_format, fb_color_texture.gl_type, temp_gl_color_buffer.get()); + state.texture_units[0].texture_2d = 0; + state.Apply(); + // Directly copy pixels. Internal OpenGL color formats are consistent so no conversion is necessary. for (int y = 0; y < fb_color_texture.height; ++y) { for (int x = 0; x < fb_color_texture.width; ++x) { const u32 coarse_y = y & ~7; u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_color_texture.width * bytes_per_pixel; - u32 gl_px_idx = x * bytes_per_pixel + y * fb_color_texture.width * bytes_per_pixel; + u32 gl_pixel_index = x * bytes_per_pixel + y * fb_color_texture.width * bytes_per_pixel; u8* pixel = color_buffer + dst_offset; - memcpy(pixel, &temp_gl_color_buffer[gl_px_idx], bytes_per_pixel); + memcpy(pixel, &temp_gl_color_buffer[gl_pixel_index], bytes_per_pixel); } } } @@ -866,29 +920,32 @@ void RasterizerOpenGL::CommitDepthBuffer() { glActiveTexture(GL_TEXTURE0); glGetTexImage(GL_TEXTURE_2D, 0, fb_depth_texture.gl_format, fb_depth_texture.gl_type, temp_gl_depth_buffer.get()); - for (int y = 0; y < fb_depth_texture.height; ++y) { - for (int x = 0; x < fb_depth_texture.width; ++x) { - const u32 coarse_y = y & ~7; - u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel; - u32 gl_px_idx = x + y * fb_depth_texture.width; - - switch (fb_depth_texture.format) { - case Pica::Regs::DepthFormat::D16: - Color::EncodeD16(((u16*)temp_gl_depth_buffer.get())[gl_px_idx], depth_buffer + dst_offset); - break; - case Pica::Regs::DepthFormat::D24: - Color::EncodeD24(((u32*)temp_gl_depth_buffer.get())[gl_px_idx], depth_buffer + dst_offset); - break; - case Pica::Regs::DepthFormat::D24S8: - { - u32 depth_stencil = ((u32*)temp_gl_depth_buffer.get())[gl_px_idx]; - Color::EncodeD24S8((depth_stencil >> 8), depth_stencil & 0xFF, depth_buffer + dst_offset); - break; + state.texture_units[0].texture_2d = 0; + state.Apply(); + + u8* temp_gl_depth_data = bytes_per_pixel == 3 ? (temp_gl_depth_buffer.get() + 1) : temp_gl_depth_buffer.get(); + + if (fb_depth_texture.format == Pica::Regs::DepthFormat::D24S8) { + for (int y = 0; y < fb_depth_texture.height; ++y) { + for (int x = 0; x < fb_depth_texture.width; ++x) { + const u32 coarse_y = y & ~7; + u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel; + u32 gl_pixel_index = (x + y * fb_depth_texture.width); + + u8* pixel = depth_buffer + dst_offset; + u32 depth_stencil = ((u32*)temp_gl_depth_data)[gl_pixel_index]; + *(u32*)pixel = (depth_stencil >> 8) | (depth_stencil << 24); } - default: - LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer depth format %x", fb_depth_texture.format); - UNIMPLEMENTED(); - break; + } + } else { + for (int y = 0; y < fb_depth_texture.height; ++y) { + for (int x = 0; x < fb_depth_texture.width; ++x) { + const u32 coarse_y = y & ~7; + u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * fb_depth_texture.width * bytes_per_pixel; + u32 gl_pixel_index = (x + y * fb_depth_texture.width) * gl_bpp; + + u8* pixel = depth_buffer + dst_offset; + memcpy(pixel, &temp_gl_depth_data[gl_pixel_index], bytes_per_pixel); } } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 9896f8d04..d7d422b1f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -125,6 +125,9 @@ private: /// Syncs the alpha test states to match the PICA register void SyncAlphaTest(); + /// Syncs the logic op states to match the PICA register + void SyncLogicOp(); + /// Syncs the stencil test states to match the PICA register void SyncStencilTest(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 6f88a8b21..2e4110a88 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -4,13 +4,13 @@ #include "common/make_unique.h" #include "common/math_util.h" +#include "common/vector_math.h" #include "core/memory.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/pica_to_gl.h" #include "video_core/debug_utils/debug_utils.h" -#include "video_core/math.h" RasterizerCacheOpenGL::~RasterizerCacheOpenGL() { FullFlush(); diff --git a/src/video_core/renderer_opengl/gl_shaders.h b/src/video_core/renderer_opengl/gl_shaders.h index 8f0941230..a8cb2f595 100644 --- a/src/video_core/renderer_opengl/gl_shaders.h +++ b/src/video_core/renderer_opengl/gl_shaders.h @@ -69,15 +69,16 @@ const char g_fragment_shader_hw[] = R"( #define NUM_VTX_ATTR 7 #define NUM_TEV_STAGES 6 -#define SOURCE_PRIMARYCOLOR 0x0 -#define SOURCE_PRIMARYFRAGMENTCOLOR 0x1 -#define SOURCE_TEXTURE0 0x3 -#define SOURCE_TEXTURE1 0x4 -#define SOURCE_TEXTURE2 0x5 -#define SOURCE_TEXTURE3 0x6 -#define SOURCE_PREVIOUSBUFFER 0xd -#define SOURCE_CONSTANT 0xe -#define SOURCE_PREVIOUS 0xf +#define SOURCE_PRIMARYCOLOR 0x0 +#define SOURCE_PRIMARYFRAGMENTCOLOR 0x1 +#define SOURCE_SECONDARYFRAGMENTCOLOR 0x2 +#define SOURCE_TEXTURE0 0x3 +#define SOURCE_TEXTURE1 0x4 +#define SOURCE_TEXTURE2 0x5 +#define SOURCE_TEXTURE3 0x6 +#define SOURCE_PREVIOUSBUFFER 0xd +#define SOURCE_CONSTANT 0xe +#define SOURCE_PREVIOUS 0xf #define COLORMODIFIER_SOURCECOLOR 0x0 #define COLORMODIFIER_ONEMINUSSOURCECOLOR 0x1 @@ -151,8 +152,11 @@ vec4 GetSource(int source) { if (source == SOURCE_PRIMARYCOLOR) { return o[2]; } else if (source == SOURCE_PRIMARYFRAGMENTCOLOR) { - // HACK: Uses color value, but should really use fragment lighting output + // HACK: Until we implement fragment lighting, use primary_color return o[2]; + } else if (source == SOURCE_SECONDARYFRAGMENTCOLOR) { + // HACK: Until we implement fragment lighting, use zero + return vec4(0.0, 0.0, 0.0, 0.0); } else if (source == SOURCE_TEXTURE0) { return texture(tex[0], o[3].xy); } else if (source == SOURCE_TEXTURE1) { diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 1afa58c99..3526e16d5 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -16,6 +16,11 @@ OpenGLState::OpenGLState() { depth.test_func = GL_LESS; depth.write_mask = GL_TRUE; + color_mask.red_enabled = GL_TRUE; + color_mask.green_enabled = GL_TRUE; + color_mask.blue_enabled = GL_TRUE; + color_mask.alpha_enabled = GL_TRUE; + stencil.test_enabled = false; stencil.test_func = GL_ALWAYS; stencil.test_ref = 0; @@ -32,6 +37,8 @@ OpenGLState::OpenGLState() { blend.color.blue = 0.0f; blend.color.alpha = 0.0f; + logic_op = GL_COPY; + for (auto& texture_unit : texture_units) { texture_unit.enabled_2d = false; texture_unit.texture_2d = 0; @@ -75,6 +82,15 @@ void OpenGLState::Apply() { glDepthMask(depth.write_mask); } + // Color mask + if (color_mask.red_enabled != cur_state.color_mask.red_enabled || + color_mask.green_enabled != cur_state.color_mask.green_enabled || + color_mask.blue_enabled != cur_state.color_mask.blue_enabled || + color_mask.alpha_enabled != cur_state.color_mask.alpha_enabled) { + glColorMask(color_mask.red_enabled, color_mask.green_enabled, + color_mask.blue_enabled, color_mask.alpha_enabled); + } + // Stencil test if (stencil.test_enabled != cur_state.stencil.test_enabled) { if (stencil.test_enabled) { @@ -82,11 +98,11 @@ void OpenGLState::Apply() { } else { glDisable(GL_STENCIL_TEST); } - } + } if (stencil.test_func != cur_state.stencil.test_func || - stencil.test_ref != cur_state.stencil.test_ref || - stencil.test_mask != cur_state.stencil.test_mask) { + stencil.test_ref != cur_state.stencil.test_ref || + stencil.test_mask != cur_state.stencil.test_mask) { glStencilFunc(stencil.test_func, stencil.test_ref, stencil.test_mask); } @@ -99,23 +115,34 @@ void OpenGLState::Apply() { if (blend.enabled != cur_state.blend.enabled) { if (blend.enabled) { glEnable(GL_BLEND); + + cur_state.logic_op = GL_COPY; + glLogicOp(cur_state.logic_op); + glDisable(GL_COLOR_LOGIC_OP); } else { glDisable(GL_BLEND); + glEnable(GL_COLOR_LOGIC_OP); } } if (blend.color.red != cur_state.blend.color.red || - blend.color.green != cur_state.blend.color.green || - blend.color.blue != cur_state.blend.color.blue || - blend.color.alpha != cur_state.blend.color.alpha) { - glBlendColor(blend.color.red, blend.color.green, blend.color.blue, blend.color.alpha); + blend.color.green != cur_state.blend.color.green || + blend.color.blue != cur_state.blend.color.blue || + blend.color.alpha != cur_state.blend.color.alpha) { + glBlendColor(blend.color.red, blend.color.green, + blend.color.blue, blend.color.alpha); } if (blend.src_rgb_func != cur_state.blend.src_rgb_func || - blend.dst_rgb_func != cur_state.blend.dst_rgb_func || - blend.src_a_func != cur_state.blend.src_a_func || - blend.dst_a_func != cur_state.blend.dst_a_func) { - glBlendFuncSeparate(blend.src_rgb_func, blend.dst_rgb_func, blend.src_a_func, blend.dst_a_func); + blend.dst_rgb_func != cur_state.blend.dst_rgb_func || + blend.src_a_func != cur_state.blend.src_a_func || + blend.dst_a_func != cur_state.blend.dst_a_func) { + glBlendFuncSeparate(blend.src_rgb_func, blend.dst_rgb_func, + blend.src_a_func, blend.dst_a_func); + } + + if (logic_op != cur_state.logic_op) { + glLogicOp(logic_op); } // Textures diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 281b7cad5..26b916360 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -20,6 +20,13 @@ public: } depth; struct { + GLboolean red_enabled; + GLboolean green_enabled; + GLboolean blue_enabled; + GLboolean alpha_enabled; + } color_mask; // GL_COLOR_WRITEMASK + + struct { bool test_enabled; // GL_STENCIL_TEST GLenum test_func; // GL_STENCIL_FUNC GLint test_ref; // GL_STENCIL_REF @@ -42,6 +49,8 @@ public: } color; // GL_BLEND_COLOR } blend; + GLenum logic_op; // GL_LOGIC_OP_MODE + // 3 texture units - one for each that is used in PICA fragment shader emulation struct { bool enabled_2d; // GL_TEXTURE_2D @@ -61,7 +70,7 @@ public: static const OpenGLState& GetCurState() { return cur_state; } - + /// Apply this state as the current OpenGL state void Apply(); diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h index f8763e71b..e566f9f7a 100644 --- a/src/video_core/renderer_opengl/pica_to_gl.h +++ b/src/video_core/renderer_opengl/pica_to_gl.h @@ -71,6 +71,37 @@ inline GLenum BlendFunc(Pica::Regs::BlendFactor factor) { return blend_func_table[(unsigned)factor]; } +inline GLenum LogicOp(Pica::Regs::LogicOp op) { + static const GLenum logic_op_table[] = { + GL_CLEAR, // Clear + GL_AND, // And + GL_AND_REVERSE, // AndReverse + GL_COPY, // Copy + GL_SET, // Set + GL_COPY_INVERTED, // CopyInverted + GL_NOOP, // NoOp + GL_INVERT, // Invert + GL_NAND, // Nand + GL_OR, // Or + GL_NOR, // Nor + GL_XOR, // Xor + GL_EQUIV, // Equiv + GL_AND_INVERTED, // AndInverted + GL_OR_REVERSE, // OrReverse + GL_OR_INVERTED, // OrInverted + }; + + // Range check table for input + if ((unsigned)op >= ARRAY_SIZE(logic_op_table)) { + LOG_CRITICAL(Render_OpenGL, "Unknown logic op %d", op); + UNREACHABLE(); + + return GL_COPY; + } + + return logic_op_table[(unsigned)op]; +} + inline GLenum CompareFunc(Pica::Regs::CompareFunc func) { static const GLenum compare_func_table[] = { GL_NEVER, // CompareFunc::Never diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 16cf92e20..3399ca123 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -157,7 +157,7 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& state.texture_units[0].enabled_2d = true; state.texture_units[0].texture_2d = texture.handle; state.Apply(); - + glActiveTexture(GL_TEXTURE0); glPixelStorei(GL_UNPACK_ROW_LENGTH, (GLint)pixel_stride); @@ -170,6 +170,9 @@ void RendererOpenGL::LoadFBToActiveGLTexture(const GPU::Regs::FramebufferConfig& texture.gl_format, texture.gl_type, framebuffer_data); glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + + state.texture_units[0].texture_2d = 0; + state.Apply(); } /** @@ -239,6 +242,9 @@ void RendererOpenGL::InitOpenGLObjects() { glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); } + state.texture_units[0].texture_2d = 0; + state.Apply(); + hw_rasterizer->InitObjects(); } @@ -370,6 +376,8 @@ void RendererOpenGL::Init() { } LOG_INFO(Render_OpenGL, "GL_VERSION: %s", glGetString(GL_VERSION)); + LOG_INFO(Render_OpenGL, "GL_VENDOR: %s", glGetString(GL_VENDOR)); + LOG_INFO(Render_OpenGL, "GL_RENDERER: %s", glGetString(GL_RENDERER)); InitOpenGLObjects(); } diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 7d68998f1..87006a832 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -119,17 +119,13 @@ static void ProcessShaderCode(VertexShaderState& state) { switch (instr.opcode.Value().GetInfo().type) { case OpCode::Type::Arithmetic: { - bool is_inverted = 0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed); - // TODO: We don't really support this properly: For instance, the address register - // offset needs to be applied to SRC2 instead, etc. - // For now, we just abort in this situation. - ASSERT_MSG(!is_inverted, "Bad condition..."); + const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); const int address_offset = (instr.common.address_register_index == 0) ? 0 : state.address_registers[instr.common.address_register_index - 1]; - const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + address_offset); - const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted)); + const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + (!is_inverted * address_offset)); + const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted) + ( is_inverted * address_offset)); const bool negate_src1 = ((bool)swizzle.negate_src1 != false); const bool negate_src2 = ((bool)swizzle.negate_src2 != false); @@ -208,6 +204,15 @@ static void ProcessShaderCode(VertexShaderState& state) { } break; + case OpCode::Id::MIN: + for (int i = 0; i < 4; ++i) { + if (!swizzle.DestComponentEnabled(i)) + continue; + + dest[i] = std::min(src1[i], src2[i]); + } + break; + case OpCode::Id::DP3: case OpCode::Id::DP4: { @@ -279,6 +284,16 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } + case OpCode::Id::SLT: + case OpCode::Id::SLTI: + for (int i = 0; i < 4; ++i) { + if (!swizzle.DestComponentEnabled(i)) + continue; + + dest[i] = (src1[i] < src2[i]) ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); + } + break; + case OpCode::Id::CMP: for (int i = 0; i < 2; ++i) { // TODO: Can you restrict to one compare via dest masking? @@ -330,7 +345,7 @@ static void ProcessShaderCode(VertexShaderState& state) { case OpCode::Type::MultiplyAdd: { - if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) || + if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) || (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) { const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.mad.operand_desc_id]; @@ -547,7 +562,7 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) { const auto& attribute_register_map = regs.vs_input_register_map; float24 dummy_register; boost::fill(state.input_register_table, &dummy_register); - + if (num_attributes > 0) state.input_register_table[attribute_register_map.attribute0_register] = &input.attr[0].x; if (num_attributes > 1) state.input_register_table[attribute_register_map.attribute1_register] = &input.attr[1].x; if (num_attributes > 2) state.input_register_table[attribute_register_map.attribute2_register] = &input.attr[2].x; |