5 files changed, 66 insertions, 15 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index d75de85e2..f32a17057 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -75,6 +75,10 @@ union Attribute {
     enum class Index : u64 {
         Position = 7,
         Attribute_0 = 8,
+        // This attribute contains a tuple of (~, ~, InstanceId, VertexId) when inside a vertex
+        // shader, and a tuple of (TessCoord.x, TessCoord.y, TessCoord.z, ~) when inside a
+        // tessellation evaluation shader. "~" marks an element whose value is still unknown.
+        TessCoordInstanceIDVertexID = 47,
     };
 
     union {
@@ -456,9 +460,9 @@ private:
             INST("00011110--------", Id::FMUL32_IMM, Type::Arithmetic, "FMUL32_IMM"),
             INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
             INST("0101110010010---", Id::RRO, Type::Arithmetic, "RRO"),
-            INST("0100110010101---", Id::F2F_C, Type::Arithmetic, "F2F_C"),
-            INST("0101110010101---", Id::F2F_R, Type::Arithmetic, "F2F_R"),
-            INST("0011100-10101---", Id::F2F_IMM, Type::Arithmetic, "F2F_IMM"),
+            INST("0100110010101---", Id::F2F_C, Type::Conversion, "F2F_C"),
+            INST("0101110010101---", Id::F2F_R, Type::Conversion, "F2F_R"),
+            INST("0011100-10101---", Id::F2F_IMM, Type::Conversion, "F2F_IMM"),
             INST("0100110010110---", Id::F2I_C, Type::Arithmetic, "F2I_C"),
             INST("0101110010110---", Id::F2I_R, Type::Arithmetic, "F2I_R"),
             INST("0011100-10110---", Id::F2I_IMM, Type::Arithmetic, "F2I_IMM"),
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 5bb34037b..65d643447 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -45,21 +45,23 @@ struct FormatTuple {
 
 static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{
     {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false},                    // ABGR8
-    {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false},                       // B5G6R5
+    {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, false},                           // B5G6R5
     {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false},              // A2B10G10R10
     {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false},                // A1B5G5R5
     {GL_R8, GL_RED, GL_UNSIGNED_BYTE, false},                                   // R8
+    {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false},                                // RGBA16F
     {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true},   // DXT1
     {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23
     {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45
+    {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true},           // DXN1
 }};
 
 static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
     const SurfaceType type = SurfaceParams::GetFormatType(pixel_format);
     if (type == SurfaceType::ColorTexture) {
         ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size());
-        // For now only UNORM components are supported
-        ASSERT(component_type == ComponentType::UNorm);
+        // For now only UNORM components are supported, except for RGBA16F which is FLOAT
+        ASSERT(component_type == ComponentType::UNorm || pixel_format == PixelFormat::RGBA16F);
         return tex_format_tuples[static_cast<unsigned int>(pixel_format)];
     } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) {
         // TODO(Subv): Implement depth formats
@@ -110,8 +112,9 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra:
     morton_to_gl_fns = {
         MortonCopy<true, PixelFormat::ABGR8>,       MortonCopy<true, PixelFormat::B5G6R5>,
         MortonCopy<true, PixelFormat::A2B10G10R10>, MortonCopy<true, PixelFormat::A1B5G5R5>,
-        MortonCopy<true, PixelFormat::R8>,          MortonCopy<true, PixelFormat::DXT1>,
-        MortonCopy<true, PixelFormat::DXT23>,       MortonCopy<true, PixelFormat::DXT45>,
+        MortonCopy<true, PixelFormat::R8>,          MortonCopy<true, PixelFormat::RGBA16F>,
+        MortonCopy<true, PixelFormat::DXT1>,        MortonCopy<true, PixelFormat::DXT23>,
+        MortonCopy<true, PixelFormat::DXT45>,       MortonCopy<true, PixelFormat::DXN1>,
 };
 
 static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr,
@@ -123,7 +126,9 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra:
         MortonCopy<false, PixelFormat::A2B10G10R10>,
         MortonCopy<false, PixelFormat::A1B5G5R5>,
         MortonCopy<false, PixelFormat::R8>,
-        // TODO(Subv): Swizzling the DXT1/DXT23/DXT45 formats is not yet supported
+        MortonCopy<false, PixelFormat::RGBA16F>,
+        // TODO(Subv): Swizzling the DXT1/DXT23/DXT45/DXN1 formats is not yet supported
+        nullptr,
         nullptr,
         nullptr,
         nullptr,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index fc09f108c..6f08678ab 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -59,9 +59,11 @@ struct SurfaceParams {
         A2B10G10R10 = 2,
         A1B5G5R5 = 3,
         R8 = 4,
-        DXT1 = 5,
-        DXT23 = 6,
-        DXT45 = 7,
+        RGBA16F = 5,
+        DXT1 = 6,
+        DXT23 = 7,
+        DXT45 = 8,
+        DXN1 = 9, // This is also known as BC4
 
         Max,
         Invalid = 255,
@@ -102,9 +104,11 @@ struct SurfaceParams {
             1, // A2B10G10R10
             1, // A1B5G5R5
             1, // R8
+            2, // RGBA16F
             4, // DXT1
             4, // DXT23
             4, // DXT45
+            4, // DXN1
         }};
 
         ASSERT(static_cast<size_t>(format) < compression_factor_table.size());
@@ -124,9 +128,11 @@ struct SurfaceParams {
             32,  // A2B10G10R10
             16,  // A1B5G5R5
             8,   // R8
+            64,  // RGBA16F
             64,  // DXT1
             128, // DXT23
             128, // DXT45
+            64,  // DXN1
         }};
 
         ASSERT(static_cast<size_t>(format) < bpp_table.size());
@@ -143,6 +149,8 @@ struct SurfaceParams {
             return PixelFormat::ABGR8;
         case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
             return PixelFormat::A2B10G10R10;
+        case Tegra::RenderTargetFormat::RGBA16_FLOAT:
+            return PixelFormat::RGBA16F;
         default:
             NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
             UNREACHABLE();
@@ -172,12 +180,16 @@ struct SurfaceParams {
             return PixelFormat::A1B5G5R5;
         case Tegra::Texture::TextureFormat::R8:
             return PixelFormat::R8;
+        case Tegra::Texture::TextureFormat::R16_G16_B16_A16:
+            return PixelFormat::RGBA16F;
         case Tegra::Texture::TextureFormat::DXT1:
             return PixelFormat::DXT1;
         case Tegra::Texture::TextureFormat::DXT23:
             return PixelFormat::DXT23;
         case Tegra::Texture::TextureFormat::DXT45:
             return PixelFormat::DXT45;
+        case Tegra::Texture::TextureFormat::DXN1:
+            return PixelFormat::DXN1;
         default:
             NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
             UNREACHABLE();
@@ -197,12 +209,16 @@ struct SurfaceParams {
             return Tegra::Texture::TextureFormat::A1B5G5R5;
         case PixelFormat::R8:
             return Tegra::Texture::TextureFormat::R8;
+        case PixelFormat::RGBA16F:
+            return Tegra::Texture::TextureFormat::R16_G16_B16_A16;
         case PixelFormat::DXT1:
             return Tegra::Texture::TextureFormat::DXT1;
         case PixelFormat::DXT23:
             return Tegra::Texture::TextureFormat::DXT23;
         case PixelFormat::DXT45:
             return Tegra::Texture::TextureFormat::DXT45;
+        case PixelFormat::DXN1:
+            return Tegra::Texture::TextureFormat::DXN1;
         default:
             UNREACHABLE();
         }
@@ -226,6 +242,8 @@ struct SurfaceParams {
         case Tegra::RenderTargetFormat::RGBA8_SRGB:
         case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
             return ComponentType::UNorm;
+        case Tegra::RenderTargetFormat::RGBA16_FLOAT:
+            return ComponentType::Float;
         default:
             NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
             UNREACHABLE();
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 75822e750..68efe74b8 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -299,7 +299,7 @@ public:
      * are stored as floats, so this may require conversion.
      * @param reg The destination register to use.
      * @param elem The element to use for the operation.
-     * @param attribute The input attibute to use as the source value.
+     * @param attribute The input attribute to use as the source value.
      */
     void SetRegisterToInputAttibute(const Register& reg, u64 elem, Attribute::Index attribute) {
         std::string dest = GetRegisterAsFloat(reg);
@@ -451,6 +451,12 @@ private:
         switch (attribute) {
         case Attribute::Index::Position:
             return "position";
+        case Attribute::Index::TessCoordInstanceIDVertexID:
+            // TODO(Subv): Find out what the values are for the first two elements when inside a
+            // vertex shader, and what's the value of the fourth element when inside a Tess Eval
+            // shader.
+            ASSERT(stage == Maxwell3D::Regs::ShaderStage::Vertex);
+            return "vec4(0, 0, gl_InstanceID, gl_VertexID)";
         default:
             const u32 index{static_cast<u32>(attribute) -
                             static_cast<u32>(Attribute::Index::Attribute_0)};
@@ -834,13 +840,14 @@ private:
         }
         case OpCode::Type::Conversion: {
             ASSERT_MSG(instr.conversion.size == Register::Size::Word, "Unimplemented");
-            ASSERT_MSG(!instr.conversion.selector, "Unimplemented");
             ASSERT_MSG(!instr.conversion.negate_a, "Unimplemented");
             ASSERT_MSG(!instr.conversion.saturate_a, "Unimplemented");
 
             switch (opcode->GetId()) {
             case OpCode::Id::I2I_R:
             case OpCode::Id::I2F_R: {
+                ASSERT_MSG(!instr.conversion.selector, "Unimplemented");
+
                 std::string op_a =
                     regs.GetRegisterAsInteger(instr.gpr20, 0, instr.conversion.is_signed);
 
@@ -851,6 +858,16 @@ private:
                 regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_signed, 0, op_a, 1, 1);
                 break;
             }
+            case OpCode::Id::F2F_R: {
+                std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);
+
+                if (instr.conversion.abs_a) {
+                    op_a = "abs(" + op_a + ')';
+                }
+
+                regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
+                break;
+            }
             default: {
                 NGLOG_CRITICAL(HW_GPU, "Unhandled conversion instruction: {}", opcode->GetName());
                 UNREACHABLE();
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 9d7b73b73..2d2af5554 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -46,6 +46,7 @@ void CopySwizzledData(u32 width, u32 height, u32 bytes_per_pixel, u32 out_bytes_
 u32 BytesPerPixel(TextureFormat format) {
     switch (format) {
     case TextureFormat::DXT1:
+    case TextureFormat::DXN1:
         // In this case a 'pixel' actually refers to a 4x4 tile.
         return 8;
     case TextureFormat::DXT23:
@@ -60,6 +61,8 @@ u32 BytesPerPixel(TextureFormat format) {
         return 2;
     case TextureFormat::R8:
         return 1;
+    case TextureFormat::R16_G16_B16_A16:
+        return 8;
     default:
         UNIMPLEMENTED_MSG("Format not implemented");
         break;
@@ -77,7 +80,9 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width,
     case TextureFormat::DXT1:
     case TextureFormat::DXT23:
     case TextureFormat::DXT45:
-        // In the DXT formats, each 4x4 tile is swizzled instead of just individual pixel values.
+    case TextureFormat::DXN1:
+        // In the DXT and DXN formats, each 4x4 tile is swizzled instead of just individual pixel
+        // values.
         CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data,
                          unswizzled_data.data(), true, block_height);
         break;
@@ -86,6 +91,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width,
     case TextureFormat::A1B5G5R5:
     case TextureFormat::B5G6R5:
     case TextureFormat::R8:
+    case TextureFormat::R16_G16_B16_A16:
         CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
                          unswizzled_data.data(), true, block_height);
         break;
@@ -106,6 +112,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
     case TextureFormat::DXT1:
     case TextureFormat::DXT23:
     case TextureFormat::DXT45:
+    case TextureFormat::DXN1:
     case TextureFormat::A8R8G8B8:
     case TextureFormat::A2B10G10R10:
     case TextureFormat::A1B5G5R5: