summary refs log tree commit diff stats
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/video_core/engines/maxwell_3d.h 22
-rw-r--r-- src/video_core/engines/shader_bytecode.h 15
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 15
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp 6
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.h 9
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 30
-rw-r--r-- src/video_core/renderer_opengl/maxwell_to_gl.h 8
-rw-r--r-- src/video_core/textures/decoders.cpp 2
8 files changed, 100 insertions, 7 deletions
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 988a6433e..cc1f90de6 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -478,7 +478,9 @@ public:
u32 depth_write_enabled;
- INSERT_PADDING_WORDS(0x8);
+ INSERT_PADDING_WORDS(0x7);
+
+ u32 d3d_cull_mode;
BitField<0, 3, ComparisonOp> depth_test_func;
@@ -498,7 +500,13 @@ public:
u32 enable[NumRenderTargets];
} blend;
- INSERT_PADDING_WORDS(0x2D);
+ INSERT_PADDING_WORDS(0xB);
+
+ union {
+ BitField<4, 1, u32> triangle_rast_flip;
+ } screen_y_control;
+
+ INSERT_PADDING_WORDS(0x21);
u32 vb_element_base;
@@ -528,7 +536,12 @@ public:
}
} tic;
- INSERT_PADDING_WORDS(0x22);
+ INSERT_PADDING_WORDS(0x21);
+
+ union {
+ BitField<2, 1, u32> coord_origin;
+ BitField<3, 10, u32> enable;
+ } point_coord_replace;
struct {
u32 code_address_high;
@@ -818,11 +831,14 @@ ASSERT_REG_POSITION(rt_control, 0x487);
ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
+ASSERT_REG_POSITION(d3d_cull_mode, 0x4C2);
ASSERT_REG_POSITION(depth_test_func, 0x4C3);
ASSERT_REG_POSITION(blend, 0x4CF);
+ASSERT_REG_POSITION(screen_y_control, 0x4EB);
ASSERT_REG_POSITION(vb_element_base, 0x50D);
ASSERT_REG_POSITION(tsc, 0x557);
ASSERT_REG_POSITION(tic, 0x55D);
+ASSERT_REG_POSITION(point_coord_replace, 0x581);
ASSERT_REG_POSITION(code_address, 0x582);
ASSERT_REG_POSITION(draw, 0x585);
ASSERT_REG_POSITION(index_array, 0x5F2);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index c1226a649..da1aaeeee 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -329,6 +329,19 @@ union Instruction {
} isetp;
union {
+ BitField<0, 3, u64> pred0;
+ BitField<3, 3, u64> pred3;
+ BitField<12, 3, u64> pred12;
+ BitField<15, 1, u64> neg_pred12;
+ BitField<24, 2, PredOperation> cond;
+ BitField<29, 3, u64> pred29;
+ BitField<32, 1, u64> neg_pred29;
+ BitField<39, 3, u64> pred39;
+ BitField<42, 1, u64> neg_pred39;
+ BitField<45, 2, PredOperation> op;
+ } psetp;
+
+ union {
BitField<39, 3, u64> pred39;
BitField<42, 1, u64> neg_pred;
BitField<43, 1, u64> neg_a;
@@ -641,7 +654,7 @@ private:
INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
- INST("1100000000111---", Id::TEX, Type::Memory, "TEX"),
+ INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"),
INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"),
INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"),
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e516eb1ad..bacb389e1 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -686,7 +686,10 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program,
// Bind the uniform to the sampler.
GLint uniform = glGetUniformLocation(program, entry.GetName().c_str());
- ASSERT(uniform != -1);
+ if (uniform == -1) {
+ continue;
+ }
+
glProgramUniform1i(program, uniform, current_bindpoint);
const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());
@@ -771,6 +774,16 @@ void RasterizerOpenGL::SyncCullMode() {
if (state.cull.enabled) {
state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
+
+ // If the GPU is configured to flip the rasterized triangles, then we need to flip the
+ // notion of front and back. Note: We flip the triangles when the value of the register is 0
+ // because OpenGL already does it for us.
+ if (regs.screen_y_control.triangle_rast_flip == 0) {
+ if (state.cull.front_face == GL_CCW)
+ state.cull.front_face = GL_CW;
+ else if (state.cull.front_face == GL_CW)
+ state.cull.front_face = GL_CCW;
+ }
}
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 50469c05c..57d7763ff 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -108,7 +108,8 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
false}, // Z24S8
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
- false}, // S8Z24
+ false}, // S8Z24
+ {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
}};
static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
@@ -191,7 +192,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
MortonCopy<true, PixelFormat::DXT1>, MortonCopy<true, PixelFormat::DXT23>,
MortonCopy<true, PixelFormat::DXT45>, MortonCopy<true, PixelFormat::DXN1>,
MortonCopy<true, PixelFormat::ASTC_2D_4X4>, MortonCopy<true, PixelFormat::Z24S8>,
- MortonCopy<true, PixelFormat::S8Z24>,
+ MortonCopy<true, PixelFormat::S8Z24>, MortonCopy<true, PixelFormat::Z32F>,
};
static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
@@ -213,6 +214,7 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
MortonCopy<false, PixelFormat::ABGR8>,
MortonCopy<false, PixelFormat::Z24S8>,
MortonCopy<false, PixelFormat::S8Z24>,
+ MortonCopy<false, PixelFormat::Z32F>,
};
// Allocate an uninitialized texture of appropriate size and format for the surface
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 8005a81b8..b4d7f8ebe 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -42,6 +42,7 @@ struct SurfaceParams {
// DepthStencil formats
Z24S8 = 13,
S8Z24 = 14,
+ Z32F = 15,
MaxDepthStencilFormat,
@@ -94,6 +95,7 @@ struct SurfaceParams {
4, // ASTC_2D_4X4
1, // Z24S8
1, // S8Z24
+ 1, // Z32F
}};
ASSERT(static_cast<size_t>(format) < compression_factor_table.size());
@@ -120,6 +122,7 @@ struct SurfaceParams {
32, // ASTC_2D_4X4
32, // Z24S8
32, // S8Z24
+ 32, // Z32F
}};
ASSERT(static_cast<size_t>(format) < bpp_table.size());
@@ -135,6 +138,8 @@ struct SurfaceParams {
return PixelFormat::S8Z24;
case Tegra::DepthFormat::Z24_S8_UNORM:
return PixelFormat::Z24S8;
+ case Tegra::DepthFormat::Z32_FLOAT:
+ return PixelFormat::Z32F;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
@@ -235,6 +240,8 @@ struct SurfaceParams {
return Tegra::DepthFormat::S8_Z24_UNORM;
case PixelFormat::Z24S8:
return Tegra::DepthFormat::Z24_S8_UNORM;
+ case PixelFormat::Z32F:
+ return Tegra::DepthFormat::Z32_FLOAT;
default:
UNREACHABLE();
}
@@ -284,6 +291,8 @@ struct SurfaceParams {
case Tegra::DepthFormat::S8_Z24_UNORM:
case Tegra::DepthFormat::Z24_S8_UNORM:
return ComponentType::UNorm;
+ case Tegra::DepthFormat::Z32_FLOAT:
+ return ComponentType::Float;
default:
LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
UNREACHABLE();
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index f9a90a1a4..03ac4fe9e 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1480,6 +1480,36 @@ private:
}
break;
}
+ case OpCode::Type::PredicateSetPredicate: {
+ std::string op_a =
+ GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
+ std::string op_b =
+ GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
+
+ using Tegra::Shader::Pred;
+ // We can't use the constant predicate as destination.
+ ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
+
+ std::string second_pred =
+ GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
+
+ std::string combiner = GetPredicateCombiner(instr.psetp.op);
+
+ std::string predicate =
+ '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')';
+
+ // Set the primary predicate to the result of Predicate OP SecondPredicate
+ SetPredicate(instr.psetp.pred3,
+ '(' + predicate + ") " + combiner + " (" + second_pred + ')');
+
+ if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
+ // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
+ // if enabled
+ SetPredicate(instr.psetp.pred0,
+ "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
+ }
+ break;
+ }
case OpCode::Type::FloatSet: {
std::string op_a = instr.fset.neg_a ? "-" : "";
op_a += regs.GetRegisterAsFloat(instr.gpr8);
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 6b9bb3df1..6ce53bbd9 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -29,6 +29,10 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
switch (attrib.size) {
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return GL_UNSIGNED_BYTE;
+ case Maxwell::VertexAttribute::Size::Size_16_16:
+ return GL_UNSIGNED_SHORT;
+ case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+ return GL_UNSIGNED_INT_2_10_10_10_REV;
}
LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
@@ -41,6 +45,10 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
switch (attrib.size) {
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return GL_BYTE;
+ case Maxwell::VertexAttribute::Size::Size_16_16:
+ return GL_SHORT;
+ case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+ return GL_INT_2_10_10_10_REV;
}
LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 7b06fea3e..d5ab4e4f9 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -78,6 +78,7 @@ static u32 DepthBytesPerPixel(DepthFormat format) {
switch (format) {
case DepthFormat::S8_Z24_UNORM:
case DepthFormat::Z24_S8_UNORM:
+ case DepthFormat::Z32_FLOAT:
return 4;
default:
UNIMPLEMENTED_MSG("Format not implemented");
@@ -132,6 +133,7 @@ std::vector<u8> UnswizzleDepthTexture(VAddr address, DepthFormat format, u32 wid
switch (format) {
case DepthFormat::S8_Z24_UNORM:
case DepthFormat::Z24_S8_UNORM:
+ case DepthFormat::Z32_FLOAT:
CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
unswizzled_data.data(), true, block_height);
break;