From 4e35177e23069ad7a4cb0fdfa2ad5b34300c44f7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 9 Aug 2019 23:50:21 -0300 Subject: shader_ir: Implement VOTE Implement VOTE using Nvidia's intrinsics. Documentation about these can be found here https://developer.nvidia.com/reading-between-threads-shader-intrinsics Instead of using portable ARB instructions I opted to use Nvidia intrinsics because these are the closest we have to how Tegra X1 hardware renders. To stub VOTE on non-Nvidia drivers (including nouveau) this commit simulates a GPU with a warp size of one, returning what is meaningful for the instruction being emulated: * anyThreadNV(value) -> value * allThreadsNV(value) -> value * allThreadsEqualNV(value) -> true ballotARB, also known as "uint64_t(activeThreadsNV())", emits VOTE.ANY Rd, PT, PT; on nouveau's compiler. This doesn't match exactly to Nvidia's code VOTE.ALL Rd, PT, PT; Which is emulated with activeThreadsNV() by this commit. In theory this shouldn't really matter since .ANY, .ALL and .EQ affect the predicates (set to PT on those cases) and not the registers. --- src/video_core/shader/shader_ir.h | 1 + 1 file changed, 1 insertion(+) (limited to 'src/video_core/shader/shader_ir.h') diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 59a083d90..99d06ff4a 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -167,6 +167,7 @@ private: u32 DecodeFfma(NodeBlock& bb, u32 pc); u32 DecodeHfma2(NodeBlock& bb, u32 pc); u32 DecodeConversion(NodeBlock& bb, u32 pc); + u32 DecodeWarp(NodeBlock& bb, u32 pc); u32 DecodeMemory(NodeBlock& bb, u32 pc); u32 DecodeTexture(NodeBlock& bb, u32 pc); u32 DecodeImage(NodeBlock& bb, u32 pc); -- cgit v1.2.3