summaryrefslogtreecommitdiffstats
path: root/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py
diff options
context:
space:
mode:
authorMarkus Wick <markus@selfnet.de>2022-03-07 19:13:05 +0100
committerMarkus Wick <markus@selfnet.de>2022-03-08 09:44:28 +0100
commitc78c8190d5b964c9421b7d0c0c3a96ba599b3c2f (patch)
treef9586978f8b87bf6fcd56dbebe4eabe2fa469cf3 /src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py
parentMerge pull request #7974 from bunnei/improve-code-mem (diff)
downloadyuzu-c78c8190d5b964c9421b7d0c0c3a96ba599b3c2f.tar
yuzu-c78c8190d5b964c9421b7d0c0c3a96ba599b3c2f.tar.gz
yuzu-c78c8190d5b964c9421b7d0c0c3a96ba599b3c2f.tar.bz2
yuzu-c78c8190d5b964c9421b7d0c0c3a96ba599b3c2f.tar.lz
yuzu-c78c8190d5b964c9421b7d0c0c3a96ba599b3c2f.tar.xz
yuzu-c78c8190d5b964c9421b7d0c0c3a96ba599b3c2f.tar.zst
yuzu-c78c8190d5b964c9421b7d0c0c3a96ba599b3c2f.zip
Diffstat (limited to '')
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py92
1 files changed, 92 insertions, 0 deletions
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py
new file mode 100644
index 000000000..8f547c266
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py
@@ -0,0 +1,92 @@
+# Copyright © 2022 degasus <markus@selfnet.de>
+# This work is free. You can redistribute it and/or modify it under the
+# terms of the Do What The Fuck You Want To Public License, Version 2,
+# as published by Sam Hocevar. See http://www.wtfpl.net/ for more details.
+
+from itertools import product
+
+# The primitive instructions
+OPS = {
+ 'ir.BitwiseAnd({}, {})' : (2, 1, lambda a,b: a&b),
+ 'ir.BitwiseOr({}, {})' : (2, 1, lambda a,b: a|b),
+ 'ir.BitwiseXor({}, {})' : (2, 1, lambda a,b: a^b),
+ 'ir.BitwiseNot({})' : (1, 0.1, lambda a: (~a) & 255), # Only tiny cost, as this can often inlined in other instructions
+}
+
+# Our database of combination of instructions
+optimized_calls = {}
+def cmp(lhs, rhs):
+ if lhs is None: # new entry
+ return True
+ if lhs[3] > rhs[3]: # costs
+ return True
+ if lhs[3] < rhs[3]: # costs
+ return False
+ if len(lhs[0]) > len(rhs[0]): # string len
+ return True
+ if len(lhs[0]) < len(rhs[0]): # string len
+ return False
+ if lhs[0] > rhs[0]: # string sorting
+ return True
+ if lhs[0] < rhs[0]: # string sorting
+ return False
+ assert lhs == rhs, "redundant instruction, bug in brute force"
+ return False
+def register(imm, instruction, count, latency):
+ # Use the sum of instruction count and latency as costs to evaluate which combination is best
+ costs = count + latency
+
+ old = optimized_calls.get(imm, None)
+ new = (instruction, count, latency, costs)
+
+ # Update if new or better
+ if cmp(old, new):
+ optimized_calls[imm] = new
+ return True
+
+ return False
+
+# Constants: 0, 1 (for free)
+register(0, 'ir.Imm32(0)', 0, 0)
+register(255, 'ir.Imm32(0xFFFFFFFF)', 0, 0)
+
+# Inputs: a, b, c (for free)
+ta = 0xF0
+tb = 0xCC
+tc = 0xAA
+inputs = {
+ ta : 'a',
+ tb : 'b',
+ tc : 'c',
+}
+for imm, instruction in inputs.items():
+ register(imm, instruction, 0, 0)
+ register((~imm) & 255, 'ir.BitwiseNot({})'.format(instruction), 0.099, 0.099) # slightly cheaper NEG on inputs
+
+# Try to combine two values from the db with an instruction.
+# If it is better than the old method, update it.
+while True:
+ registered = 0
+ calls_copy = optimized_calls.copy()
+ for OP, (argc, cost, f) in OPS.items():
+ for args in product(calls_copy.items(), repeat=argc):
+ # unpack(transponse) the arrays
+ imm = [arg[0] for arg in args]
+ value = [arg[1][0] for arg in args]
+ count = [arg[1][1] for arg in args]
+ latency = [arg[1][2] for arg in args]
+
+ registered += register(
+ f(*imm),
+ OP.format(*value),
+ sum(count) + cost,
+ max(latency) + cost)
+ if registered == 0:
+ # No update at all? So terminate
+ break
+
+# Hacky output. Please improve me to output valid C++ instead.
+s = """ case {imm}:
+ return {op};"""
+for imm in range(256):
+ print(s.format(imm=imm, op=optimized_calls[imm][0]))