diff options
Diffstat (limited to 'src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py')
-rw-r--r-- | src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py | 92 |
1 files changed, 92 insertions, 0 deletions
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py new file mode 100644 index 000000000..8f547c266 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input_lut3.py @@ -0,0 +1,92 @@ +# Copyright © 2022 degasus <markus@selfnet.de> +# This work is free. You can redistribute it and/or modify it under the +# terms of the Do What The Fuck You Want To Public License, Version 2, +# as published by Sam Hocevar. See http://www.wtfpl.net/ for more details. + +from itertools import product + +# The primitive instructions +OPS = { + 'ir.BitwiseAnd({}, {})' : (2, 1, lambda a,b: a&b), + 'ir.BitwiseOr({}, {})' : (2, 1, lambda a,b: a|b), + 'ir.BitwiseXor({}, {})' : (2, 1, lambda a,b: a^b), + 'ir.BitwiseNot({})' : (1, 0.1, lambda a: (~a) & 255), # Only tiny cost, as this can often inlined in other instructions +} + +# Our database of combination of instructions +optimized_calls = {} +def cmp(lhs, rhs): + if lhs is None: # new entry + return True + if lhs[3] > rhs[3]: # costs + return True + if lhs[3] < rhs[3]: # costs + return False + if len(lhs[0]) > len(rhs[0]): # string len + return True + if len(lhs[0]) < len(rhs[0]): # string len + return False + if lhs[0] > rhs[0]: # string sorting + return True + if lhs[0] < rhs[0]: # string sorting + return False + assert lhs == rhs, "redundant instruction, bug in brute force" + return False +def register(imm, instruction, count, latency): + # Use the sum of instruction count and latency as costs to evaluate which combination is best + costs = count + latency + + old = optimized_calls.get(imm, None) + new = (instruction, count, latency, costs) + + # Update if new or better + if cmp(old, new): + optimized_calls[imm] = new + return True + + return False + +# Constants: 0, 1 (for free) +register(0, 'ir.Imm32(0)', 0, 0) +register(255, 'ir.Imm32(0xFFFFFFFF)', 0, 0) + +# Inputs: a, b, c (for free) +ta = 0xF0 +tb = 0xCC +tc = 0xAA +inputs = { + ta : 'a', + tb : 'b', + tc : 'c', +} +for imm, instruction in inputs.items(): + register(imm, instruction, 0, 0) + register((~imm) & 255, 'ir.BitwiseNot({})'.format(instruction), 0.099, 0.099) # slightly cheaper NEG on inputs + +# Try to combine two values from the db with an instruction. +# If it is better than the old method, update it. +while True: + registered = 0 + calls_copy = optimized_calls.copy() + for OP, (argc, cost, f) in OPS.items(): + for args in product(calls_copy.items(), repeat=argc): + # unpack(transponse) the arrays + imm = [arg[0] for arg in args] + value = [arg[1][0] for arg in args] + count = [arg[1][1] for arg in args] + latency = [arg[1][2] for arg in args] + + registered += register( + f(*imm), + OP.format(*value), + sum(count) + cost, + max(latency) + cost) + if registered == 0: + # No update at all? So terminate + break + +# Hacky output. Please improve me to output valid C++ instead. +s = """ case {imm}: + return {op};""" +for imm in range(256): + print(s.format(imm=imm, op=optimized_calls[imm][0])) |