nir/opt_algebraic: optimize more fmulz(1.0, a) remains
If dxvk's open-coded fmulz gets partially constant-folded, it leaves this mess behind.

It's important to do this before the more general fmul+b2f patterns added in the next commit, because they change the signed-zero behavior in a way that can't be optimized back.

Foz-DB Navi48:
Totals from 36 (0.03% of 114655) affected shaders:
Instrs: 16513 -> 15706 (-4.89%)
CodeSize: 99756 -> 95760 (-4.01%)
Latency: 45165 -> 44151 (-2.25%)
InvThroughput: 8344 -> 7886 (-5.49%)
VClause: 395 -> 401 (+1.52%)
Copies: 639 -> 634 (-0.78%)
PreSGPRs: 1158 -> 1154 (-0.35%)
PreVGPRs: 1227 -> 1225 (-0.16%)
VALU: 11310 -> 10769 (-4.78%)
SALU: 813 -> 809 (-0.49%)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40399>
This commit is contained in:
parent
3ad142d4d7
commit
d2b37b667e
1 changed files with 19 additions and 2 deletions
|
|
@ -1571,6 +1571,24 @@ for op in ('ior', 'iand', 'ixor'):
|
|||
(('iand', (op, ('b2i', 'a@1'), ('ineg', ('b2i', 'b@1'))), 1), ('b2i', (op, a, b)) ),
|
||||
])
|
||||
|
||||
|
||||
# Clean up the remains of a partially constant-folded open-coded fmulz:
# multiplying `a` (optionally negated and/or made absolute) by
# b2f(a != 0.0), itself optionally negated.
#
# The source modifiers are ordered so that XOR-ing the index with 1 toggles
# the outer fneg: a <-> fneg(a) and fabs(a) <-> fneg(fabs(a)).  A negated b2f
# therefore maps each searched modifier onto its sign-flipped neighbour.
mod = [a, ('fneg', a), ('fabs', a), ('fneg', ('fabs', a))]

# Both spellings of "a is not equal to zero" are matched.
for compare in [('fneu', a, 0.0), ('inot', ('feq', a, 0.0))]:
    for i, search_mod in enumerate(mod):
        for neg_b2f in [False, True]:
            replace_mod = mod[i ^ int(neg_b2f)]
            search_b2f = ('fneg', ('b2f', compare)) if neg_b2f else ('b2f', compare)

            # When the fmul collapses to plain `a`, keep an fcanonicalize in
            # its place.
            # NOTE(review): presumably this preserves the canonicalization
            # the removed fmul would have performed - confirm.
            replace_mod_mul = ('fcanonicalize', a) if replace_mod == a else replace_mod

            optimizations.extend([
                (('fmul', search_b2f, search_mod), replace_mod_mul),
                # The ffma form keeps an fadd, so the bare modifier is used
                # without the extra fcanonicalize.
                (('ffma', search_b2f, search_mod, b), ('fadd', replace_mod, b)),
            ])
|
||||
|
||||
optimizations.extend([
|
||||
(('feq', ('seq', a, b), 1.0), ('feq', a, b)),
|
||||
(('feq', ('sne', a, b), 1.0), ('fneu', a, b)),
|
||||
|
|
@ -1599,13 +1617,12 @@ optimizations.extend([
|
|||
(('ffma', ('b2f', 'a@1'), ('b2f', 'b@1'), c), ('fadd', ('b2f', ('iand', a, b)), c)),
|
||||
(('fadd', 1.0, ('fneg', ('b2f', a))), ('b2f', ('inot', a))),
|
||||
(('fadd(nsz)', -1.0, ('b2f', a)), ('fneg', ('b2f', ('inot', a)))),
|
||||
(('fmul', ('b2f', ('fneu', a, 0)), a), ('fmul', 1.0, a)),
|
||||
(('ffma', ('b2f', ('fneu', a, 0)), a, b), ('fadd', a, b)),
|
||||
(('fsat', ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1'))), ('b2f', ('ior', a, b))),
|
||||
(('fsat', ('fadd', ('b2f', 'a@1'), ('fneg', ('b2f', 'b@1')))), ('b2f', ('iand', a, ('inot', b)))),
|
||||
(('fmax', ('fadd', ('b2f', 'a@1'), ('fneg', ('b2f', 'b@1'))), 0.0), ('b2f', ('iand', a, ('inot', b)))),
|
||||
(('iand', 'a@bool16', 1.0), ('b2f', a)),
|
||||
(('iand', 'a@bool32', 1.0), ('b2f', a)),
|
||||
|
||||
# Comparison with the same args. Note that these are only done for the
|
||||
# float versions when the source must be a number. Generally, NaN cmp NaN
|
||||
# produces the opposite result of X cmp X. flt is the outlier. NaN < NaN
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue