nir/opt_algebraic: optimize more fmulz(1.0, a) remains

If dxvk's open-coded fmulz gets partially constant-folded,
it leaves behind an fmul/ffma of a value with a b2f of its own "!= 0" test.

It's important to do this before the more general fmul+b2f patterns added
in the next commit, because they change the signed zero behavior in a way
that can't be optimized back.

Foz-DB Navi48:
Totals from 36 (0.03% of 114655) affected shaders:

Instrs: 16513 -> 15706 (-4.89%)
CodeSize: 99756 -> 95760 (-4.01%)
Latency: 45165 -> 44151 (-2.25%)
InvThroughput: 8344 -> 7886 (-5.49%)
VClause: 395 -> 401 (+1.52%)
Copies: 639 -> 634 (-0.78%)
PreSGPRs: 1158 -> 1154 (-0.35%)
PreVGPRs: 1227 -> 1225 (-0.16%)
VALU: 11310 -> 10769 (-4.78%)
SALU: 813 -> 809 (-0.49%)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40399>
This commit is contained in:
Georg Lehmann 2026-03-14 07:11:50 +01:00 committed by Marge Bot
parent 3ad142d4d7
commit d2b37b667e

View file

@ -1571,6 +1571,24 @@ for op in ('ior', 'iand', 'ixor'):
(('iand', (op, ('b2i', 'a@1'), ('ineg', ('b2i', 'b@1'))), 1), ('b2i', (op, a, b)) ),
])
# Clean up the leftovers of an open-coded fmulz(1.0, a) that was only
# partially constant folded: a b2f of "a != 0" (optionally negated) multiplied
# with a, -a, |a| or -|a|.  Both spellings of the "a != 0" test are handled.
for compare in [('fneu', a, 0.0), ('inot', ('feq', a, 0.0))]:
    # Laid out as (x, -x) pairs so that flipping the lowest index bit selects
    # the variant with the opposite sign.
    mod = [a, ('fneg', a), ('fabs', a), ('fneg', ('fabs', a))]
    for i, search_mod in enumerate(mod):
        for neg_b2f in [False, True]:
            search_b2f = ('b2f', compare)
            replace_mod = search_mod
            if neg_b2f:
                # A negated b2f is absorbed by toggling the sign of the result.
                search_b2f = ('fneg', search_b2f)
                replace_mod = mod[i ^ 1]

            # Only the unmodified "a" result of the fmul is wrapped in
            # fcanonicalize; every other form is emitted as is.
            # NOTE(review): presumably because the removed fmul would have
            # canonicalized its result -- confirm against NIR semantics.
            if replace_mod == a:
                replace_mod_mul = ('fcanonicalize', a)
            else:
                replace_mod_mul = replace_mod

            fmul_rule = (('fmul', search_b2f, search_mod), replace_mod_mul)
            ffma_rule = (('ffma', search_b2f, search_mod, b), ('fadd', replace_mod, b))
            optimizations.extend([fmul_rule, ffma_rule])
optimizations.extend([
(('feq', ('seq', a, b), 1.0), ('feq', a, b)),
(('feq', ('sne', a, b), 1.0), ('fneu', a, b)),
@ -1599,13 +1617,12 @@ optimizations.extend([
(('ffma', ('b2f', 'a@1'), ('b2f', 'b@1'), c), ('fadd', ('b2f', ('iand', a, b)), c)),
(('fadd', 1.0, ('fneg', ('b2f', a))), ('b2f', ('inot', a))),
(('fadd(nsz)', -1.0, ('b2f', a)), ('fneg', ('b2f', ('inot', a)))),
(('fmul', ('b2f', ('fneu', a, 0)), a), ('fmul', 1.0, a)),
(('ffma', ('b2f', ('fneu', a, 0)), a, b), ('fadd', a, b)),
(('fsat', ('fadd', ('b2f', 'a@1'), ('b2f', 'b@1'))), ('b2f', ('ior', a, b))),
(('fsat', ('fadd', ('b2f', 'a@1'), ('fneg', ('b2f', 'b@1')))), ('b2f', ('iand', a, ('inot', b)))),
(('fmax', ('fadd', ('b2f', 'a@1'), ('fneg', ('b2f', 'b@1'))), 0.0), ('b2f', ('iand', a, ('inot', b)))),
(('iand', 'a@bool16', 1.0), ('b2f', a)),
(('iand', 'a@bool32', 1.0), ('b2f', a)),
# Comparison with the same args. Note that these are only done for the
# float versions when the source must be a number. Generally, NaN cmp NaN
# produces the opposite result of X cmp X. flt is the outlier. NaN < NaN