nir: change export_amd intrinsics to use enabled_channels instead of write_mask

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40415>
This commit is contained in:
Marek Olšák 2026-02-24 09:18:31 -05:00 committed by Marge Bot
parent e7f6c8ab7e
commit b75a3112fd
5 changed files with 47 additions and 50 deletions

View file

@ -203,7 +203,7 @@ emit_ps_mrtz_export(nir_builder *b, lower_ps_state *s, nir_def *mrtz_alpha)
nir_def *undef = nir_undef(b, 1, 32);
nir_def *outputs[4] = {undef, undef, undef, undef};
unsigned write_mask = 0;
unsigned enabled_channels = 0;
unsigned flags = 0;
if (format == V_028710_SPI_SHADER_UINT16_ABGR) {
@ -214,30 +214,30 @@ emit_ps_mrtz_export(nir_builder *b, lower_ps_state *s, nir_def *mrtz_alpha)
if (s->stencil) {
outputs[0] = nir_ishl_imm(b, s->stencil, 16);
write_mask |= s->options->gfx_level >= GFX11 ? 0x1 : 0x3;
enabled_channels |= s->options->gfx_level >= GFX11 ? 0x1 : 0x3;
}
if (s->sample_mask) {
outputs[1] = s->sample_mask;
write_mask |= s->options->gfx_level >= GFX11 ? 0x2 : 0xc;
enabled_channels |= s->options->gfx_level >= GFX11 ? 0x2 : 0xc;
}
} else {
if (s->depth) {
outputs[0] = s->depth;
write_mask |= 0x1;
enabled_channels |= 0x1;
}
if (s->stencil) {
assert(format == V_028710_SPI_SHADER_32_GR ||
format == V_028710_SPI_SHADER_32_ABGR);
outputs[1] = s->stencil;
write_mask |= 0x2;
enabled_channels |= 0x2;
}
if (s->sample_mask) {
assert(format == V_028710_SPI_SHADER_32_ABGR);
outputs[2] = s->sample_mask;
write_mask |= 0x4;
enabled_channels |= 0x4;
}
if (mrtz_alpha) {
@ -245,17 +245,17 @@ emit_ps_mrtz_export(nir_builder *b, lower_ps_state *s, nir_def *mrtz_alpha)
format == V_028710_SPI_SHADER_32_ABGR);
if (format == V_028710_SPI_SHADER_32_AR && s->options->gfx_level >= GFX10) {
outputs[1] = mrtz_alpha;
write_mask |= 0x2;
enabled_channels |= 0x2;
} else {
outputs[3] = mrtz_alpha;
write_mask |= 0x8;
enabled_channels |= 0x8;
}
}
}
s->exp[s->exp_num++] = nir_export_amd(b, nir_vec(b, outputs, 4),
.base = V_008DFC_SQ_EXP_MRTZ,
.write_mask = write_mask,
.enabled_channels = enabled_channels,
.flags = flags);
return true;
}
@ -357,7 +357,7 @@ emit_ps_color_export(nir_builder *b, lower_ps_state *s, unsigned output_index, u
nir_def *undef = nir_undef(b, 1, 32);
nir_def *outputs[4] = {undef, undef, undef, undef};
unsigned write_mask = 0;
unsigned enabled_channels = 0;
unsigned flags = 0;
nir_alu_type type = s->color_type[output_index];
@ -381,32 +381,32 @@ emit_ps_color_export(nir_builder *b, lower_ps_state *s, unsigned output_index, u
case V_028714_SPI_SHADER_32_R:
if (data[0]) {
outputs[0] = nir_convert_to_bit_size(b, data[0], base_type, 32);
write_mask = 0x1;
enabled_channels = 0x1;
}
break;
case V_028714_SPI_SHADER_32_GR:
if (data[0]) {
outputs[0] = nir_convert_to_bit_size(b, data[0], base_type, 32);
write_mask |= 0x1;
enabled_channels |= 0x1;
}
if (data[1]) {
outputs[1] = nir_convert_to_bit_size(b, data[1], base_type, 32);
write_mask |= 0x2;
enabled_channels |= 0x2;
}
break;
case V_028714_SPI_SHADER_32_AR:
if (data[0]) {
outputs[0] = nir_convert_to_bit_size(b, data[0], base_type, 32);
write_mask |= 0x1;
enabled_channels |= 0x1;
}
if (data[3]) {
unsigned index = s->options->gfx_level >= GFX10 ? 1 : 3;
outputs[index] = nir_convert_to_bit_size(b, data[3], base_type, 32);
write_mask |= BITFIELD_BIT(index);
enabled_channels |= BITFIELD_BIT(index);
}
break;
@ -414,7 +414,7 @@ emit_ps_color_export(nir_builder *b, lower_ps_state *s, unsigned output_index, u
for (int i = 0; i < 4; i++) {
if (data[i]) {
outputs[i] = nir_convert_to_bit_size(b, data[i], base_type, 32);
write_mask |= BITFIELD_BIT(i);
enabled_channels |= BITFIELD_BIT(i);
}
}
break;
@ -499,9 +499,9 @@ emit_ps_color_export(nir_builder *b, lower_ps_state *s, unsigned output_index, u
}
if (s->options->gfx_level >= GFX11)
write_mask |= BITFIELD_BIT(i);
enabled_channels |= BITFIELD_BIT(i);
else
write_mask |= 0x3 << (i * 2);
enabled_channels |= 0x3 << (i * 2);
}
if (s->options->gfx_level < GFX11)
@ -509,14 +509,10 @@ emit_ps_color_export(nir_builder *b, lower_ps_state *s, unsigned output_index, u
}
}
nir_intrinsic_instr *exp = nir_export_amd(b, nir_vec(b, outputs, 4),
.base = target,
.flags = flags);
/* Set the writemask explicitly because write_mask=0 means full write mask. */
nir_intrinsic_set_write_mask(exp, write_mask);
s->exp[s->exp_num++] = exp;
s->exp[s->exp_num++] = nir_export_amd(b, nir_vec(b, outputs, 4),
.base = target,
.enabled_channels = enabled_channels,
.flags = flags);
return true;
}
@ -551,9 +547,9 @@ emit_ps_dual_src_blend_swizzle(nir_builder *b, lower_ps_state *s, unsigned first
nir_instr_move(nir_before_instr(&mrt1_exp->instr), &mrt0_exp->instr);
}
uint32_t mrt0_write_mask = nir_intrinsic_write_mask(mrt0_exp);
uint32_t mrt1_write_mask = nir_intrinsic_write_mask(mrt1_exp);
uint32_t write_mask = mrt0_write_mask | mrt1_write_mask;
uint32_t mrt0_enabled_channels = nir_intrinsic_enabled_channels(mrt0_exp);
uint32_t mrt1_enabled_channels = nir_intrinsic_enabled_channels(mrt1_exp);
uint32_t enabled_channels = mrt0_enabled_channels | mrt1_enabled_channels;
nir_def *mrt0_arg = mrt0_exp->src[0].ssa;
nir_def *mrt1_arg = mrt1_exp->src[0].ssa;
@ -563,7 +559,8 @@ emit_ps_dual_src_blend_swizzle(nir_builder *b, lower_ps_state *s, unsigned first
/* ACO needs to emit the swizzle code via a pseudo instruction. */
if (s->options->use_aco) {
nir_export_dual_src_blend_amd(b, mrt0_arg, mrt1_arg, .write_mask = write_mask);
nir_export_dual_src_blend_amd(b, mrt0_arg, mrt1_arg,
.enabled_channels = MAX2(1, enabled_channels));
nir_instr_remove(&mrt0_exp->instr);
nir_instr_remove(&mrt1_exp->instr);
return;
@ -581,7 +578,7 @@ emit_ps_dual_src_blend_swizzle(nir_builder *b, lower_ps_state *s, unsigned first
* lane0 export arg00 and arg10
* lane1 export arg01 and arg11.
*/
u_foreach_bit (i, write_mask) {
u_foreach_bit (i, enabled_channels) {
nir_def *arg0 = nir_channel(b, mrt0_arg, i);
nir_def *arg1 = nir_channel(b, mrt1_arg, i);
@ -606,8 +603,8 @@ emit_ps_dual_src_blend_swizzle(nir_builder *b, lower_ps_state *s, unsigned first
nir_src_rewrite(&mrt0_exp->src[0], nir_vec(b, arg0_vec, 4));
nir_src_rewrite(&mrt1_exp->src[0], nir_vec(b, arg1_vec, 4));
nir_intrinsic_set_write_mask(mrt0_exp, write_mask);
nir_intrinsic_set_write_mask(mrt1_exp, write_mask);
nir_intrinsic_set_enabled_channels(mrt0_exp, enabled_channels);
nir_intrinsic_set_enabled_channels(mrt1_exp, enabled_channels);
}
static void
@ -635,12 +632,9 @@ emit_ps_null_export(nir_builder *b, lower_ps_state *s)
unsigned target = s->options->gfx_level >= GFX11 ?
V_008DFC_SQ_EXP_MRT : V_008DFC_SQ_EXP_NULL;
nir_intrinsic_instr *intrin =
nir_export_amd(b, nir_undef(b, 4, 32),
.base = target,
.flags = AC_EXP_FLAG_VALID_MASK | AC_EXP_FLAG_DONE);
/* To avoid builder set write mask to 0xf. */
nir_intrinsic_set_write_mask(intrin, 0);
nir_export_amd(b, nir_undef(b, 4, 32),
.base = target,
.flags = AC_EXP_FLAG_VALID_MASK | AC_EXP_FLAG_DONE);
}
static bool

View file

@ -214,10 +214,10 @@ export(nir_builder *b, nir_def *val, nir_def *row, unsigned base, unsigned flags
{
if (row) {
return nir_export_row_amd(b, val, row, .base = base, .flags = flags,
.write_mask = write_mask);
.enabled_channels = write_mask);
} else {
return nir_export_amd(b, val, .base = base, .flags = flags,
.write_mask = write_mask);
.enabled_channels = write_mask);
}
}
@ -464,7 +464,7 @@ ac_nir_export_parameters(nir_builder *b,
nir_export_amd(
b, get_export_output(b, out->outputs[slot]),
.base = V_008DFC_SQ_EXP_PARAM + offset,
.write_mask = write_mask);
.enabled_channels = write_mask);
exported_params |= BITFIELD_BIT(offset);
}
}
@ -826,7 +826,7 @@ ac_nir_ngg_alloc_vertices_fully_culled_workaround(nir_builder *b,
nir_export_amd(b, nir_imm_zero(b, 4, 32),
.base = V_008DFC_SQ_EXP_PRIM,
.flags = AC_EXP_FLAG_DONE,
.write_mask = 1);
.enabled_channels = 1);
/* The HW culls primitives with NaN. -1 is also NaN and can save
* a dword in the binary code because it is encoded as an inline constant.
@ -834,7 +834,7 @@ ac_nir_ngg_alloc_vertices_fully_culled_workaround(nir_builder *b,
nir_export_amd(b, nir_imm_ivec4(b, -1, -1, -1, -1),
.base = V_008DFC_SQ_EXP_POS,
.flags = AC_EXP_FLAG_DONE,
.write_mask = 0xf);
.enabled_channels = 0xf);
}
nir_pop_if(b, if_thread_0);
}

View file

@ -4841,7 +4841,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
case nir_intrinsic_export_row_amd: {
unsigned flags = nir_intrinsic_flags(instr);
unsigned target = nir_intrinsic_base(instr);
unsigned write_mask = nir_intrinsic_write_mask(instr);
unsigned write_mask = nir_intrinsic_enabled_channels(instr);
/* Mark vertex export block. */
if (target == V_008DFC_SQ_EXP_POS || target <= V_008DFC_SQ_EXP_NULL)
@ -4907,7 +4907,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
case nir_intrinsic_export_dual_src_blend_amd: {
Temp val0 = get_ssa_temp(ctx, instr->src[0].ssa);
Temp val1 = get_ssa_temp(ctx, instr->src[1].ssa);
unsigned write_mask = nir_intrinsic_write_mask(instr);
unsigned write_mask = nir_intrinsic_enabled_channels(instr);
struct aco_export_mrt mrt0, mrt1;
for (unsigned i = 0; i < 4; i++) {

View file

@ -3341,7 +3341,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
case nir_intrinsic_export_amd: {
unsigned flags = nir_intrinsic_flags(instr);
unsigned target = nir_intrinsic_base(instr);
unsigned write_mask = nir_intrinsic_write_mask(instr);
unsigned write_mask = nir_intrinsic_enabled_channels(instr);
struct ac_export_args args = {
.target = target,

View file

@ -148,6 +148,9 @@ index("int", "base")
# For store instructions, a writemask for the store.
index("unsigned", "write_mask")
# Same as write_mask, but it can be 0 and the instruction can still have side effects.
index("unsigned", "enabled_channels")
# The stream-id for GS emit_vertex/end_primitive intrinsics.
index("unsigned", "stream_id")
@ -2209,12 +2212,12 @@ system_value("lds_ngg_gs_out_vertex_base_amd", 1)
# src[] = { export_value, row }
# BASE = export target
# FLAGS = AC_EXP_FLAG_*
intrinsic("export_amd", [0], indices=[BASE, WRITE_MASK, FLAGS])
intrinsic("export_row_amd", [0, 1], indices=[BASE, WRITE_MASK, FLAGS])
intrinsic("export_amd", [0], indices=[BASE, ENABLED_CHANNELS, FLAGS])
intrinsic("export_row_amd", [0, 1], indices=[BASE, ENABLED_CHANNELS, FLAGS])
# Export dual source blend outputs with swizzle operation
# src[] = { mrt0, mrt1 }
intrinsic("export_dual_src_blend_amd", [0, 0], indices=[WRITE_MASK])
intrinsic("export_dual_src_blend_amd", [0, 0], indices=[ENABLED_CHANNELS])
# Alpha test reference value
system_value("alpha_reference_amd", 1)