From b75a3112fda70227ae6f9ee07c7ecd136b0a03a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 24 Feb 2026 09:18:31 -0500 Subject: [PATCH] nir: change export_amd intrinsics to use enabled_channels instead of write_mask Acked-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Rhys Perry Part-of: --- src/amd/common/nir/ac_nir_lower_ps_late.c | 72 +++++++++---------- src/amd/common/nir/ac_nir_prerast_utils.c | 10 +-- .../aco_select_nir_intrinsics.cpp | 4 +- src/amd/llvm/ac_nir_to_llvm.c | 2 +- src/compiler/nir/nir_intrinsics.py | 9 ++- 5 files changed, 47 insertions(+), 50 deletions(-) diff --git a/src/amd/common/nir/ac_nir_lower_ps_late.c b/src/amd/common/nir/ac_nir_lower_ps_late.c index fffb1513c8f..9ffed340be4 100644 --- a/src/amd/common/nir/ac_nir_lower_ps_late.c +++ b/src/amd/common/nir/ac_nir_lower_ps_late.c @@ -203,7 +203,7 @@ emit_ps_mrtz_export(nir_builder *b, lower_ps_state *s, nir_def *mrtz_alpha) nir_def *undef = nir_undef(b, 1, 32); nir_def *outputs[4] = {undef, undef, undef, undef}; - unsigned write_mask = 0; + unsigned enabled_channels = 0; unsigned flags = 0; if (format == V_028710_SPI_SHADER_UINT16_ABGR) { @@ -214,30 +214,30 @@ emit_ps_mrtz_export(nir_builder *b, lower_ps_state *s, nir_def *mrtz_alpha) if (s->stencil) { outputs[0] = nir_ishl_imm(b, s->stencil, 16); - write_mask |= s->options->gfx_level >= GFX11 ? 0x1 : 0x3; + enabled_channels |= s->options->gfx_level >= GFX11 ? 0x1 : 0x3; } if (s->sample_mask) { outputs[1] = s->sample_mask; - write_mask |= s->options->gfx_level >= GFX11 ? 0x2 : 0xc; + enabled_channels |= s->options->gfx_level >= GFX11 ? 0x2 : 0xc; } } else { if (s->depth) { outputs[0] = s->depth; - write_mask |= 0x1; + enabled_channels |= 0x1; } if (s->stencil) { assert(format == V_028710_SPI_SHADER_32_GR || format == V_028710_SPI_SHADER_32_ABGR); outputs[1] = s->stencil; - write_mask |= 0x2; + enabled_channels |= 0x2; } if (s->sample_mask) { assert(format == V_028710_SPI_SHADER_32_ABGR); outputs[2] = s->sample_mask; - write_mask |= 0x4; + enabled_channels |= 0x4; } if (mrtz_alpha) { @@ -245,17 +245,17 @@ emit_ps_mrtz_export(nir_builder *b, lower_ps_state *s, nir_def *mrtz_alpha) format == V_028710_SPI_SHADER_32_ABGR); if (format == V_028710_SPI_SHADER_32_AR && s->options->gfx_level >= GFX10) { outputs[1] = mrtz_alpha; - write_mask |= 0x2; + enabled_channels |= 0x2; } else { outputs[3] = mrtz_alpha; - write_mask |= 0x8; + enabled_channels |= 0x8; } } } s->exp[s->exp_num++] = nir_export_amd(b, nir_vec(b, outputs, 4), .base = V_008DFC_SQ_EXP_MRTZ, - .write_mask = write_mask, + .enabled_channels = enabled_channels, .flags = flags); return true; } @@ -357,7 +357,7 @@ emit_ps_color_export(nir_builder *b, lower_ps_state *s, unsigned output_index, u nir_def *undef = nir_undef(b, 1, 32); nir_def *outputs[4] = {undef, undef, undef, undef}; - unsigned write_mask = 0; + unsigned enabled_channels = 0; unsigned flags = 0; nir_alu_type type = s->color_type[output_index]; @@ -381,32 +381,32 @@ emit_ps_color_export(nir_builder *b, lower_ps_state *s, unsigned output_index, u case V_028714_SPI_SHADER_32_R: if (data[0]) { outputs[0] = nir_convert_to_bit_size(b, data[0], base_type, 32); - write_mask = 0x1; + enabled_channels = 0x1; } break; case V_028714_SPI_SHADER_32_GR: if (data[0]) { outputs[0] = nir_convert_to_bit_size(b, data[0], base_type, 32); - write_mask |= 0x1; + enabled_channels |= 0x1; } if (data[1]) { outputs[1] = nir_convert_to_bit_size(b, data[1], base_type, 32); - write_mask |= 0x2; + enabled_channels |= 0x2; } break; case V_028714_SPI_SHADER_32_AR: if (data[0]) { outputs[0] = nir_convert_to_bit_size(b, data[0], base_type, 32); - write_mask |= 0x1; + enabled_channels |= 0x1; } if (data[3]) { unsigned index = s->options->gfx_level >= GFX10 ? 1 : 3; outputs[index] = nir_convert_to_bit_size(b, data[3], base_type, 32); - write_mask |= BITFIELD_BIT(index); + enabled_channels |= BITFIELD_BIT(index); } break; @@ -414,7 +414,7 @@ emit_ps_color_export(nir_builder *b, lower_ps_state *s, unsigned output_index, u for (int i = 0; i < 4; i++) { if (data[i]) { outputs[i] = nir_convert_to_bit_size(b, data[i], base_type, 32); - write_mask |= BITFIELD_BIT(i); + enabled_channels |= BITFIELD_BIT(i); } } break; @@ -499,9 +499,9 @@ emit_ps_color_export(nir_builder *b, lower_ps_state *s, unsigned output_index, u } if (s->options->gfx_level >= GFX11) - write_mask |= BITFIELD_BIT(i); + enabled_channels |= BITFIELD_BIT(i); else - write_mask |= 0x3 << (i * 2); + enabled_channels |= 0x3 << (i * 2); } if (s->options->gfx_level < GFX11) @@ -509,14 +509,10 @@ emit_ps_color_export(nir_builder *b, lower_ps_state *s, unsigned output_index, u } } - nir_intrinsic_instr *exp = nir_export_amd(b, nir_vec(b, outputs, 4), - .base = target, - .flags = flags); - - /* Set the writemask explicitly because write_mask=0 means full write mask. */ - nir_intrinsic_set_write_mask(exp, write_mask); - - s->exp[s->exp_num++] = exp; + s->exp[s->exp_num++] = nir_export_amd(b, nir_vec(b, outputs, 4), + .base = target, + .enabled_channels = enabled_channels, + .flags = flags); return true; } @@ -551,9 +547,9 @@ emit_ps_dual_src_blend_swizzle(nir_builder *b, lower_ps_state *s, unsigned first nir_instr_move(nir_before_instr(&mrt1_exp->instr), &mrt0_exp->instr); } - uint32_t mrt0_write_mask = nir_intrinsic_write_mask(mrt0_exp); - uint32_t mrt1_write_mask = nir_intrinsic_write_mask(mrt1_exp); - uint32_t write_mask = mrt0_write_mask | mrt1_write_mask; + uint32_t mrt0_enabled_channels = nir_intrinsic_enabled_channels(mrt0_exp); + uint32_t mrt1_enabled_channels = nir_intrinsic_enabled_channels(mrt1_exp); + uint32_t enabled_channels = mrt0_enabled_channels | mrt1_enabled_channels; nir_def *mrt0_arg = mrt0_exp->src[0].ssa; nir_def *mrt1_arg = mrt1_exp->src[0].ssa; @@ -563,7 +559,8 @@ emit_ps_dual_src_blend_swizzle(nir_builder *b, lower_ps_state *s, unsigned first /* ACO need to emit the swizzle code by a pseudo instruction. */ if (s->options->use_aco) { - nir_export_dual_src_blend_amd(b, mrt0_arg, mrt1_arg, .write_mask = write_mask); + nir_export_dual_src_blend_amd(b, mrt0_arg, mrt1_arg, + .enabled_channels = MAX2(1, enabled_channels)); nir_instr_remove(&mrt0_exp->instr); nir_instr_remove(&mrt1_exp->instr); return; @@ -581,7 +578,7 @@ emit_ps_dual_src_blend_swizzle(nir_builder *b, lower_ps_state *s, unsigned first * lane0 export arg00 and arg10 * lane1 export arg01 and arg11. */ - u_foreach_bit (i, write_mask) { + u_foreach_bit (i, enabled_channels) { nir_def *arg0 = nir_channel(b, mrt0_arg, i); nir_def *arg1 = nir_channel(b, mrt1_arg, i); @@ -606,8 +603,8 @@ emit_ps_dual_src_blend_swizzle(nir_builder *b, lower_ps_state *s, unsigned first nir_src_rewrite(&mrt0_exp->src[0], nir_vec(b, arg0_vec, 4)); nir_src_rewrite(&mrt1_exp->src[0], nir_vec(b, arg1_vec, 4)); - nir_intrinsic_set_write_mask(mrt0_exp, write_mask); - nir_intrinsic_set_write_mask(mrt1_exp, write_mask); + nir_intrinsic_set_enabled_channels(mrt0_exp, enabled_channels); + nir_intrinsic_set_enabled_channels(mrt1_exp, enabled_channels); } static void @@ -635,12 +632,9 @@ emit_ps_null_export(nir_builder *b, lower_ps_state *s) unsigned target = s->options->gfx_level >= GFX11 ? V_008DFC_SQ_EXP_MRT : V_008DFC_SQ_EXP_NULL; - nir_intrinsic_instr *intrin = - nir_export_amd(b, nir_undef(b, 4, 32), - .base = target, - .flags = AC_EXP_FLAG_VALID_MASK | AC_EXP_FLAG_DONE); - /* To avoid builder set write mask to 0xf. */ - nir_intrinsic_set_write_mask(intrin, 0); + nir_export_amd(b, nir_undef(b, 4, 32), + .base = target, + .flags = AC_EXP_FLAG_VALID_MASK | AC_EXP_FLAG_DONE); } static bool diff --git a/src/amd/common/nir/ac_nir_prerast_utils.c b/src/amd/common/nir/ac_nir_prerast_utils.c index 08b56b025d2..b85400f0428 100644 --- a/src/amd/common/nir/ac_nir_prerast_utils.c +++ b/src/amd/common/nir/ac_nir_prerast_utils.c @@ -214,10 +214,10 @@ export(nir_builder *b, nir_def *val, nir_def *row, unsigned base, unsigned flags { if (row) { return nir_export_row_amd(b, val, row, .base = base, .flags = flags, - .write_mask = write_mask); + .enabled_channels = write_mask); } else { return nir_export_amd(b, val, .base = base, .flags = flags, - .write_mask = write_mask); + .enabled_channels = write_mask); } } @@ -464,7 +464,7 @@ ac_nir_export_parameters(nir_builder *b, nir_export_amd( b, get_export_output(b, out->outputs[slot]), .base = V_008DFC_SQ_EXP_PARAM + offset, - .write_mask = write_mask); + .enabled_channels = write_mask); exported_params |= BITFIELD_BIT(offset); } } @@ -826,7 +826,7 @@ ac_nir_ngg_alloc_vertices_fully_culled_workaround(nir_builder *b, nir_export_amd(b, nir_imm_zero(b, 4, 32), .base = V_008DFC_SQ_EXP_PRIM, .flags = AC_EXP_FLAG_DONE, - .write_mask = 1); + .enabled_channels = 1); /* The HW culls primitives with NaN. -1 is also NaN and can save * a dword in binary code by inlining constant. @@ -834,7 +834,7 @@ ac_nir_ngg_alloc_vertices_fully_culled_workaround(nir_builder *b, nir_export_amd(b, nir_imm_ivec4(b, -1, -1, -1, -1), .base = V_008DFC_SQ_EXP_POS, .flags = AC_EXP_FLAG_DONE, - .write_mask = 0xf); + .enabled_channels = 0xf); } nir_pop_if(b, if_thread_0); } diff --git a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp index 977f870d5a7..bf191749d77 100644 --- a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp +++ b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp @@ -4841,7 +4841,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) case nir_intrinsic_export_row_amd: { unsigned flags = nir_intrinsic_flags(instr); unsigned target = nir_intrinsic_base(instr); - unsigned write_mask = nir_intrinsic_write_mask(instr); + unsigned write_mask = nir_intrinsic_enabled_channels(instr); /* Mark vertex export block. */ if (target == V_008DFC_SQ_EXP_POS || target <= V_008DFC_SQ_EXP_NULL) @@ -4907,7 +4907,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) case nir_intrinsic_export_dual_src_blend_amd: { Temp val0 = get_ssa_temp(ctx, instr->src[0].ssa); Temp val1 = get_ssa_temp(ctx, instr->src[1].ssa); - unsigned write_mask = nir_intrinsic_write_mask(instr); + unsigned write_mask = nir_intrinsic_enabled_channels(instr); struct aco_export_mrt mrt0, mrt1; for (unsigned i = 0; i < 4; i++) { diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index a4335deb1e2..6f6ce9a2535 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -3341,7 +3341,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins case nir_intrinsic_export_amd: { unsigned flags = nir_intrinsic_flags(instr); unsigned target = nir_intrinsic_base(instr); - unsigned write_mask = nir_intrinsic_write_mask(instr); + unsigned write_mask = nir_intrinsic_enabled_channels(instr); struct ac_export_args args = { .target = target, diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index d474a1a0ca0..50eed873747 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -148,6 +148,9 @@ index("int", "base") # For store instructions, a writemask for the store. index("unsigned", "write_mask") +# Same as write_mask, but can be 0 and still have side effects +index("unsigned", "enabled_channels") + # The stream-id for GS emit_vertex/end_primitive intrinsics. index("unsigned", "stream_id") @@ -2209,12 +2212,12 @@ system_value("lds_ngg_gs_out_vertex_base_amd", 1) # src[] = { export_value, row } # BASE = export target # FLAGS = AC_EXP_FLAG_* -intrinsic("export_amd", [0], indices=[BASE, WRITE_MASK, FLAGS]) -intrinsic("export_row_amd", [0, 1], indices=[BASE, WRITE_MASK, FLAGS]) +intrinsic("export_amd", [0], indices=[BASE, ENABLED_CHANNELS, FLAGS]) +intrinsic("export_row_amd", [0, 1], indices=[BASE, ENABLED_CHANNELS, FLAGS]) # Export dual source blend outputs with swizzle operation # src[] = { mrt0, mrt1 } -intrinsic("export_dual_src_blend_amd", [0, 0], indices=[WRITE_MASK]) +intrinsic("export_dual_src_blend_amd", [0, 0], indices=[ENABLED_CHANNELS]) # Alpha test reference value system_value("alpha_reference_amd", 1)