nir: change export_amd intrinsics to use enabled_channels instead of write_mask
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40415>
This commit is contained in:
parent
e7f6c8ab7e
commit
b75a3112fd
5 changed files with 47 additions and 50 deletions
|
|
@ -203,7 +203,7 @@ emit_ps_mrtz_export(nir_builder *b, lower_ps_state *s, nir_def *mrtz_alpha)
|
|||
|
||||
nir_def *undef = nir_undef(b, 1, 32);
|
||||
nir_def *outputs[4] = {undef, undef, undef, undef};
|
||||
unsigned write_mask = 0;
|
||||
unsigned enabled_channels = 0;
|
||||
unsigned flags = 0;
|
||||
|
||||
if (format == V_028710_SPI_SHADER_UINT16_ABGR) {
|
||||
|
|
@ -214,30 +214,30 @@ emit_ps_mrtz_export(nir_builder *b, lower_ps_state *s, nir_def *mrtz_alpha)
|
|||
|
||||
if (s->stencil) {
|
||||
outputs[0] = nir_ishl_imm(b, s->stencil, 16);
|
||||
write_mask |= s->options->gfx_level >= GFX11 ? 0x1 : 0x3;
|
||||
enabled_channels |= s->options->gfx_level >= GFX11 ? 0x1 : 0x3;
|
||||
}
|
||||
|
||||
if (s->sample_mask) {
|
||||
outputs[1] = s->sample_mask;
|
||||
write_mask |= s->options->gfx_level >= GFX11 ? 0x2 : 0xc;
|
||||
enabled_channels |= s->options->gfx_level >= GFX11 ? 0x2 : 0xc;
|
||||
}
|
||||
} else {
|
||||
if (s->depth) {
|
||||
outputs[0] = s->depth;
|
||||
write_mask |= 0x1;
|
||||
enabled_channels |= 0x1;
|
||||
}
|
||||
|
||||
if (s->stencil) {
|
||||
assert(format == V_028710_SPI_SHADER_32_GR ||
|
||||
format == V_028710_SPI_SHADER_32_ABGR);
|
||||
outputs[1] = s->stencil;
|
||||
write_mask |= 0x2;
|
||||
enabled_channels |= 0x2;
|
||||
}
|
||||
|
||||
if (s->sample_mask) {
|
||||
assert(format == V_028710_SPI_SHADER_32_ABGR);
|
||||
outputs[2] = s->sample_mask;
|
||||
write_mask |= 0x4;
|
||||
enabled_channels |= 0x4;
|
||||
}
|
||||
|
||||
if (mrtz_alpha) {
|
||||
|
|
@ -245,17 +245,17 @@ emit_ps_mrtz_export(nir_builder *b, lower_ps_state *s, nir_def *mrtz_alpha)
|
|||
format == V_028710_SPI_SHADER_32_ABGR);
|
||||
if (format == V_028710_SPI_SHADER_32_AR && s->options->gfx_level >= GFX10) {
|
||||
outputs[1] = mrtz_alpha;
|
||||
write_mask |= 0x2;
|
||||
enabled_channels |= 0x2;
|
||||
} else {
|
||||
outputs[3] = mrtz_alpha;
|
||||
write_mask |= 0x8;
|
||||
enabled_channels |= 0x8;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
s->exp[s->exp_num++] = nir_export_amd(b, nir_vec(b, outputs, 4),
|
||||
.base = V_008DFC_SQ_EXP_MRTZ,
|
||||
.write_mask = write_mask,
|
||||
.enabled_channels = enabled_channels,
|
||||
.flags = flags);
|
||||
return true;
|
||||
}
|
||||
|
|
@ -357,7 +357,7 @@ emit_ps_color_export(nir_builder *b, lower_ps_state *s, unsigned output_index, u
|
|||
|
||||
nir_def *undef = nir_undef(b, 1, 32);
|
||||
nir_def *outputs[4] = {undef, undef, undef, undef};
|
||||
unsigned write_mask = 0;
|
||||
unsigned enabled_channels = 0;
|
||||
unsigned flags = 0;
|
||||
|
||||
nir_alu_type type = s->color_type[output_index];
|
||||
|
|
@ -381,32 +381,32 @@ emit_ps_color_export(nir_builder *b, lower_ps_state *s, unsigned output_index, u
|
|||
case V_028714_SPI_SHADER_32_R:
|
||||
if (data[0]) {
|
||||
outputs[0] = nir_convert_to_bit_size(b, data[0], base_type, 32);
|
||||
write_mask = 0x1;
|
||||
enabled_channels = 0x1;
|
||||
}
|
||||
break;
|
||||
|
||||
case V_028714_SPI_SHADER_32_GR:
|
||||
if (data[0]) {
|
||||
outputs[0] = nir_convert_to_bit_size(b, data[0], base_type, 32);
|
||||
write_mask |= 0x1;
|
||||
enabled_channels |= 0x1;
|
||||
}
|
||||
|
||||
if (data[1]) {
|
||||
outputs[1] = nir_convert_to_bit_size(b, data[1], base_type, 32);
|
||||
write_mask |= 0x2;
|
||||
enabled_channels |= 0x2;
|
||||
}
|
||||
break;
|
||||
|
||||
case V_028714_SPI_SHADER_32_AR:
|
||||
if (data[0]) {
|
||||
outputs[0] = nir_convert_to_bit_size(b, data[0], base_type, 32);
|
||||
write_mask |= 0x1;
|
||||
enabled_channels |= 0x1;
|
||||
}
|
||||
|
||||
if (data[3]) {
|
||||
unsigned index = s->options->gfx_level >= GFX10 ? 1 : 3;
|
||||
outputs[index] = nir_convert_to_bit_size(b, data[3], base_type, 32);
|
||||
write_mask |= BITFIELD_BIT(index);
|
||||
enabled_channels |= BITFIELD_BIT(index);
|
||||
}
|
||||
break;
|
||||
|
||||
|
|
@ -414,7 +414,7 @@ emit_ps_color_export(nir_builder *b, lower_ps_state *s, unsigned output_index, u
|
|||
for (int i = 0; i < 4; i++) {
|
||||
if (data[i]) {
|
||||
outputs[i] = nir_convert_to_bit_size(b, data[i], base_type, 32);
|
||||
write_mask |= BITFIELD_BIT(i);
|
||||
enabled_channels |= BITFIELD_BIT(i);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
|
@ -499,9 +499,9 @@ emit_ps_color_export(nir_builder *b, lower_ps_state *s, unsigned output_index, u
|
|||
}
|
||||
|
||||
if (s->options->gfx_level >= GFX11)
|
||||
write_mask |= BITFIELD_BIT(i);
|
||||
enabled_channels |= BITFIELD_BIT(i);
|
||||
else
|
||||
write_mask |= 0x3 << (i * 2);
|
||||
enabled_channels |= 0x3 << (i * 2);
|
||||
}
|
||||
|
||||
if (s->options->gfx_level < GFX11)
|
||||
|
|
@ -509,14 +509,10 @@ emit_ps_color_export(nir_builder *b, lower_ps_state *s, unsigned output_index, u
|
|||
}
|
||||
}
|
||||
|
||||
nir_intrinsic_instr *exp = nir_export_amd(b, nir_vec(b, outputs, 4),
|
||||
.base = target,
|
||||
.flags = flags);
|
||||
|
||||
/* Set the writemask explicitly because write_mask=0 means full write mask. */
|
||||
nir_intrinsic_set_write_mask(exp, write_mask);
|
||||
|
||||
s->exp[s->exp_num++] = exp;
|
||||
s->exp[s->exp_num++] = nir_export_amd(b, nir_vec(b, outputs, 4),
|
||||
.base = target,
|
||||
.enabled_channels = enabled_channels,
|
||||
.flags = flags);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -551,9 +547,9 @@ emit_ps_dual_src_blend_swizzle(nir_builder *b, lower_ps_state *s, unsigned first
|
|||
nir_instr_move(nir_before_instr(&mrt1_exp->instr), &mrt0_exp->instr);
|
||||
}
|
||||
|
||||
uint32_t mrt0_write_mask = nir_intrinsic_write_mask(mrt0_exp);
|
||||
uint32_t mrt1_write_mask = nir_intrinsic_write_mask(mrt1_exp);
|
||||
uint32_t write_mask = mrt0_write_mask | mrt1_write_mask;
|
||||
uint32_t mrt0_enabled_channels = nir_intrinsic_enabled_channels(mrt0_exp);
|
||||
uint32_t mrt1_enabled_channels = nir_intrinsic_enabled_channels(mrt1_exp);
|
||||
uint32_t enabled_channels = mrt0_enabled_channels | mrt1_enabled_channels;
|
||||
|
||||
nir_def *mrt0_arg = mrt0_exp->src[0].ssa;
|
||||
nir_def *mrt1_arg = mrt1_exp->src[0].ssa;
|
||||
|
|
@ -563,7 +559,8 @@ emit_ps_dual_src_blend_swizzle(nir_builder *b, lower_ps_state *s, unsigned first
|
|||
|
||||
/* ACO need to emit the swizzle code by a pseudo instruction. */
|
||||
if (s->options->use_aco) {
|
||||
nir_export_dual_src_blend_amd(b, mrt0_arg, mrt1_arg, .write_mask = write_mask);
|
||||
nir_export_dual_src_blend_amd(b, mrt0_arg, mrt1_arg,
|
||||
.enabled_channels = MAX2(1, enabled_channels));
|
||||
nir_instr_remove(&mrt0_exp->instr);
|
||||
nir_instr_remove(&mrt1_exp->instr);
|
||||
return;
|
||||
|
|
@ -581,7 +578,7 @@ emit_ps_dual_src_blend_swizzle(nir_builder *b, lower_ps_state *s, unsigned first
|
|||
* lane0 export arg00 and arg10
|
||||
* lane1 export arg01 and arg11.
|
||||
*/
|
||||
u_foreach_bit (i, write_mask) {
|
||||
u_foreach_bit (i, enabled_channels) {
|
||||
nir_def *arg0 = nir_channel(b, mrt0_arg, i);
|
||||
nir_def *arg1 = nir_channel(b, mrt1_arg, i);
|
||||
|
||||
|
|
@ -606,8 +603,8 @@ emit_ps_dual_src_blend_swizzle(nir_builder *b, lower_ps_state *s, unsigned first
|
|||
nir_src_rewrite(&mrt0_exp->src[0], nir_vec(b, arg0_vec, 4));
|
||||
nir_src_rewrite(&mrt1_exp->src[0], nir_vec(b, arg1_vec, 4));
|
||||
|
||||
nir_intrinsic_set_write_mask(mrt0_exp, write_mask);
|
||||
nir_intrinsic_set_write_mask(mrt1_exp, write_mask);
|
||||
nir_intrinsic_set_enabled_channels(mrt0_exp, enabled_channels);
|
||||
nir_intrinsic_set_enabled_channels(mrt1_exp, enabled_channels);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -635,12 +632,9 @@ emit_ps_null_export(nir_builder *b, lower_ps_state *s)
|
|||
unsigned target = s->options->gfx_level >= GFX11 ?
|
||||
V_008DFC_SQ_EXP_MRT : V_008DFC_SQ_EXP_NULL;
|
||||
|
||||
nir_intrinsic_instr *intrin =
|
||||
nir_export_amd(b, nir_undef(b, 4, 32),
|
||||
.base = target,
|
||||
.flags = AC_EXP_FLAG_VALID_MASK | AC_EXP_FLAG_DONE);
|
||||
/* To avoid builder set write mask to 0xf. */
|
||||
nir_intrinsic_set_write_mask(intrin, 0);
|
||||
nir_export_amd(b, nir_undef(b, 4, 32),
|
||||
.base = target,
|
||||
.flags = AC_EXP_FLAG_VALID_MASK | AC_EXP_FLAG_DONE);
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
|
|||
|
|
@ -214,10 +214,10 @@ export(nir_builder *b, nir_def *val, nir_def *row, unsigned base, unsigned flags
|
|||
{
|
||||
if (row) {
|
||||
return nir_export_row_amd(b, val, row, .base = base, .flags = flags,
|
||||
.write_mask = write_mask);
|
||||
.enabled_channels = write_mask);
|
||||
} else {
|
||||
return nir_export_amd(b, val, .base = base, .flags = flags,
|
||||
.write_mask = write_mask);
|
||||
.enabled_channels = write_mask);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -464,7 +464,7 @@ ac_nir_export_parameters(nir_builder *b,
|
|||
nir_export_amd(
|
||||
b, get_export_output(b, out->outputs[slot]),
|
||||
.base = V_008DFC_SQ_EXP_PARAM + offset,
|
||||
.write_mask = write_mask);
|
||||
.enabled_channels = write_mask);
|
||||
exported_params |= BITFIELD_BIT(offset);
|
||||
}
|
||||
}
|
||||
|
|
@ -826,7 +826,7 @@ ac_nir_ngg_alloc_vertices_fully_culled_workaround(nir_builder *b,
|
|||
nir_export_amd(b, nir_imm_zero(b, 4, 32),
|
||||
.base = V_008DFC_SQ_EXP_PRIM,
|
||||
.flags = AC_EXP_FLAG_DONE,
|
||||
.write_mask = 1);
|
||||
.enabled_channels = 1);
|
||||
|
||||
/* The HW culls primitives with NaN. -1 is also NaN and can save
|
||||
* a dword in binary code by inlining constant.
|
||||
|
|
@ -834,7 +834,7 @@ ac_nir_ngg_alloc_vertices_fully_culled_workaround(nir_builder *b,
|
|||
nir_export_amd(b, nir_imm_ivec4(b, -1, -1, -1, -1),
|
||||
.base = V_008DFC_SQ_EXP_POS,
|
||||
.flags = AC_EXP_FLAG_DONE,
|
||||
.write_mask = 0xf);
|
||||
.enabled_channels = 0xf);
|
||||
}
|
||||
nir_pop_if(b, if_thread_0);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4841,7 +4841,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
case nir_intrinsic_export_row_amd: {
|
||||
unsigned flags = nir_intrinsic_flags(instr);
|
||||
unsigned target = nir_intrinsic_base(instr);
|
||||
unsigned write_mask = nir_intrinsic_write_mask(instr);
|
||||
unsigned write_mask = nir_intrinsic_enabled_channels(instr);
|
||||
|
||||
/* Mark vertex export block. */
|
||||
if (target == V_008DFC_SQ_EXP_POS || target <= V_008DFC_SQ_EXP_NULL)
|
||||
|
|
@ -4907,7 +4907,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
case nir_intrinsic_export_dual_src_blend_amd: {
|
||||
Temp val0 = get_ssa_temp(ctx, instr->src[0].ssa);
|
||||
Temp val1 = get_ssa_temp(ctx, instr->src[1].ssa);
|
||||
unsigned write_mask = nir_intrinsic_write_mask(instr);
|
||||
unsigned write_mask = nir_intrinsic_enabled_channels(instr);
|
||||
|
||||
struct aco_export_mrt mrt0, mrt1;
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
|
|
|
|||
|
|
@ -3341,7 +3341,7 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
|
|||
case nir_intrinsic_export_amd: {
|
||||
unsigned flags = nir_intrinsic_flags(instr);
|
||||
unsigned target = nir_intrinsic_base(instr);
|
||||
unsigned write_mask = nir_intrinsic_write_mask(instr);
|
||||
unsigned write_mask = nir_intrinsic_enabled_channels(instr);
|
||||
|
||||
struct ac_export_args args = {
|
||||
.target = target,
|
||||
|
|
|
|||
|
|
@ -148,6 +148,9 @@ index("int", "base")
|
|||
# For store instructions, a writemask for the store.
|
||||
index("unsigned", "write_mask")
|
||||
|
||||
# Same as write_mask, but it can be 0 and the instruction still has side effects.
|
||||
index("unsigned", "enabled_channels")
|
||||
|
||||
# The stream-id for GS emit_vertex/end_primitive intrinsics.
|
||||
index("unsigned", "stream_id")
|
||||
|
||||
|
|
@ -2209,12 +2212,12 @@ system_value("lds_ngg_gs_out_vertex_base_amd", 1)
|
|||
# src[] = { export_value, row }
|
||||
# BASE = export target
|
||||
# FLAGS = AC_EXP_FLAG_*
|
||||
intrinsic("export_amd", [0], indices=[BASE, WRITE_MASK, FLAGS])
|
||||
intrinsic("export_row_amd", [0, 1], indices=[BASE, WRITE_MASK, FLAGS])
|
||||
intrinsic("export_amd", [0], indices=[BASE, ENABLED_CHANNELS, FLAGS])
|
||||
intrinsic("export_row_amd", [0, 1], indices=[BASE, ENABLED_CHANNELS, FLAGS])
|
||||
|
||||
# Export dual source blend outputs with swizzle operation
|
||||
# src[] = { mrt0, mrt1 }
|
||||
intrinsic("export_dual_src_blend_amd", [0, 0], indices=[WRITE_MASK])
|
||||
intrinsic("export_dual_src_blend_amd", [0, 0], indices=[ENABLED_CHANNELS])
|
||||
|
||||
# Alpha test reference value
|
||||
system_value("alpha_reference_amd", 1)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue