brw: move coarse_z computation to NIR

Doing the computation in NIR makes the value easy to print with debug printfs.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38996>
This commit is contained in:
Lionel Landwerlin 2025-12-17 16:59:32 +02:00 committed by Marge Bot
parent 98194dfa0b
commit a19e949824
7 changed files with 85 additions and 84 deletions

View file

@ -2484,14 +2484,12 @@ image("store_raw_intel", src_comp=[1, 0])
# Maximum number of polygons processed in the fragment shader
system_value("max_polygon_intel", 1, bit_sizes=[32])
# Maximum number of polygons processed in the fragment shader
intel_fs_values = [
(2, "start"), # X/Y coordinate (screen space) for upper-left vertex of a triangle being rasterized
(2, "z_c"), # z_c Cx/Cy for z plane
(1, "z_c0"), # z_c0 Co for z plane
]
for v in intel_fs_values:
system_value("fs_{0}_intel".format(v[1]), v[0], bit_sizes=[32])
# Screen-space X/Y coordinate of upper-left vertex of the triangle being rasterized
system_value("fs_start_intel", 2, bit_sizes=[32])
# Z plane coefficients: z_c holds Cx/Cy, z_c0 holds Co
system_value("fs_z_c_intel", 2, bit_sizes=[32])
system_value("fs_z_c0_intel", 1, bit_sizes=[32])
# Read the attribute thread payload at a given byte offset
# src[] = { offset }

View file

@ -7,9 +7,9 @@ traces:
0ad/0ad-v2.trace:
gl-zink-anv-adl:
label: [no-perf]
checksum: 0d04e54fa259d407433a7925c739900a
checksum: ff0d4b072dd613b6f11c351027db3bb3
gl-zink-anv-tgl:
checksum: 0d04e54fa259d407433a7925c739900a
checksum: ff0d4b072dd613b6f11c351027db3bb3
zink-radv-vangogh:
checksum: 52cabbe16a14628f92df31e0fb4c109e
zink-radv-gfx1201:

View file

@ -451,83 +451,28 @@ brw_emit_interpolation_setup(brw_shader &s)
}
}
abld = bld.annotate("compute pos.z");
brw_reg coarse_z;
if (wm_prog_data->coarse_pixel_dispatch != INTEL_NEVER &&
wm_prog_data->uses_depth_w_coefficients) {
/* In coarse pixel mode, the HW doesn't interpolate Z coordinate
* properly. In the same way we have to add the coarse pixel size to
* pixels locations, here we recompute the Z value with 2 coefficients
* in X & Y axis.
*
* src_z = (x - xstart)*z_cx + (y - ystart)*z_cy + z_c0
*/
if (wm_prog_data->uses_depth_w_coefficients) {
brw_reg coef_payload = brw_vec8_grf(payload.depth_w_coef_reg, 0);
const brw_reg x_start = devinfo->ver >= 20 ?
s.x_start = devinfo->ver >= 20 ?
brw_vec1_grf(coef_payload.nr, 6) :
brw_vec1_grf(coef_payload.nr, 2);
const brw_reg y_start = devinfo->ver >= 20 ?
s.y_start = devinfo->ver >= 20 ?
brw_vec1_grf(coef_payload.nr, 7) :
brw_vec1_grf(coef_payload.nr, 6);
const brw_reg z_cx = devinfo->ver >= 20 ?
s.z_cx = devinfo->ver >= 20 ?
brw_vec1_grf(coef_payload.nr + 1, 1) :
brw_vec1_grf(coef_payload.nr, 1);
const brw_reg z_cy = devinfo->ver >= 20 ?
s.z_cy = devinfo->ver >= 20 ?
brw_vec1_grf(coef_payload.nr + 1, 0) :
brw_vec1_grf(coef_payload.nr, 0);
const brw_reg z_c0 = devinfo->ver >= 20 ?
s.z_c0 = devinfo->ver >= 20 ?
brw_vec1_grf(coef_payload.nr + 1, 2) :
brw_vec1_grf(coef_payload.nr, 3);
const brw_reg float_pixel_x = abld.vgrf(BRW_TYPE_F);
const brw_reg float_pixel_y = abld.vgrf(BRW_TYPE_F);
abld.MOV(float_pixel_x, s.uw_pixel_x);
abld.MOV(float_pixel_y, s.uw_pixel_y);
abld.ADD(float_pixel_x, float_pixel_x, negate(x_start));
abld.ADD(float_pixel_y, float_pixel_y, negate(y_start));
const brw_reg f_cps_width = abld.vgrf(BRW_TYPE_F);
const brw_reg f_cps_height = abld.vgrf(BRW_TYPE_F);
abld.MOV(f_cps_width, ub_cps_width);
abld.MOV(f_cps_height, ub_cps_height);
/* Center in the middle of the coarse pixel. */
abld.MAD(float_pixel_x, float_pixel_x, f_cps_width, brw_imm_f(0.5f));
abld.MAD(float_pixel_y, float_pixel_y, f_cps_height, brw_imm_f(0.5f));
coarse_z = abld.vgrf(BRW_TYPE_F);
abld.MAD(coarse_z, z_c0, z_cx, float_pixel_x);
abld.MAD(coarse_z, coarse_z, z_cy, float_pixel_y);
}
if (wm_prog_data->uses_src_depth)
s.pixel_z = brw_fetch_payload_reg(bld, payload.source_depth_reg);
if (wm_prog_data->uses_depth_w_coefficients ||
wm_prog_data->uses_src_depth) {
switch (wm_prog_data->coarse_pixel_dispatch) {
case INTEL_NEVER:
break;
case INTEL_SOMETIMES:
/* We cannot enable 3DSTATE_PS_EXTRA::PixelShaderUsesSourceDepth when
* coarse is enabled. Here we don't know if it's going to be, but
* setting brw_wm_prog_data::uses_src_depth dynamically would disturb
* the payload. So instead rely on the computed coarse_z which will
* produce a correct value even when coarse is disabled.
*/
/* Fallthrough */
case INTEL_ALWAYS:
assert(!wm_prog_data->uses_src_depth);
assert(wm_prog_data->uses_depth_w_coefficients);
s.pixel_z = coarse_z;
break;
}
}
if (wm_prog_data->uses_src_w) {
abld = bld.annotate("compute pos.w");
s.pixel_w = brw_fetch_payload_reg(abld, payload.source_w_reg);
@ -1534,22 +1479,10 @@ brw_compile_fs(const struct brw_compiler *compiler,
if (!brw_can_coherent_fb_fetch(devinfo))
NIR_PASS(_, nir, brw_nir_lower_fs_load_output, key);
/* Do this lowering before brw_nir_populate_wm_prog_data(). */
NIR_PASS(_, nir, nir_opt_frag_coord_to_pixel_coord);
NIR_PASS(_, nir, nir_lower_frag_coord_to_pixel_coord);
/* From the SKL PRM, Volume 7, "Alpha Coverage":
* "If Pixel Shader outputs oMask, AlphaToCoverage is disabled in
* hardware, regardless of the state setting for this feature."
*/
if (key->alpha_to_coverage != INTEL_NEVER) {
/* Run constant fold optimization in order to get the correct source
* offset to determine render target 0 store instruction in
* emit_alpha_to_coverage pass.
*/
NIR_PASS(_, nir, nir_opt_constant_folding);
NIR_PASS(_, nir, brw_nir_lower_alpha_to_coverage);
}
NIR_PASS(_, nir, brw_nir_move_interpolation_to_top);
brw_nir_cleanup_pre_wm_prog_data(nir);
@ -1561,6 +1494,22 @@ brw_compile_fs(const struct brw_compiler *compiler,
params->mue_map,
per_primitive_offsets);
/* From the SKL PRM, Volume 7, "Alpha Coverage":
* "If Pixel Shader outputs oMask, AlphaToCoverage is disabled in
* hardware, regardless of the state setting for this feature."
*/
if (prog_data->alpha_to_coverage != INTEL_NEVER) {
/* Run constant fold optimization in order to get the correct source
* offset to determine render target 0 store instruction in
* emit_alpha_to_coverage pass.
*/
NIR_PASS(_, nir, nir_opt_constant_folding);
NIR_PASS(_, nir, brw_nir_lower_alpha_to_coverage);
}
if (prog_data->coarse_pixel_dispatch != INTEL_NEVER)
NIR_PASS(_, nir, brw_nir_lower_frag_coord_z);
if (!brw_wm_prog_key_is_dynamic(key)) {
uint32_t f = 0;

View file

@ -3812,6 +3812,22 @@ brw_from_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
bld.MOV(dest, s.wpos_w);
break;
case nir_intrinsic_load_fs_start_intel: {
brw_reg comps[2] = { s.x_start, s.y_start };
bld.VEC(retype(dest, BRW_TYPE_F), comps, 2);
break;
}
case nir_intrinsic_load_fs_z_c_intel: {
brw_reg comps[2] = { s.z_cx, s.z_cy };
bld.VEC(retype(dest, BRW_TYPE_F), comps, 2);
break;
}
case nir_intrinsic_load_fs_z_c0_intel:
bld.MOV(dest, s.z_c0);
break;
case nir_intrinsic_load_front_face:
bld.MOV(retype(dest, BRW_TYPE_D), emit_frontfacing_interpolation(ntb));
break;

View file

@ -1307,6 +1307,36 @@ brw_nir_lower_fs_outputs(nir_shader *nir)
nir->info.disable_output_offset_src_constant_folding = true;
}
/**
 * Intrinsic callback: rewrite load_frag_coord_z as an explicit evaluation of
 * the Z plane equation
 *
 *    z = dot(pixel_center - start, z_c) + z_c0
 *
 * where pixel_center is load_pixel_coord converted to float and offset by
 * 0.5, and start / z_c / z_c0 are read through the fs_start_intel,
 * fs_z_c_intel and fs_z_c0_intel loads.
 *
 * Returns true when the intrinsic was rewritten, false when it is not a
 * load_frag_coord_z and was left untouched.
 */
static bool
lower_frag_coord_z_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *)
{
   if (intrin->intrinsic != nir_intrinsic_load_frag_coord_z)
      return false;

   /* Emit the replacement right after the intrinsic being lowered. */
   b->cursor = nir_after_instr(&intrin->instr);

   /* Set the builder's fp_math_ctrl to nir_fp_no_fast_math before emitting
    * the arithmetic below.
    */
   b->fp_math_ctrl = nir_fp_no_fast_math;

   /* Plane parameters. */
   nir_def *plane_origin = nir_load_fs_start_intel(b);
   nir_def *plane_slope = nir_load_fs_z_c_intel(b);
   nir_def *plane_bias = nir_load_fs_z_c0_intel(b);

   /* Integer pixel coordinate -> float, shifted by half a pixel. */
   nir_def *pixel_int = nir_load_pixel_coord(b);
   nir_def *pixel_flt = nir_i2f32(b, pixel_int);
   nir_def *pixel_center = nir_fadd_imm(b, pixel_flt, 0.5f);

   /* (pixel_center - start) . z_c + z_c0 */
   nir_def *delta = nir_fsub(b, pixel_center, plane_origin);
   nir_def *weighted = nir_fdot(b, delta, plane_slope);
   nir_def *z_value = nir_fadd(b, weighted, plane_bias);

   /* Swap the intrinsic's result for the computed value. */
   nir_def_replace(&intrin->def, z_value);

   return true;
}
/**
 * Apply lower_frag_coord_z_instr to the intrinsics of the given shader via
 * nir_shader_intrinsics_pass, passing nir_metadata_control_flow as the
 * metadata argument and no callback data.
 *
 * Returns whatever nir_shader_intrinsics_pass reports.
 */
bool
brw_nir_lower_frag_coord_z(nir_shader *nir)
{
   const bool progress =
      nir_shader_intrinsics_pass(nir, lower_frag_coord_z_instr,
                                 nir_metadata_control_flow, NULL);

   return progress;
}
static bool
tag_speculative_access(nir_builder *b,
nir_intrinsic_instr *intrin,

View file

@ -260,6 +260,8 @@ void brw_nir_lower_fs_outputs(nir_shader *nir);
bool brw_nir_lower_fs_load_output(nir_shader *shader,
const struct brw_wm_prog_key *key);
bool brw_nir_lower_frag_coord_z(nir_shader *nir);
bool brw_nir_lower_cmat(nir_shader *nir, unsigned subgroup_size);
struct brw_nir_lower_storage_image_opts {

View file

@ -211,6 +211,12 @@ public:
brw_reg pixel_z;
brw_reg wpos_w;
brw_reg pixel_w;
brw_reg x_start;
brw_reg y_start;
brw_reg z_cx;
brw_reg z_cy;
brw_reg z_c0;
brw_reg delta_xy[INTEL_BARYCENTRIC_MODE_COUNT];
brw_reg final_gs_vertex_count;
brw_reg control_data_bits;