brw: move coarse_z computation to NIR
So that we can print it easily with debug printfs. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38996>
This commit is contained in:
parent
98194dfa0b
commit
a19e949824
7 changed files with 85 additions and 84 deletions
|
|
@ -2484,14 +2484,12 @@ image("store_raw_intel", src_comp=[1, 0])
|
|||
# Maximum number of polygons processed in the fragment shader
|
||||
system_value("max_polygon_intel", 1, bit_sizes=[32])
|
||||
|
||||
# Maximum number of polygons processed in the fragment shader
|
||||
intel_fs_values = [
|
||||
(2, "start"), # X/Y coordinate (screen space) for upper-left vertex of a triangle being rasterized
|
||||
(2, "z_c"), # z_c – Cx/Cy for z plane
|
||||
(1, "z_c0"), # z_c0 – Co for z plane
|
||||
]
|
||||
for v in intel_fs_values:
|
||||
system_value("fs_{0}_intel".format(v[1]), v[0], bit_sizes=[32])
|
||||
# Screen-space X/Y coordinate of upper-left vertex of the triangle being rasterized
|
||||
system_value("fs_start_intel", 2, bit_sizes=[32])
|
||||
|
||||
# z_c, z_c0 (Cx/Cy/Co for Z plane)
|
||||
system_value("fs_z_c_intel", 2, bit_sizes=[32])
|
||||
system_value("fs_z_c0_intel", 1, bit_sizes=[32])
|
||||
|
||||
# Read the attribute thread payload at a given byte offset
|
||||
# src[] = { offset }
|
||||
|
|
|
|||
|
|
@ -7,9 +7,9 @@ traces:
|
|||
0ad/0ad-v2.trace:
|
||||
gl-zink-anv-adl:
|
||||
label: [no-perf]
|
||||
checksum: 0d04e54fa259d407433a7925c739900a
|
||||
checksum: ff0d4b072dd613b6f11c351027db3bb3
|
||||
gl-zink-anv-tgl:
|
||||
checksum: 0d04e54fa259d407433a7925c739900a
|
||||
checksum: ff0d4b072dd613b6f11c351027db3bb3
|
||||
zink-radv-vangogh:
|
||||
checksum: 52cabbe16a14628f92df31e0fb4c109e
|
||||
zink-radv-gfx1201:
|
||||
|
|
|
|||
|
|
@ -451,83 +451,28 @@ brw_emit_interpolation_setup(brw_shader &s)
|
|||
}
|
||||
}
|
||||
|
||||
abld = bld.annotate("compute pos.z");
|
||||
brw_reg coarse_z;
|
||||
if (wm_prog_data->coarse_pixel_dispatch != INTEL_NEVER &&
|
||||
wm_prog_data->uses_depth_w_coefficients) {
|
||||
/* In coarse pixel mode, the HW doesn't interpolate Z coordinate
|
||||
* properly. In the same way we have to add the coarse pixel size to
|
||||
* pixels locations, here we recompute the Z value with 2 coefficients
|
||||
* in X & Y axis.
|
||||
*
|
||||
* src_z = (x - xstart)*z_cx + (y - ystart)*z_cy + z_c0
|
||||
*/
|
||||
if (wm_prog_data->uses_depth_w_coefficients) {
|
||||
brw_reg coef_payload = brw_vec8_grf(payload.depth_w_coef_reg, 0);
|
||||
const brw_reg x_start = devinfo->ver >= 20 ?
|
||||
s.x_start = devinfo->ver >= 20 ?
|
||||
brw_vec1_grf(coef_payload.nr, 6) :
|
||||
brw_vec1_grf(coef_payload.nr, 2);
|
||||
const brw_reg y_start = devinfo->ver >= 20 ?
|
||||
s.y_start = devinfo->ver >= 20 ?
|
||||
brw_vec1_grf(coef_payload.nr, 7) :
|
||||
brw_vec1_grf(coef_payload.nr, 6);
|
||||
const brw_reg z_cx = devinfo->ver >= 20 ?
|
||||
s.z_cx = devinfo->ver >= 20 ?
|
||||
brw_vec1_grf(coef_payload.nr + 1, 1) :
|
||||
brw_vec1_grf(coef_payload.nr, 1);
|
||||
const brw_reg z_cy = devinfo->ver >= 20 ?
|
||||
s.z_cy = devinfo->ver >= 20 ?
|
||||
brw_vec1_grf(coef_payload.nr + 1, 0) :
|
||||
brw_vec1_grf(coef_payload.nr, 0);
|
||||
const brw_reg z_c0 = devinfo->ver >= 20 ?
|
||||
s.z_c0 = devinfo->ver >= 20 ?
|
||||
brw_vec1_grf(coef_payload.nr + 1, 2) :
|
||||
brw_vec1_grf(coef_payload.nr, 3);
|
||||
|
||||
const brw_reg float_pixel_x = abld.vgrf(BRW_TYPE_F);
|
||||
const brw_reg float_pixel_y = abld.vgrf(BRW_TYPE_F);
|
||||
|
||||
abld.MOV(float_pixel_x, s.uw_pixel_x);
|
||||
abld.MOV(float_pixel_y, s.uw_pixel_y);
|
||||
|
||||
abld.ADD(float_pixel_x, float_pixel_x, negate(x_start));
|
||||
abld.ADD(float_pixel_y, float_pixel_y, negate(y_start));
|
||||
|
||||
const brw_reg f_cps_width = abld.vgrf(BRW_TYPE_F);
|
||||
const brw_reg f_cps_height = abld.vgrf(BRW_TYPE_F);
|
||||
abld.MOV(f_cps_width, ub_cps_width);
|
||||
abld.MOV(f_cps_height, ub_cps_height);
|
||||
|
||||
/* Center in the middle of the coarse pixel. */
|
||||
abld.MAD(float_pixel_x, float_pixel_x, f_cps_width, brw_imm_f(0.5f));
|
||||
abld.MAD(float_pixel_y, float_pixel_y, f_cps_height, brw_imm_f(0.5f));
|
||||
|
||||
coarse_z = abld.vgrf(BRW_TYPE_F);
|
||||
abld.MAD(coarse_z, z_c0, z_cx, float_pixel_x);
|
||||
abld.MAD(coarse_z, coarse_z, z_cy, float_pixel_y);
|
||||
}
|
||||
|
||||
if (wm_prog_data->uses_src_depth)
|
||||
s.pixel_z = brw_fetch_payload_reg(bld, payload.source_depth_reg);
|
||||
|
||||
if (wm_prog_data->uses_depth_w_coefficients ||
|
||||
wm_prog_data->uses_src_depth) {
|
||||
switch (wm_prog_data->coarse_pixel_dispatch) {
|
||||
case INTEL_NEVER:
|
||||
break;
|
||||
|
||||
case INTEL_SOMETIMES:
|
||||
/* We cannot enable 3DSTATE_PS_EXTRA::PixelShaderUsesSourceDepth when
|
||||
* coarse is enabled. Here we don't know if it's going to be, but
|
||||
* setting brw_wm_prog_data::uses_src_depth dynamically would disturb
|
||||
* the payload. So instead rely on the computed coarse_z which will
|
||||
* produce a correct value even when coarse is disabled.
|
||||
*/
|
||||
|
||||
/* Fallthrough */
|
||||
case INTEL_ALWAYS:
|
||||
assert(!wm_prog_data->uses_src_depth);
|
||||
assert(wm_prog_data->uses_depth_w_coefficients);
|
||||
s.pixel_z = coarse_z;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (wm_prog_data->uses_src_w) {
|
||||
abld = bld.annotate("compute pos.w");
|
||||
s.pixel_w = brw_fetch_payload_reg(abld, payload.source_w_reg);
|
||||
|
|
@ -1534,22 +1479,10 @@ brw_compile_fs(const struct brw_compiler *compiler,
|
|||
if (!brw_can_coherent_fb_fetch(devinfo))
|
||||
NIR_PASS(_, nir, brw_nir_lower_fs_load_output, key);
|
||||
|
||||
/* Do this lowering before brw_nir_populate_wm_prog_data(). */
|
||||
NIR_PASS(_, nir, nir_opt_frag_coord_to_pixel_coord);
|
||||
NIR_PASS(_, nir, nir_lower_frag_coord_to_pixel_coord);
|
||||
|
||||
/* From the SKL PRM, Volume 7, "Alpha Coverage":
|
||||
* "If Pixel Shader outputs oMask, AlphaToCoverage is disabled in
|
||||
* hardware, regardless of the state setting for this feature."
|
||||
*/
|
||||
if (key->alpha_to_coverage != INTEL_NEVER) {
|
||||
/* Run constant fold optimization in order to get the correct source
|
||||
* offset to determine render target 0 store instruction in
|
||||
* emit_alpha_to_coverage pass.
|
||||
*/
|
||||
NIR_PASS(_, nir, nir_opt_constant_folding);
|
||||
NIR_PASS(_, nir, brw_nir_lower_alpha_to_coverage);
|
||||
}
|
||||
|
||||
NIR_PASS(_, nir, brw_nir_move_interpolation_to_top);
|
||||
|
||||
brw_nir_cleanup_pre_wm_prog_data(nir);
|
||||
|
|
@ -1561,6 +1494,22 @@ brw_compile_fs(const struct brw_compiler *compiler,
|
|||
params->mue_map,
|
||||
per_primitive_offsets);
|
||||
|
||||
/* From the SKL PRM, Volume 7, "Alpha Coverage":
|
||||
* "If Pixel Shader outputs oMask, AlphaToCoverage is disabled in
|
||||
* hardware, regardless of the state setting for this feature."
|
||||
*/
|
||||
if (prog_data->alpha_to_coverage != INTEL_NEVER) {
|
||||
/* Run constant fold optimization in order to get the correct source
|
||||
* offset to determine render target 0 store instruction in
|
||||
* emit_alpha_to_coverage pass.
|
||||
*/
|
||||
NIR_PASS(_, nir, nir_opt_constant_folding);
|
||||
NIR_PASS(_, nir, brw_nir_lower_alpha_to_coverage);
|
||||
}
|
||||
|
||||
if (prog_data->coarse_pixel_dispatch != INTEL_NEVER)
|
||||
NIR_PASS(_, nir, brw_nir_lower_frag_coord_z);
|
||||
|
||||
if (!brw_wm_prog_key_is_dynamic(key)) {
|
||||
uint32_t f = 0;
|
||||
|
||||
|
|
|
|||
|
|
@ -3812,6 +3812,22 @@ brw_from_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
|
|||
bld.MOV(dest, s.wpos_w);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_fs_start_intel: {
|
||||
brw_reg comps[2] = { s.x_start, s.y_start };
|
||||
bld.VEC(retype(dest, BRW_TYPE_F), comps, 2);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_fs_z_c_intel: {
|
||||
brw_reg comps[2] = { s.z_cx, s.z_cy };
|
||||
bld.VEC(retype(dest, BRW_TYPE_F), comps, 2);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_fs_z_c0_intel:
|
||||
bld.MOV(dest, s.z_c0);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_front_face:
|
||||
bld.MOV(retype(dest, BRW_TYPE_D), emit_frontfacing_interpolation(ntb));
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -1307,6 +1307,36 @@ brw_nir_lower_fs_outputs(nir_shader *nir)
|
|||
nir->info.disable_output_offset_src_constant_folding = true;
|
||||
}
|
||||
|
||||
/**
 * Per-intrinsic callback that rewrites load_frag_coord_z in terms of the
 * Z-plane coefficients:
 *
 *    z = dot(pixel_center - start, z_c) + z_c0
 *
 * where start comes from load_fs_start_intel, z_c (Cx/Cy) from
 * load_fs_z_c_intel and z_c0 (Co) from load_fs_z_c0_intel.
 *
 * \param b       builder used to emit the replacement instructions
 * \param intrin  intrinsic instruction being visited
 * \param (unnamed) callback data pointer, not used here
 *
 * \return false if intrin is not load_frag_coord_z (nothing is changed),
 *         true once the intrinsic's result has been replaced.
 */
static bool
|
||||
lower_frag_coord_z_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *)
|
||||
{
|
||||
if (intrin->intrinsic != nir_intrinsic_load_frag_coord_z)
|
||||
return false;
|
||||
|
||||
/* New instructions are emitted right after the intrinsic being replaced. */
b->cursor = nir_after_instr(&intrin->instr);
|
||||
/* NOTE(review): presumably this keeps the float math below from being
 * relaxed or reassociated by later optimizations -- confirm against the
 * nir_builder fp_math_ctrl semantics.
 */
b->fp_math_ctrl = nir_fp_no_fast_math;
|
||||
|
||||
nir_def *start = nir_load_fs_start_intel(b);
|
||||
nir_def *z_c = nir_load_fs_z_c_intel(b);
|
||||
nir_def *z_c0 = nir_load_fs_z_c0_intel(b);
|
||||
/* Integer pixel coordinate converted to float, then offset by 0.5. */
nir_def *coord = nir_fadd_imm(b, nir_i2f32(b, nir_load_pixel_coord(b)), 0.5f);
|
||||
|
||||
/* (coord - start) . z_c + z_c0 */
nir_def *offset = nir_fsub(b, coord, start);
|
||||
nir_def *dot = nir_fdot(b, offset, z_c);
|
||||
nir_def *coarse_z = nir_fadd(b, dot, z_c0);
|
||||
|
||||
/* Replace the original load's result with the computed value. */
nir_def_replace(&intrin->def, coarse_z);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Shader-level entry point: hands lower_frag_coord_z_instr to
 * nir_shader_intrinsics_pass with nir_metadata_control_flow and no callback
 * data, and returns that pass's result.
 *
 * \param nir  shader to run the lowering on
 */
bool
|
||||
brw_nir_lower_frag_coord_z(nir_shader *nir)
|
||||
{
|
||||
return nir_shader_intrinsics_pass(nir, lower_frag_coord_z_instr,
|
||||
nir_metadata_control_flow, NULL);
|
||||
}
|
||||
|
||||
static bool
|
||||
tag_speculative_access(nir_builder *b,
|
||||
nir_intrinsic_instr *intrin,
|
||||
|
|
|
|||
|
|
@ -260,6 +260,8 @@ void brw_nir_lower_fs_outputs(nir_shader *nir);
|
|||
bool brw_nir_lower_fs_load_output(nir_shader *shader,
|
||||
const struct brw_wm_prog_key *key);
|
||||
|
||||
bool brw_nir_lower_frag_coord_z(nir_shader *nir);
|
||||
|
||||
bool brw_nir_lower_cmat(nir_shader *nir, unsigned subgroup_size);
|
||||
|
||||
struct brw_nir_lower_storage_image_opts {
|
||||
|
|
|
|||
|
|
@ -211,6 +211,12 @@ public:
|
|||
brw_reg pixel_z;
|
||||
brw_reg wpos_w;
|
||||
brw_reg pixel_w;
|
||||
brw_reg x_start;
|
||||
brw_reg y_start;
|
||||
brw_reg z_cx;
|
||||
brw_reg z_cy;
|
||||
brw_reg z_c0;
|
||||
|
||||
brw_reg delta_xy[INTEL_BARYCENTRIC_MODE_COUNT];
|
||||
brw_reg final_gs_vertex_count;
|
||||
brw_reg control_data_bits;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue