diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 2e713fd43bb..a8b2bc96549 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -2484,14 +2484,12 @@ image("store_raw_intel", src_comp=[1, 0]) # Maximum number of polygons processed in the fragment shader system_value("max_polygon_intel", 1, bit_sizes=[32]) -# Maximum number of polygons processed in the fragment shader -intel_fs_values = [ - (2, "start"), # X/Y coordinate (screen space) for upper-left vertex of a triangle being rasterized - (2, "z_c"), # z_c – Cx/Cy for z plane - (1, "z_c0"), # z_c0 – Co for z plane -] -for v in intel_fs_values: - system_value("fs_{0}_intel".format(v[1]), v[0], bit_sizes=[32]) +# Screen-space X/Y coordinate of upper-left vertex of the triangle being rasterized +system_value("fs_start_intel", 2, bit_sizes=[32]) + +# z_c, z_c0 (Cx/Cy/Co for Z plane) +system_value("fs_z_c_intel", 2, bit_sizes=[32]) +system_value("fs_z_c0_intel", 1, bit_sizes=[32]) # Read the attribute thread payload at a given byte offset # src[] = { offset } diff --git a/src/gallium/drivers/zink/ci/traces-zink.yml b/src/gallium/drivers/zink/ci/traces-zink.yml index d75e7cec77e..4e55f22e50c 100644 --- a/src/gallium/drivers/zink/ci/traces-zink.yml +++ b/src/gallium/drivers/zink/ci/traces-zink.yml @@ -7,9 +7,9 @@ traces: 0ad/0ad-v2.trace: gl-zink-anv-adl: label: [no-perf] - checksum: 0d04e54fa259d407433a7925c739900a + checksum: ff0d4b072dd613b6f11c351027db3bb3 gl-zink-anv-tgl: - checksum: 0d04e54fa259d407433a7925c739900a + checksum: ff0d4b072dd613b6f11c351027db3bb3 zink-radv-vangogh: checksum: 52cabbe16a14628f92df31e0fb4c109e zink-radv-gfx1201: diff --git a/src/intel/compiler/brw/brw_compile_fs.cpp b/src/intel/compiler/brw/brw_compile_fs.cpp index 5ef18a59b8a..f3fdeb25752 100644 --- a/src/intel/compiler/brw/brw_compile_fs.cpp +++ b/src/intel/compiler/brw/brw_compile_fs.cpp @@ -451,83 +451,28 @@ brw_emit_interpolation_setup(brw_shader &s) } } - abld = bld.annotate("compute pos.z"); - brw_reg coarse_z; - if (wm_prog_data->coarse_pixel_dispatch != INTEL_NEVER && - wm_prog_data->uses_depth_w_coefficients) { - /* In coarse pixel mode, the HW doesn't interpolate Z coordinate - * properly. In the same way we have to add the coarse pixel size to - * pixels locations, here we recompute the Z value with 2 coefficients - * in X & Y axis. - * - * src_z = (x - xstart)*z_cx + (y - ystart)*z_cy + z_c0 - */ + if (wm_prog_data->uses_depth_w_coefficients) { brw_reg coef_payload = brw_vec8_grf(payload.depth_w_coef_reg, 0); - const brw_reg x_start = devinfo->ver >= 20 ? + s.x_start = devinfo->ver >= 20 ? brw_vec1_grf(coef_payload.nr, 6) : brw_vec1_grf(coef_payload.nr, 2); - const brw_reg y_start = devinfo->ver >= 20 ? + s.y_start = devinfo->ver >= 20 ? brw_vec1_grf(coef_payload.nr, 7) : brw_vec1_grf(coef_payload.nr, 6); - const brw_reg z_cx = devinfo->ver >= 20 ? + s.z_cx = devinfo->ver >= 20 ? brw_vec1_grf(coef_payload.nr + 1, 1) : brw_vec1_grf(coef_payload.nr, 1); - const brw_reg z_cy = devinfo->ver >= 20 ? + s.z_cy = devinfo->ver >= 20 ? brw_vec1_grf(coef_payload.nr + 1, 0) : brw_vec1_grf(coef_payload.nr, 0); - const brw_reg z_c0 = devinfo->ver >= 20 ? + s.z_c0 = devinfo->ver >= 20 ? brw_vec1_grf(coef_payload.nr + 1, 2) : brw_vec1_grf(coef_payload.nr, 3); - - const brw_reg float_pixel_x = abld.vgrf(BRW_TYPE_F); - const brw_reg float_pixel_y = abld.vgrf(BRW_TYPE_F); - - abld.MOV(float_pixel_x, s.uw_pixel_x); - abld.MOV(float_pixel_y, s.uw_pixel_y); - - abld.ADD(float_pixel_x, float_pixel_x, negate(x_start)); - abld.ADD(float_pixel_y, float_pixel_y, negate(y_start)); - - const brw_reg f_cps_width = abld.vgrf(BRW_TYPE_F); - const brw_reg f_cps_height = abld.vgrf(BRW_TYPE_F); - abld.MOV(f_cps_width, ub_cps_width); - abld.MOV(f_cps_height, ub_cps_height); - - /* Center in the middle of the coarse pixel. */ - abld.MAD(float_pixel_x, float_pixel_x, f_cps_width, brw_imm_f(0.5f)); - abld.MAD(float_pixel_y, float_pixel_y, f_cps_height, brw_imm_f(0.5f)); - - coarse_z = abld.vgrf(BRW_TYPE_F); - abld.MAD(coarse_z, z_c0, z_cx, float_pixel_x); - abld.MAD(coarse_z, coarse_z, z_cy, float_pixel_y); } if (wm_prog_data->uses_src_depth) s.pixel_z = brw_fetch_payload_reg(bld, payload.source_depth_reg); - if (wm_prog_data->uses_depth_w_coefficients || - wm_prog_data->uses_src_depth) { - switch (wm_prog_data->coarse_pixel_dispatch) { - case INTEL_NEVER: - break; - - case INTEL_SOMETIMES: - /* We cannot enable 3DSTATE_PS_EXTRA::PixelShaderUsesSourceDepth when - * coarse is enabled. Here we don't know if it's going to be, but - * setting brw_wm_prog_data::uses_src_depth dynamically would disturb - * the payload. So instead rely on the computed coarse_z which will - * produce a correct value even when coarse is disabled. - */ - - /* Fallthrough */ - case INTEL_ALWAYS: - assert(!wm_prog_data->uses_src_depth); - assert(wm_prog_data->uses_depth_w_coefficients); - s.pixel_z = coarse_z; - break; - } - } - if (wm_prog_data->uses_src_w) { abld = bld.annotate("compute pos.w"); s.pixel_w = brw_fetch_payload_reg(abld, payload.source_w_reg); @@ -1534,22 +1479,10 @@ brw_compile_fs(const struct brw_compiler *compiler, if (!brw_can_coherent_fb_fetch(devinfo)) NIR_PASS(_, nir, brw_nir_lower_fs_load_output, key); + /* Do this lowering before brw_nir_populate_wm_prog_data(). */ NIR_PASS(_, nir, nir_opt_frag_coord_to_pixel_coord); NIR_PASS(_, nir, nir_lower_frag_coord_to_pixel_coord); - /* From the SKL PRM, Volume 7, "Alpha Coverage": - * "If Pixel Shader outputs oMask, AlphaToCoverage is disabled in - * hardware, regardless of the state setting for this feature." - */ - if (key->alpha_to_coverage != INTEL_NEVER) { - /* Run constant fold optimization in order to get the correct source - * offset to determine render target 0 store instruction in - * emit_alpha_to_coverage pass. - */ - NIR_PASS(_, nir, nir_opt_constant_folding); - NIR_PASS(_, nir, brw_nir_lower_alpha_to_coverage); - } - NIR_PASS(_, nir, brw_nir_move_interpolation_to_top); brw_nir_cleanup_pre_wm_prog_data(nir); @@ -1561,6 +1494,22 @@ brw_compile_fs(const struct brw_compiler *compiler, params->mue_map, per_primitive_offsets); + /* From the SKL PRM, Volume 7, "Alpha Coverage": + * "If Pixel Shader outputs oMask, AlphaToCoverage is disabled in + * hardware, regardless of the state setting for this feature." + */ + if (prog_data->alpha_to_coverage != INTEL_NEVER) { + /* Run constant fold optimization in order to get the correct source + * offset to determine render target 0 store instruction in + * emit_alpha_to_coverage pass. + */ + NIR_PASS(_, nir, nir_opt_constant_folding); + NIR_PASS(_, nir, brw_nir_lower_alpha_to_coverage); + } + + if (prog_data->coarse_pixel_dispatch != INTEL_NEVER) + NIR_PASS(_, nir, brw_nir_lower_frag_coord_z); + if (!brw_wm_prog_key_is_dynamic(key)) { uint32_t f = 0; diff --git a/src/intel/compiler/brw/brw_from_nir.cpp b/src/intel/compiler/brw/brw_from_nir.cpp index d7ba9b1dcad..b4dc8468ccb 100644 --- a/src/intel/compiler/brw/brw_from_nir.cpp +++ b/src/intel/compiler/brw/brw_from_nir.cpp @@ -3812,6 +3812,22 @@ brw_from_nir_emit_fs_intrinsic(nir_to_brw_state &ntb, bld.MOV(dest, s.wpos_w); break; + case nir_intrinsic_load_fs_start_intel: { + brw_reg comps[2] = { s.x_start, s.y_start }; + bld.VEC(retype(dest, BRW_TYPE_F), comps, 2); + break; + } + + case nir_intrinsic_load_fs_z_c_intel: { + brw_reg comps[2] = { s.z_cx, s.z_cy }; + bld.VEC(retype(dest, BRW_TYPE_F), comps, 2); + break; + } + + case nir_intrinsic_load_fs_z_c0_intel: + bld.MOV(dest, s.z_c0); + break; + case nir_intrinsic_load_front_face: bld.MOV(retype(dest, BRW_TYPE_D), emit_frontfacing_interpolation(ntb)); break; diff --git a/src/intel/compiler/brw/brw_nir.c b/src/intel/compiler/brw/brw_nir.c index a3411f00fbe..a48f7b9f621 100644 --- a/src/intel/compiler/brw/brw_nir.c +++ b/src/intel/compiler/brw/brw_nir.c @@ -1307,6 +1307,36 @@ brw_nir_lower_fs_outputs(nir_shader *nir) nir->info.disable_output_offset_src_constant_folding = true; } +static bool +lower_frag_coord_z_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *) +{ + if (intrin->intrinsic != nir_intrinsic_load_frag_coord_z) + return false; + + b->cursor = nir_after_instr(&intrin->instr); + b->fp_math_ctrl = nir_fp_no_fast_math; + + nir_def *start = nir_load_fs_start_intel(b); + nir_def *z_c = nir_load_fs_z_c_intel(b); + nir_def *z_c0 = nir_load_fs_z_c0_intel(b); + nir_def *coord = nir_fadd_imm(b, nir_i2f32(b, nir_load_pixel_coord(b)), 0.5f); + + nir_def *offset = nir_fsub(b, coord, start); + nir_def *dot = nir_fdot(b, offset, z_c); + nir_def *coarse_z = nir_fadd(b, dot, z_c0); + + nir_def_replace(&intrin->def, coarse_z); + + return true; +} + +bool +brw_nir_lower_frag_coord_z(nir_shader *nir) +{ + return nir_shader_intrinsics_pass(nir, lower_frag_coord_z_instr, + nir_metadata_control_flow, NULL); +} + static bool tag_speculative_access(nir_builder *b, nir_intrinsic_instr *intrin, diff --git a/src/intel/compiler/brw/brw_nir.h b/src/intel/compiler/brw/brw_nir.h index d4c2a04e2e9..76cd7355357 100644 --- a/src/intel/compiler/brw/brw_nir.h +++ b/src/intel/compiler/brw/brw_nir.h @@ -260,6 +260,8 @@ void brw_nir_lower_fs_outputs(nir_shader *nir); bool brw_nir_lower_fs_load_output(nir_shader *shader, const struct brw_wm_prog_key *key); +bool brw_nir_lower_frag_coord_z(nir_shader *nir); + bool brw_nir_lower_cmat(nir_shader *nir, unsigned subgroup_size); struct brw_nir_lower_storage_image_opts { diff --git a/src/intel/compiler/brw/brw_shader.h b/src/intel/compiler/brw/brw_shader.h index 50835d1f22b..10b558dc5ba 100644 --- a/src/intel/compiler/brw/brw_shader.h +++ b/src/intel/compiler/brw/brw_shader.h @@ -211,6 +211,12 @@ public: brw_reg pixel_z; brw_reg wpos_w; brw_reg pixel_w; + brw_reg x_start; + brw_reg y_start; + brw_reg z_cx; + brw_reg z_cy; + brw_reg z_c0; + brw_reg delta_xy[INTEL_BARYCENTRIC_MODE_COUNT]; brw_reg final_gs_vertex_count; brw_reg control_data_bits;