nir/opt_varyings: Support implementations that cannot compact 16-bit varyings
Add the nir_io_compact_to_higher_16 flag so that the pass knows whether it can compact 16-bit varyings into the higher 16 bits of a 32-bit varying.
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Aitor Camacho <aitor@lunarg.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38994>
This commit is contained in:
parent
fdfe3acdf0
commit
fcf53988c4
3 changed files with 18 additions and 5 deletions
|
|
@ -114,7 +114,8 @@ void ac_nir_set_options(struct radeon_info *info, bool use_llvm,
|
|||
nir_io_vectorizer_ignores_types |
|
||||
nir_io_compaction_rotates_color_channels |
|
||||
nir_io_assign_color_input_bases_after_all_other_inputs |
|
||||
nir_io_use_frag_result_dual_src_blend;
|
||||
nir_io_use_frag_result_dual_src_blend |
|
||||
nir_io_compact_to_higher_16;
|
||||
options->lower_layer_fs_input_to_sysval = true;
|
||||
options->scalarize_ddx = true;
|
||||
options->coarse_ddx = true;
|
||||
|
|
|
|||
|
|
@ -659,6 +659,7 @@ struct linkage_info {
|
|||
bool has_flexible_interp;
|
||||
bool always_interpolate_convergent_fs_inputs;
|
||||
bool group_tes_inputs_into_pos_var_groups;
|
||||
bool can_compact_to_higher_16;
|
||||
|
||||
mesa_shader_stage producer_stage;
|
||||
mesa_shader_stage consumer_stage;
|
||||
|
|
@ -4788,8 +4789,9 @@ vs_tcs_tes_gs_assign_slots_2sets(struct linkage_info *linkage,
|
|||
*/
|
||||
vs_tcs_tes_gs_assign_slots(linkage, input32_mask, slot_index,
|
||||
patch_slot_index, 2, progress);
|
||||
unsigned slot_size_16bit = linkage->can_compact_to_higher_16 ? 1 : 2;
|
||||
vs_tcs_tes_gs_assign_slots(linkage, input16_mask, slot_index,
|
||||
patch_slot_index, 1, progress);
|
||||
patch_slot_index, slot_size_16bit, progress);
|
||||
|
||||
assert(*slot_index <= VARYING_SLOT_MAX * 8);
|
||||
assert(!patch_slot_index || *patch_slot_index <= VARYING_SLOT_TESS_MAX * 8);
|
||||
|
|
@ -4810,6 +4812,7 @@ static void
|
|||
compact_varyings(struct linkage_info *linkage,
|
||||
nir_opt_varyings_progress *progress)
|
||||
{
|
||||
unsigned slot_size_16bit = linkage->can_compact_to_higher_16 ? 1 : 2;
|
||||
if (linkage->consumer_stage == MESA_SHADER_FRAGMENT) {
|
||||
/* These arrays are used to track which scalar slots we've already
|
||||
* assigned. We can fill unused components of indirectly-indexed slots,
|
||||
|
|
@ -4866,7 +4869,7 @@ compact_varyings(struct linkage_info *linkage,
|
|||
fs_assign_slot_groups(linkage, assigned_mask, assigned_fs_vec4_type,
|
||||
linkage->interp_fp16_mask, linkage->flat16_mask,
|
||||
linkage->convergent16_mask, NULL,
|
||||
FS_VEC4_TYPE_INTERP_FP16, 1, false, 0, progress);
|
||||
FS_VEC4_TYPE_INTERP_FP16, slot_size_16bit, false, 0, progress);
|
||||
} else {
|
||||
/* Basically the same as above. */
|
||||
fs_assign_slot_groups_separate_qual(
|
||||
|
|
@ -4879,7 +4882,7 @@ compact_varyings(struct linkage_info *linkage,
|
|||
linkage, assigned_mask, assigned_fs_vec4_type,
|
||||
&linkage->interp_fp16_qual_masks, linkage->flat16_mask,
|
||||
linkage->convergent16_mask, NULL,
|
||||
FS_VEC4_TYPE_INTERP_FP16_PERSP_PIXEL, 1, false, 0, progress);
|
||||
FS_VEC4_TYPE_INTERP_FP16_PERSP_PIXEL, slot_size_16bit, false, 0, progress);
|
||||
}
|
||||
|
||||
/* Assign INTERP_MODE_EXPLICIT. Both FP32 and FP16 can occupy the same
|
||||
|
|
@ -5247,7 +5250,10 @@ init_linkage(nir_shader *producer, nir_shader *consumer, bool spirv,
|
|||
.group_tes_inputs_into_pos_var_groups =
|
||||
consumer->info.stage == MESA_SHADER_TESS_EVAL &&
|
||||
consumer->options->io_options &
|
||||
nir_io_compaction_groups_tes_inputs_into_pos_and_var_groups,
|
||||
nir_io_compaction_groups_tes_inputs_into_pos_and_var_groups,
|
||||
.can_compact_to_higher_16 = producer->options->io_options &
|
||||
consumer->options->io_options &
|
||||
nir_io_compact_to_higher_16,
|
||||
.producer_stage = producer->info.stage,
|
||||
.consumer_stage = consumer->info.stage,
|
||||
.producer_builder =
|
||||
|
|
|
|||
|
|
@ -216,6 +216,12 @@ typedef enum {
|
|||
*/
|
||||
nir_io_use_frag_result_dual_src_blend = BITFIELD_BIT(12),
|
||||
|
||||
/**
|
||||
* Whether the implementation can compact 16-bit values in the higher
|
||||
* 16 bits of a 32-bit varying.
|
||||
*/
|
||||
nir_io_compact_to_higher_16 = BITFIELD_BIT(13),
|
||||
|
||||
/* Options affecting the GLSL compiler or Gallium are below. */
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue