diff --git a/src/amd/common/nir/ac_nir.c b/src/amd/common/nir/ac_nir.c index 42959dc591e..917a459e2bd 100644 --- a/src/amd/common/nir/ac_nir.c +++ b/src/amd/common/nir/ac_nir.c @@ -114,7 +114,8 @@ void ac_nir_set_options(struct radeon_info *info, bool use_llvm, nir_io_vectorizer_ignores_types | nir_io_compaction_rotates_color_channels | nir_io_assign_color_input_bases_after_all_other_inputs | - nir_io_use_frag_result_dual_src_blend; + nir_io_use_frag_result_dual_src_blend | + nir_io_compact_to_higher_16; options->lower_layer_fs_input_to_sysval = true; options->scalarize_ddx = true; options->coarse_ddx = true; diff --git a/src/compiler/nir/nir_opt_varyings.c b/src/compiler/nir/nir_opt_varyings.c index 47e495d2958..af98a1d47af 100644 --- a/src/compiler/nir/nir_opt_varyings.c +++ b/src/compiler/nir/nir_opt_varyings.c @@ -659,6 +659,7 @@ struct linkage_info { bool has_flexible_interp; bool always_interpolate_convergent_fs_inputs; bool group_tes_inputs_into_pos_var_groups; + bool can_compact_to_higher_16; mesa_shader_stage producer_stage; mesa_shader_stage consumer_stage; @@ -4788,8 +4789,9 @@ vs_tcs_tes_gs_assign_slots_2sets(struct linkage_info *linkage, */ vs_tcs_tes_gs_assign_slots(linkage, input32_mask, slot_index, patch_slot_index, 2, progress); + unsigned slot_size_16bit = linkage->can_compact_to_higher_16 ? 1 : 2; vs_tcs_tes_gs_assign_slots(linkage, input16_mask, slot_index, - patch_slot_index, 1, progress); + patch_slot_index, slot_size_16bit, progress); assert(*slot_index <= VARYING_SLOT_MAX * 8); assert(!patch_slot_index || *patch_slot_index <= VARYING_SLOT_TESS_MAX * 8); @@ -4810,6 +4812,7 @@ static void compact_varyings(struct linkage_info *linkage, nir_opt_varyings_progress *progress) { + unsigned slot_size_16bit = linkage->can_compact_to_higher_16 ? 1 : 2; if (linkage->consumer_stage == MESA_SHADER_FRAGMENT) { /* These arrays are used to track which scalar slots we've already * assigned. 
We can fill unused components of indirectly-indexed slots, @@ -4866,7 +4869,7 @@ compact_varyings(struct linkage_info *linkage, fs_assign_slot_groups(linkage, assigned_mask, assigned_fs_vec4_type, linkage->interp_fp16_mask, linkage->flat16_mask, linkage->convergent16_mask, NULL, - FS_VEC4_TYPE_INTERP_FP16, 1, false, 0, progress); + FS_VEC4_TYPE_INTERP_FP16, slot_size_16bit, false, 0, progress); } else { /* Basically the same as above. */ fs_assign_slot_groups_separate_qual( @@ -4879,7 +4882,7 @@ compact_varyings(struct linkage_info *linkage, linkage, assigned_mask, assigned_fs_vec4_type, &linkage->interp_fp16_qual_masks, linkage->flat16_mask, linkage->convergent16_mask, NULL, - FS_VEC4_TYPE_INTERP_FP16_PERSP_PIXEL, 1, false, 0, progress); + FS_VEC4_TYPE_INTERP_FP16_PERSP_PIXEL, slot_size_16bit, false, 0, progress); } /* Assign INTERP_MODE_EXPLICIT. Both FP32 and FP16 can occupy the same @@ -5247,7 +5250,10 @@ init_linkage(nir_shader *producer, nir_shader *consumer, bool spirv, .group_tes_inputs_into_pos_var_groups = consumer->info.stage == MESA_SHADER_TESS_EVAL && consumer->options->io_options & - nir_io_compaction_groups_tes_inputs_into_pos_and_var_groups, + nir_io_compaction_groups_tes_inputs_into_pos_and_var_groups, + .can_compact_to_higher_16 = producer->options->io_options & + consumer->options->io_options & + nir_io_compact_to_higher_16, .producer_stage = producer->info.stage, .consumer_stage = consumer->info.stage, .producer_builder = diff --git a/src/compiler/nir/nir_shader_compiler_options.h b/src/compiler/nir/nir_shader_compiler_options.h index de20eb25a5b..790148485e3 100644 --- a/src/compiler/nir/nir_shader_compiler_options.h +++ b/src/compiler/nir/nir_shader_compiler_options.h @@ -216,6 +216,12 @@ typedef enum { */ nir_io_use_frag_result_dual_src_blend = BITFIELD_BIT(12), + /** + * Whether the implementation can compact 16-bit values into the upper + * 16 bits of a 32-bit varying component.
+ */ + nir_io_compact_to_higher_16 = BITFIELD_BIT(13), + /* Options affecting the GLSL compiler or Gallium are below. */ /**