nir/opt_varyings: Support implementations that cannot compact 16-bit varyings

Add the nir_io_compact_to_higher_16 flag so that the pass knows whether it can compact 16-bit varyings into the higher 16 bits of a 32-bit varying. The flag must be set by both the producer and the consumer; otherwise 16-bit varyings are assigned with the same slot size as 32-bit ones. Reviewed-by: Marek Olšák <marek.olsak@amd.com> Signed-off-by: Aitor Camacho <aitor@lunarg.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38994>
2025-12-17 21:32:36 +09:00 · 2025-12-17 21:32:36 +09:00 · fcf53988c4
commit fcf53988c4
parent fdfe3acdf0
3 changed files with 18 additions and 5 deletions
--- a/src/amd/common/nir/ac_nir.c
+++ b/src/amd/common/nir/ac_nir.c
@ -114,7 +114,8 @@ void ac_nir_set_options(struct radeon_info *info, bool use_llvm,
                         nir_io_vectorizer_ignores_types |
                         nir_io_compaction_rotates_color_channels |
                         nir_io_assign_color_input_bases_after_all_other_inputs |
-                         nir_io_use_frag_result_dual_src_blend;
+                         nir_io_use_frag_result_dual_src_blend  |
+                         nir_io_compact_to_higher_16;
   options->lower_layer_fs_input_to_sysval = true;
   options->scalarize_ddx = true;
   options->coarse_ddx = true;
--- a/src/compiler/nir/nir_opt_varyings.c
+++ b/src/compiler/nir/nir_opt_varyings.c
@ -659,6 +659,7 @@ struct linkage_info {
   bool has_flexible_interp;
   bool always_interpolate_convergent_fs_inputs;
   bool group_tes_inputs_into_pos_var_groups;
+   bool can_compact_to_higher_16;

   mesa_shader_stage producer_stage;
   mesa_shader_stage consumer_stage;
@ -4788,8 +4789,9 @@ vs_tcs_tes_gs_assign_slots_2sets(struct linkage_info *linkage,
    */
   vs_tcs_tes_gs_assign_slots(linkage, input32_mask, slot_index,
                              patch_slot_index, 2, progress);
+   unsigned slot_size_16bit = linkage->can_compact_to_higher_16 ? 1 : 2;
   vs_tcs_tes_gs_assign_slots(linkage, input16_mask, slot_index,
-                              patch_slot_index, 1, progress);
+                              patch_slot_index, slot_size_16bit, progress);

   assert(*slot_index <= VARYING_SLOT_MAX * 8);
   assert(!patch_slot_index || *patch_slot_index <= VARYING_SLOT_TESS_MAX * 8);
@ -4810,6 +4812,7 @@ static void
 compact_varyings(struct linkage_info *linkage,
                 nir_opt_varyings_progress *progress)
 {
+   unsigned slot_size_16bit = linkage->can_compact_to_higher_16 ? 1 : 2;
   if (linkage->consumer_stage == MESA_SHADER_FRAGMENT) {
      /* These arrays are used to track which scalar slots we've already
       * assigned. We can fill unused components of indirectly-indexed slots,
@ -4866,7 +4869,7 @@ compact_varyings(struct linkage_info *linkage,
         fs_assign_slot_groups(linkage, assigned_mask, assigned_fs_vec4_type,
                               linkage->interp_fp16_mask, linkage->flat16_mask,
                               linkage->convergent16_mask, NULL,
-                               FS_VEC4_TYPE_INTERP_FP16, 1, false, 0, progress);
+                               FS_VEC4_TYPE_INTERP_FP16, slot_size_16bit, false, 0, progress);
      } else {
         /* Basically the same as above. */
         fs_assign_slot_groups_separate_qual(
@ -4879,7 +4882,7 @@ compact_varyings(struct linkage_info *linkage,
            linkage, assigned_mask, assigned_fs_vec4_type,
            &linkage->interp_fp16_qual_masks, linkage->flat16_mask,
            linkage->convergent16_mask, NULL,
-            FS_VEC4_TYPE_INTERP_FP16_PERSP_PIXEL, 1, false, 0, progress);
+            FS_VEC4_TYPE_INTERP_FP16_PERSP_PIXEL, slot_size_16bit, false, 0, progress);
      }

      /* Assign INTERP_MODE_EXPLICIT. Both FP32 and FP16 can occupy the same
@ -5247,7 +5250,10 @@ init_linkage(nir_shader *producer, nir_shader *consumer, bool spirv,
      .group_tes_inputs_into_pos_var_groups =
         consumer->info.stage == MESA_SHADER_TESS_EVAL &&
         consumer->options->io_options &
-         nir_io_compaction_groups_tes_inputs_into_pos_and_var_groups,
+            nir_io_compaction_groups_tes_inputs_into_pos_and_var_groups,
+      .can_compact_to_higher_16 = producer->options->io_options &
+                                  consumer->options->io_options &
+                                  nir_io_compact_to_higher_16,
      .producer_stage = producer->info.stage,
      .consumer_stage = consumer->info.stage,
      .producer_builder =
--- a/src/compiler/nir/nir_shader_compiler_options.h
+++ b/src/compiler/nir/nir_shader_compiler_options.h
@ -216,6 +216,12 @@ typedef enum {
    */
   nir_io_use_frag_result_dual_src_blend = BITFIELD_BIT(12),

+   /**
+    * Whether the implementation can compact 16-bit values into the higher
+    * 16 bits of a 32-bit varying.
+    */
+   nir_io_compact_to_higher_16 = BITFIELD_BIT(13),
+
   /* Options affecting the GLSL compiler or Gallium are below. */

   /**