nvk, nak: Implement shaderSharedInt64Atomics

Current NVIDIA devices lack support for 64-bit arithmetic atomics on shared memory, so we replace them with compare-and-swap loops using nir_lower_atomics. Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/10330 Signed-off-by: Lorenzo Rossi <snowycoder@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33572>
2025-02-17 00:39:32 +01:00 · 2025-02-17 00:39:32 +01:00 · a3ddb223e2
commit a3ddb223e2
parent 26079c1a93
4 changed files with 22 additions and 1 deletion
--- a/src/nouveau/compiler/nak/sm50.rs
+++ b/src/nouveau/compiler/nak/sm50.rs
@ -2653,6 +2653,11 @@ impl SM50Op for OpAtom {
                        _ => panic!("Unsupported data type"),
                    };
                    e.set_field(28..30, data_type);
+                    assert!(
+                        self.atom_type != AtomType::U64
+                            || self.atom_op == AtomOp::Exch,
+                        "64-bit Shared atomics only support CmpExch or Exch"
+                    );
                    e.set_atom_op(52..56, self.atom_op);
                }

--- a/src/nouveau/compiler/nak/sm70.rs
+++ b/src/nouveau/compiler/nak/sm70.rs
@ -3077,6 +3077,11 @@ impl SM70Op for OpAtom {
                    e.set_opcode(0x38c);

                    e.set_reg_src(32..40, self.data);
+                    assert!(
+                        self.atom_type != AtomType::U64
+                            || self.atom_op == AtomOp::Exch,
+                        "64-bit Shared atomics only support CmpExch or Exch"
+                    );
                    e.set_atom_op(87..91, self.atom_op);
                }

--- a/src/nouveau/compiler/nak_nir.c
+++ b/src/nouveau/compiler/nak_nir.c
@ -906,6 +906,15 @@ type_size_vec4(const struct glsl_type *type, bool bindless)
   return glsl_count_vec4_slots(type, false, bindless);
 }

+static bool
+atomic_supported(const nir_instr *instr, const void *data)
+{
+   /* Only a shared atomic with a 64-bit result is reported as unsupported */
+   const nir_intrinsic_instr *atomic = nir_instr_as_intrinsic(instr);
+   return atomic->intrinsic != nir_intrinsic_shared_atomic ||
+          atomic->def.bit_size != 64;
+}
+
 void
 nak_postprocess_nir(nir_shader *nir,
                    const struct nak_compiler *nak,
@ -930,6 +939,7 @@ nak_postprocess_nir(nir_shader *nir,
      .lower_rotate_to_shuffle = true
   };
   OPT(nir, nir_lower_subgroups, &subgroups_options);
+   OPT(nir, nir_lower_atomics, atomic_supported);
   OPT(nir, nak_nir_lower_scan_reduce);

   if (nir_shader_has_local_variables(nir)) {
--- a/src/nouveau/vulkan/nvk_physical_device.c
+++ b/src/nouveau/vulkan/nvk_physical_device.c
@ -376,7 +376,8 @@ nvk_get_device_features(const struct nv_device_info *info,
      .storagePushConstant8 = true,
      .shaderBufferInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
                                  nvk_use_nak(info),
-      .shaderSharedInt64Atomics = false, /* TODO */
+      .shaderSharedInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
+                                  nvk_use_nak(info),
      /* TODO: Fp16 is currently busted on Turing and Volta due to instruction
       * scheduling issues.  Re-enable it once those are sorted.
       */