From a3ddb223e2f163043d24844cf85264190aed3084 Mon Sep 17 00:00:00 2001 From: Lorenzo Rossi Date: Mon, 17 Feb 2025 00:39:32 +0100 Subject: [PATCH] nvk, nak: Implement shaderSharedInt64Atomics Current NVIDIA devices lack support for 64-bit arithmetic atomics on shared memory, so we replace them with compare-and-swap loops using nir_lower_atomics. Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/10330 Signed-off-by: Lorenzo Rossi Part-of: --- src/nouveau/compiler/nak/sm50.rs | 5 +++++ src/nouveau/compiler/nak/sm70.rs | 5 +++++ src/nouveau/compiler/nak_nir.c | 10 ++++++++++ src/nouveau/vulkan/nvk_physical_device.c | 3 ++- 4 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/nouveau/compiler/nak/sm50.rs b/src/nouveau/compiler/nak/sm50.rs index d81a71c6175..c41743d6a62 100644 --- a/src/nouveau/compiler/nak/sm50.rs +++ b/src/nouveau/compiler/nak/sm50.rs @@ -2653,6 +2653,11 @@ impl SM50Op for OpAtom { _ => panic!("Unsupported data type"), }; e.set_field(28..30, data_type); + assert!( + self.atom_type != AtomType::U64 + || self.atom_op == AtomOp::Exch, + "64-bit Shared atomics only support CmpExch or Exch" + ); e.set_atom_op(52..56, self.atom_op); } diff --git a/src/nouveau/compiler/nak/sm70.rs b/src/nouveau/compiler/nak/sm70.rs index 1059dd30954..6df6d34f2fc 100644 --- a/src/nouveau/compiler/nak/sm70.rs +++ b/src/nouveau/compiler/nak/sm70.rs @@ -3077,6 +3077,11 @@ impl SM70Op for OpAtom { e.set_opcode(0x38c); e.set_reg_src(32..40, self.data); + assert!( + self.atom_type != AtomType::U64 + || self.atom_op == AtomOp::Exch, + "64-bit Shared atomics only support CmpExch or Exch" + ); e.set_atom_op(87..91, self.atom_op); } diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c index 33a63533d6b..3631426d390 100644 --- a/src/nouveau/compiler/nak_nir.c +++ b/src/nouveau/compiler/nak_nir.c @@ -906,6 +906,15 @@ type_size_vec4(const struct glsl_type *type, bool bindless) return glsl_count_vec4_slots(type, false, bindless); } +static bool 
+atomic_supported(const nir_instr *instr, const void *data) +{ + /* Shared atomics don't support 64-bit arithmetic */ + const nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + return !(intr->intrinsic == nir_intrinsic_shared_atomic && + intr->def.bit_size == 64); +} + void nak_postprocess_nir(nir_shader *nir, const struct nak_compiler *nak, @@ -930,6 +939,7 @@ nak_postprocess_nir(nir_shader *nir, .lower_rotate_to_shuffle = true }; OPT(nir, nir_lower_subgroups, &subgroups_options); + OPT(nir, nir_lower_atomics, atomic_supported); OPT(nir, nak_nir_lower_scan_reduce); if (nir_shader_has_local_variables(nir)) { diff --git a/src/nouveau/vulkan/nvk_physical_device.c b/src/nouveau/vulkan/nvk_physical_device.c index 477682c05a5..4dd9d8b7b33 100644 --- a/src/nouveau/vulkan/nvk_physical_device.c +++ b/src/nouveau/vulkan/nvk_physical_device.c @@ -376,7 +376,8 @@ nvk_get_device_features(const struct nv_device_info *info, .storagePushConstant8 = true, .shaderBufferInt64Atomics = info->cls_eng3d >= MAXWELL_A && nvk_use_nak(info), - .shaderSharedInt64Atomics = false, /* TODO */ + .shaderSharedInt64Atomics = info->cls_eng3d >= MAXWELL_A && + nvk_use_nak(info), /* TODO: Fp16 is currently busted on Turing and Volta due to instruction * scheduling issues. Re-enable it once those are sorted. */