nvk, nak: Implement shaderSharedInt64Atomics

Current NVIDIA devices lack support for 64-bit arithmetic atomics on
shared memory, so we replace them with compare-and-swap loops using
nir_lower_atomics.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/10330
Signed-off-by: Lorenzo Rossi <snowycoder@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33572>
This commit is contained in:
Lorenzo Rossi 2025-02-17 00:39:32 +01:00 committed by Marge Bot
parent 26079c1a93
commit a3ddb223e2
4 changed files with 22 additions and 1 deletions

View file

@ -2653,6 +2653,11 @@ impl SM50Op for OpAtom {
_ => panic!("Unsupported data type"),
};
e.set_field(28..30, data_type);
assert!(
self.atom_type != AtomType::U64
|| self.atom_op == AtomOp::Exch,
"64-bit Shared atomics only support CmpExch or Exch"
);
e.set_atom_op(52..56, self.atom_op);
}

View file

@ -3077,6 +3077,11 @@ impl SM70Op for OpAtom {
e.set_opcode(0x38c);
e.set_reg_src(32..40, self.data);
assert!(
self.atom_type != AtomType::U64
|| self.atom_op == AtomOp::Exch,
"64-bit Shared atomics only support CmpExch or Exch"
);
e.set_atom_op(87..91, self.atom_op);
}

View file

@ -906,6 +906,15 @@ type_size_vec4(const struct glsl_type *type, bool bindless)
return glsl_count_vec4_slots(type, false, bindless);
}
/*
 * Filter callback handed to nir_lower_atomics.
 *
 * Yields false only for a nir_intrinsic_shared_atomic whose result is 64 bits
 * wide; every other intrinsic yields true.  Shared memory has no 64-bit
 * arithmetic atomics, so those are the ones that must not be emitted as-is.
 *
 * The instruction is converted with nir_instr_as_intrinsic() unconditionally.
 * NOTE(review): presumably the caller only passes intrinsic instructions --
 * confirm against nir_lower_atomics.
 *
 * The `data` argument is not used.
 */
static bool
atomic_supported(const nir_instr *instr, const void *data)
{
   const nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

   /* Anything that is not a shared-memory atomic is left alone. */
   if (intr->intrinsic != nir_intrinsic_shared_atomic)
      return true;

   /* Shared atomics are fine unless they operate on 64-bit values. */
   return intr->def.bit_size != 64;
}
void
nak_postprocess_nir(nir_shader *nir,
const struct nak_compiler *nak,
@ -930,6 +939,7 @@ nak_postprocess_nir(nir_shader *nir,
.lower_rotate_to_shuffle = true
};
OPT(nir, nir_lower_subgroups, &subgroups_options);
OPT(nir, nir_lower_atomics, atomic_supported);
OPT(nir, nak_nir_lower_scan_reduce);
if (nir_shader_has_local_variables(nir)) {

View file

@ -376,7 +376,8 @@ nvk_get_device_features(const struct nv_device_info *info,
.storagePushConstant8 = true,
.shaderBufferInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
nvk_use_nak(info),
.shaderSharedInt64Atomics = false, /* TODO */
.shaderSharedInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
nvk_use_nak(info),
/* TODO: Fp16 is currently busted on Turing and Volta due to instruction
* scheduling issues. Re-enable it once those are sorted.
*/