nvk, nak: Implement shaderSharedInt64Atomics
Current NVIDIA devices lack support for 64-bit arithmetic atomics on shared memory, so we replace them with compare-and-swap loops using nir_lower_atomics. Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/10330 Signed-off-by: Lorenzo Rossi <snowycoder@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33572>
This commit is contained in:
parent
26079c1a93
commit
a3ddb223e2
4 changed files with 22 additions and 1 deletions
|
|
@ -2653,6 +2653,11 @@ impl SM50Op for OpAtom {
|
|||
_ => panic!("Unsupported data type"),
|
||||
};
|
||||
e.set_field(28..30, data_type);
|
||||
assert!(
|
||||
self.atom_type != AtomType::U64
|
||||
|| self.atom_op == AtomOp::Exch,
|
||||
"64-bit Shared atomics only support CmpExch or Exch"
|
||||
);
|
||||
e.set_atom_op(52..56, self.atom_op);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3077,6 +3077,11 @@ impl SM70Op for OpAtom {
|
|||
e.set_opcode(0x38c);
|
||||
|
||||
e.set_reg_src(32..40, self.data);
|
||||
assert!(
|
||||
self.atom_type != AtomType::U64
|
||||
|| self.atom_op == AtomOp::Exch,
|
||||
"64-bit Shared atomics only support CmpExch or Exch"
|
||||
);
|
||||
e.set_atom_op(87..91, self.atom_op);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -906,6 +906,15 @@ type_size_vec4(const struct glsl_type *type, bool bindless)
|
|||
return glsl_count_vec4_slots(type, false, bindless);
|
||||
}
|
||||
|
||||
static bool
|
||||
atomic_supported(const nir_instr *instr, const void *data)
|
||||
{
|
||||
/* Shared atomics don't support 64-bit arithmetic */
|
||||
const nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
return !(intr->intrinsic == nir_intrinsic_shared_atomic &&
|
||||
intr->def.bit_size == 64);
|
||||
}
|
||||
|
||||
void
|
||||
nak_postprocess_nir(nir_shader *nir,
|
||||
const struct nak_compiler *nak,
|
||||
|
|
@ -930,6 +939,7 @@ nak_postprocess_nir(nir_shader *nir,
|
|||
.lower_rotate_to_shuffle = true
|
||||
};
|
||||
OPT(nir, nir_lower_subgroups, &subgroups_options);
|
||||
OPT(nir, nir_lower_atomics, atomic_supported);
|
||||
OPT(nir, nak_nir_lower_scan_reduce);
|
||||
|
||||
if (nir_shader_has_local_variables(nir)) {
|
||||
|
|
|
|||
|
|
@ -376,7 +376,8 @@ nvk_get_device_features(const struct nv_device_info *info,
|
|||
.storagePushConstant8 = true,
|
||||
.shaderBufferInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
|
||||
nvk_use_nak(info),
|
||||
.shaderSharedInt64Atomics = false, /* TODO */
|
||||
.shaderSharedInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
|
||||
nvk_use_nak(info),
|
||||
/* TODO: Fp16 is currently busted on Turing and Volta due to instruction
|
||||
* scheduling issues. Re-enable it once those are sorted.
|
||||
*/
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue