llvmpipe: do bounds checking for shared memory

Just compare against the size that was declared.
This is probably overkill. I couldn't figure out what Vulkan says with
regard to OOB access of shared memory. D3D, however (which is very strict
about these things), says that for TGSM writes the entire contents of the
TGSM become undefined, and for reads the result is undefined. Hence,
rather than masking out such accesses, it would be enough to just clamp
the offsets to valid values to avoid the segfaults.
NIR doesn't seem easily able to tell us whether an access is guaranteed
to be in-bounds (unlike for SSBO access), so assume it is always
potentially OOB.

v2: fix rusticl - for CL we don't know the shared size at compilation
time; it is only provided at launch_grid() time, so the NIR shader info
shared_size might be zero. Hence, pass the size through via the CS jit
context; there already was a member in there which looks like it was
intended for that (interestingly enough, the CS jit context was
previously unused, since resources are passed elsewhere nowadays).

Reviewed-by: Brian Paul <brian.paul@broadcom.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38307>
This commit is contained in:
Roland Scheidegger 2025-11-10 23:17:23 +01:00 committed by Marge Bot
parent 4490275332
commit d6fd8b4201
4 changed files with 38 additions and 22 deletions

View file

@ -218,6 +218,7 @@ struct lp_build_nir_soa_context
LLVMValueRef ssbo_ptr;
LLVMValueRef shared_ptr;
LLVMValueRef shared_size;
LLVMValueRef payload_ptr;
LLVMValueRef scratch_ptr;
unsigned scratch_size;
@ -1317,11 +1318,27 @@ mem_access_base_pointer(struct lp_build_nir_soa_context *bld,
ptr = LLVMBuildPtrToInt(gallivm->builder, ptr, bld->int64_bld.elem_type, "");
ptr = LLVMBuildAdd(gallivm->builder, ptr, lp_build_const_int64(gallivm, 12), "");
ptr = LLVMBuildIntToPtr(gallivm->builder, ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
if (bounds)
*bounds = NULL;
}
else
else {
ptr = bld->shared_ptr;
if (bounds)
*bounds = NULL;
if (bounds) {
/*
* For shared mem, use essentially the same logic as for SSBOs,
* and clamp against the declared size.
* Note this is probably overkill - unsure of vulkan semantics,
* but d3d would allow for instance to just clamp offsets,
* since OOB writes cause shared mem to become undefined, and OOB
* reads have undefined results, unlike for SSBOs there's no
* need to ignore writes and return 0 for reads.
* Bounds is in units of bit_size (not bytes).
*/
uint32_t shift_val = bit_size_to_shift_size(bit_size);
*bounds = LLVMBuildAShr(gallivm->builder, bld->shared_size,
lp_build_const_int32(gallivm, shift_val), "");
}
}
}
/* Cast it to the pointer type of the access this instruction is doing. */
@ -4580,7 +4597,7 @@ visit_shared_load(struct lp_build_nir_soa_context *bld,
bool offset_is_uniform = !lp_nir_instr_src_divergent(&instr->instr, 0);
emit_load_mem(bld, instr->def.num_components,
instr->def.bit_size, true,
offset_is_uniform, false, true, NULL, offset, result);
offset_is_uniform, false, false, NULL, offset, result);
}
static void
@ -4593,7 +4610,7 @@ visit_shared_store(struct lp_build_nir_soa_context *bld,
int writemask = nir_intrinsic_write_mask(instr);
int nc = nir_src_num_components(instr->src[0]);
int bitsize = nir_src_bit_size(instr->src[0]);
emit_store_mem(bld, writemask, nc, bitsize, false, true, NULL, offset, val);
emit_store_mem(bld, writemask, nc, bitsize, false, false, NULL, offset, val);
}
static void
@ -4608,7 +4625,7 @@ visit_shared_atomic(struct lp_build_nir_soa_context *bld,
if (instr->intrinsic == nir_intrinsic_shared_atomic_swap)
val2 = get_src(bld, &instr->src[2], 0);
emit_atomic_mem(bld, nir_intrinsic_atomic_op(instr), bitsize, false, true,
emit_atomic_mem(bld, nir_intrinsic_atomic_op(instr), bitsize, false, false,
NULL, offset, val, val2, &result[0]);
}
@ -6019,6 +6036,7 @@ void lp_build_nir_soa_func(struct gallivm_state *gallivm,
bld.thread_data_ptr = params->thread_data_ptr;
bld.image = params->image;
bld.shared_ptr = params->shared_ptr;
bld.shared_size = params->shared_size;
bld.payload_ptr = params->payload_ptr;
bld.coro = params->coro;
bld.num_inputs = params->num_inputs;

View file

@ -282,6 +282,7 @@ struct lp_build_tgsi_params {
LLVMValueRef ssbo_sizes_ptr;
const struct lp_build_image_soa *image;
LLVMValueRef shared_ptr;
LLVMValueRef shared_size;
LLVMValueRef payload_ptr;
const struct lp_build_coro_suspend_info *coro;
const struct lp_build_fs_iface *fs_iface;

View file

@ -336,23 +336,8 @@ enum {
LP_JIT_CS_CTX_COUNT
};
#define lp_jit_cs_context_constants(_gallivm, _type, _ptr) \
lp_build_struct_get_ptr2(_gallivm, _type, _ptr, LP_JIT_CS_CTX_CONSTANTS, "constants")
#define lp_jit_cs_context_ssbos(_gallivm, _type, _ptr) \
lp_build_struct_get_ptr2(_gallivm, _type, _ptr, LP_JIT_CS_CTX_SSBOS, "ssbos")
#define lp_jit_cs_context_textures(_gallivm, _type, _ptr) \
lp_build_struct_get_ptr2(_gallivm, _type, _ptr, LP_JIT_CS_CTX_TEXTURES, "textures")
#define lp_jit_cs_context_samplers(_gallivm, _type, _ptr) \
lp_build_struct_get_ptr2(_gallivm, _type, _ptr, LP_JIT_CS_CTX_SAMPLERS, "samplers")
#define lp_jit_cs_context_images(_gallivm, _type, _ptr) \
lp_build_struct_get_ptr2(_gallivm, _type, _ptr, LP_JIT_CS_CTX_IMAGES, "images")
#define lp_jit_cs_context_shared_size(_gallivm, _type, _ptr) \
lp_build_struct_get_ptr2(_gallivm, _type, _ptr, LP_JIT_CS_CTX_SHARED_SIZE, "shared_size")
lp_build_struct_get2(_gallivm, _type, _ptr, LP_JIT_CS_CTX_SHARED_SIZE, "shared_size")
typedef void
(*lp_jit_cs_func)(const struct lp_jit_cs_context *context,

View file

@ -557,6 +557,10 @@ generate_compute(struct llvmpipe_context *lp,
params.resources_ptr);
params.image = image;
params.shared_size = lp_jit_cs_context_shared_size(gallivm,
variant->jit_cs_context_type,
params.context_ptr);
lp_build_nir_soa_func(gallivm, shader->base.ir.nir,
func->impl,
&params,
@ -873,6 +877,10 @@ generate_compute(struct llvmpipe_context *lp,
params.ssbo_ptr = ssbo_ptr;
params.image = image;
params.shared_ptr = shared_ptr;
params.shared_size = lp_jit_cs_context_shared_size(gallivm,
variant->jit_cs_context_type,
context_ptr);
params.payload_ptr = payload_ptr;
params.coro = &coro_info;
params.mesh_iface = &mesh_iface.base;
@ -1793,6 +1801,8 @@ llvmpipe_launch_grid(struct pipe_context *pipe,
job_info.req_local_mem = llvmpipe->cs->req_local_mem + info->variable_shared_mem;
job_info.zero_initialize_shared_memory = llvmpipe->cs->zero_initialize_shared_memory;
job_info.current = &llvmpipe->csctx->cs.current;
/* Not really sure this should be done here? */
job_info.current->jit_context.shared_size = job_info.req_local_mem;
int num_tasks = job_info.grid_size[2] * job_info.grid_size[1] * job_info.grid_size[0];
if (num_tasks) {
@ -2196,6 +2206,7 @@ llvmpipe_draw_mesh_tasks(struct pipe_context *pipe,
job_info.draw_id = dr;
job_info.req_local_mem = lp->tss->req_local_mem + info->variable_shared_mem;
job_info.current = &lp->task_ctx->cs.current;
job_info.current->jit_context.shared_size = job_info.req_local_mem;
if (num_tasks) {
struct lp_cs_tpool_task *task;
@ -2226,6 +2237,7 @@ llvmpipe_draw_mesh_tasks(struct pipe_context *pipe,
job_info.req_local_mem = lp->mhs->req_local_mem + info->variable_shared_mem;
job_info.current = &lp->mesh_ctx->cs.current;
job_info.current->jit_context.shared_size = job_info.req_local_mem;
job_info.payload_stride = 0;
job_info.draw_id = dr;
job_info.io_stride = task_out_size;