From d6fd8b42015df044fc3820cc19ff1170fdc36c08 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Mon, 10 Nov 2025 23:17:23 +0100 Subject: [PATCH] llvmpipe: do bounds checking for shared memory Just compare against the size that was declared. This is probably overkill. I couldn't figure out what vulkan says wrt OOB access of shared memory. D3D however (which is very strict about these things) says that for TGSM writes the entire contents of the TGSM become undefined, for reads the result is undefined. Hence, rather than masking out such accesses, to avoid the segfaults it would be enough to just clamp the offsets to valid values. nir doesn't seem easily able to tell us if an access is guaranteed in-bounds (unlike for ssbo access), so assume always potentially OOB. v2: fix rusticl - for cl we don't know the shared size at compilation time, this is only provided at launch_grid() time, the nir shader info shared_size might be zero. Hence pass through the size via cs jit context, there already actually was a member in there which looks like it was intended for that (interestingly enough, the cs jit context was actually unused, since resources are passed elsewhere nowadays). 
Reviewed-by: Brian Paul Part-of: --- .../auxiliary/gallivm/lp_bld_nir_soa.c | 30 +++++++++++++++---- src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 1 + src/gallium/drivers/llvmpipe/lp_jit.h | 17 +---------- src/gallium/drivers/llvmpipe/lp_state_cs.c | 12 ++++++++ 4 files changed, 38 insertions(+), 22 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c index a31adf90160..2a9a5881a1a 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c @@ -218,6 +218,7 @@ struct lp_build_nir_soa_context LLVMValueRef ssbo_ptr; LLVMValueRef shared_ptr; + LLVMValueRef shared_size; LLVMValueRef payload_ptr; LLVMValueRef scratch_ptr; unsigned scratch_size; @@ -1317,11 +1318,27 @@ mem_access_base_pointer(struct lp_build_nir_soa_context *bld, ptr = LLVMBuildPtrToInt(gallivm->builder, ptr, bld->int64_bld.elem_type, ""); ptr = LLVMBuildAdd(gallivm->builder, ptr, lp_build_const_int64(gallivm, 12), ""); ptr = LLVMBuildIntToPtr(gallivm->builder, ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), ""); + if (bounds) + *bounds = NULL; } - else + else { ptr = bld->shared_ptr; - if (bounds) - *bounds = NULL; + if (bounds) { + /* + * For shared mem, use essentially the same logic as for SSBOs, + * and clamp against the declared size. + * Note this is probably overkill - unsure of vulkan semantics, + * but d3d would allow for instance to just clamp offsets, + * since OOB writes cause shared mem to become undefined, and OOB + * reads have undefined results, unlike for SSBOs there's no + * need to ignore writes and return 0 for reads. + * Bounds is in units of bit_size (not bytes). + */ + uint32_t shift_val = bit_size_to_shift_size(bit_size); + *bounds = LLVMBuildAShr(gallivm->builder, bld->shared_size, + lp_build_const_int32(gallivm, shift_val), ""); + } + } } /* Cast it to the pointer type of the access this instruction is doing. 
*/ @@ -4580,7 +4597,7 @@ visit_shared_load(struct lp_build_nir_soa_context *bld, bool offset_is_uniform = !lp_nir_instr_src_divergent(&instr->instr, 0); emit_load_mem(bld, instr->def.num_components, instr->def.bit_size, true, - offset_is_uniform, false, true, NULL, offset, result); + offset_is_uniform, false, false, NULL, offset, result); } static void @@ -4593,7 +4610,7 @@ visit_shared_store(struct lp_build_nir_soa_context *bld, int writemask = nir_intrinsic_write_mask(instr); int nc = nir_src_num_components(instr->src[0]); int bitsize = nir_src_bit_size(instr->src[0]); - emit_store_mem(bld, writemask, nc, bitsize, false, true, NULL, offset, val); + emit_store_mem(bld, writemask, nc, bitsize, false, false, NULL, offset, val); } static void @@ -4608,7 +4625,7 @@ visit_shared_atomic(struct lp_build_nir_soa_context *bld, if (instr->intrinsic == nir_intrinsic_shared_atomic_swap) val2 = get_src(bld, &instr->src[2], 0); - emit_atomic_mem(bld, nir_intrinsic_atomic_op(instr), bitsize, false, true, + emit_atomic_mem(bld, nir_intrinsic_atomic_op(instr), bitsize, false, false, NULL, offset, val, val2, &result[0]); } @@ -6019,6 +6036,7 @@ void lp_build_nir_soa_func(struct gallivm_state *gallivm, bld.thread_data_ptr = params->thread_data_ptr; bld.image = params->image; bld.shared_ptr = params->shared_ptr; + bld.shared_size = params->shared_size; bld.payload_ptr = params->payload_ptr; bld.coro = params->coro; bld.num_inputs = params->num_inputs; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 4f96be859a1..8889747a1d9 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -282,6 +282,7 @@ struct lp_build_tgsi_params { LLVMValueRef ssbo_sizes_ptr; const struct lp_build_image_soa *image; LLVMValueRef shared_ptr; + LLVMValueRef shared_size; LLVMValueRef payload_ptr; const struct lp_build_coro_suspend_info *coro; const struct lp_build_fs_iface *fs_iface; diff --git 
a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 24a6143adb4..87cd01cdb8b 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -336,23 +336,8 @@ enum { LP_JIT_CS_CTX_COUNT }; -#define lp_jit_cs_context_constants(_gallivm, _type, _ptr) \ - lp_build_struct_get_ptr2(_gallivm, _type, _ptr, LP_JIT_CS_CTX_CONSTANTS, "constants") - -#define lp_jit_cs_context_ssbos(_gallivm, _type, _ptr) \ - lp_build_struct_get_ptr2(_gallivm, _type, _ptr, LP_JIT_CS_CTX_SSBOS, "ssbos") - -#define lp_jit_cs_context_textures(_gallivm, _type, _ptr) \ - lp_build_struct_get_ptr2(_gallivm, _type, _ptr, LP_JIT_CS_CTX_TEXTURES, "textures") - -#define lp_jit_cs_context_samplers(_gallivm, _type, _ptr) \ - lp_build_struct_get_ptr2(_gallivm, _type, _ptr, LP_JIT_CS_CTX_SAMPLERS, "samplers") - -#define lp_jit_cs_context_images(_gallivm, _type, _ptr) \ - lp_build_struct_get_ptr2(_gallivm, _type, _ptr, LP_JIT_CS_CTX_IMAGES, "images") - #define lp_jit_cs_context_shared_size(_gallivm, _type, _ptr) \ - lp_build_struct_get_ptr2(_gallivm, _type, _ptr, LP_JIT_CS_CTX_SHARED_SIZE, "shared_size") + lp_build_struct_get2(_gallivm, _type, _ptr, LP_JIT_CS_CTX_SHARED_SIZE, "shared_size") typedef void (*lp_jit_cs_func)(const struct lp_jit_cs_context *context, diff --git a/src/gallium/drivers/llvmpipe/lp_state_cs.c b/src/gallium/drivers/llvmpipe/lp_state_cs.c index ea00ab9d988..75d1dc38241 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_cs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_cs.c @@ -557,6 +557,10 @@ generate_compute(struct llvmpipe_context *lp, params.resources_ptr); params.image = image; + params.shared_size = lp_jit_cs_context_shared_size(gallivm, + variant->jit_cs_context_type, + params.context_ptr); + lp_build_nir_soa_func(gallivm, shader->base.ir.nir, func->impl, ¶ms, @@ -873,6 +877,10 @@ generate_compute(struct llvmpipe_context *lp, params.ssbo_ptr = ssbo_ptr; params.image = image; params.shared_ptr = shared_ptr; + 
params.shared_size = lp_jit_cs_context_shared_size(gallivm, + variant->jit_cs_context_type, + context_ptr); + params.payload_ptr = payload_ptr; params.coro = &coro_info; params.mesh_iface = &mesh_iface.base; @@ -1793,6 +1801,8 @@ llvmpipe_launch_grid(struct pipe_context *pipe, job_info.req_local_mem = llvmpipe->cs->req_local_mem + info->variable_shared_mem; job_info.zero_initialize_shared_memory = llvmpipe->cs->zero_initialize_shared_memory; job_info.current = &llvmpipe->csctx->cs.current; + /* Not really sure this should be done here? */ + job_info.current->jit_context.shared_size = job_info.req_local_mem; int num_tasks = job_info.grid_size[2] * job_info.grid_size[1] * job_info.grid_size[0]; if (num_tasks) { @@ -2196,6 +2206,7 @@ llvmpipe_draw_mesh_tasks(struct pipe_context *pipe, job_info.draw_id = dr; job_info.req_local_mem = lp->tss->req_local_mem + info->variable_shared_mem; job_info.current = &lp->task_ctx->cs.current; + job_info.current->jit_context.shared_size = job_info.req_local_mem; if (num_tasks) { struct lp_cs_tpool_task *task; @@ -2226,6 +2237,7 @@ llvmpipe_draw_mesh_tasks(struct pipe_context *pipe, job_info.req_local_mem = lp->mhs->req_local_mem + info->variable_shared_mem; job_info.current = &lp->mesh_ctx->cs.current; + job_info.current->jit_context.shared_size = job_info.req_local_mem; job_info.payload_stride = 0; job_info.draw_id = dr; job_info.io_stride = task_out_size;