llvmpipe: do bounds checking for shared memory

Just compare against the size that was declared.
This is probably overkill. I couldn't figure out what Vulkan says with
regard to OOB access of shared memory. D3D, however (which is very strict
about these things), says that for TGSM writes the entire contents of the
TGSM become undefined, and for reads the result is undefined. Hence,
rather than masking out such accesses, it would be enough to just clamp
the offsets to valid values to avoid the segfaults.
NIR doesn't seem easily able to tell us whether an access is guaranteed
to be in-bounds (unlike for SSBO access), so assume it is always
potentially OOB.

v2: fix rusticl - for CL we don't know the shared size at compilation
time; it is only provided at launch_grid() time, so the NIR shader info
shared_size might be zero. Hence, pass the size through via the CS jit
context; there already was a member in there which looks like it was
intended for that (interestingly enough, the CS jit context was
previously unused, since resources are passed elsewhere nowadays).

Reviewed-by: Brian Paul <brian.paul@broadcom.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38307>
This commit is contained in:
Roland Scheidegger 2025-11-10 23:17:23 +01:00 committed by Marge Bot
parent 4490275332
commit d6fd8b4201
4 changed files with 38 additions and 22 deletions

View file

@ -218,6 +218,7 @@ struct lp_build_nir_soa_context
LLVMValueRef ssbo_ptr;
LLVMValueRef shared_ptr;
LLVMValueRef shared_size;
LLVMValueRef payload_ptr;
LLVMValueRef scratch_ptr;
unsigned scratch_size;
@ -1317,11 +1318,27 @@ mem_access_base_pointer(struct lp_build_nir_soa_context *bld,
ptr = LLVMBuildPtrToInt(gallivm->builder, ptr, bld->int64_bld.elem_type, "");
ptr = LLVMBuildAdd(gallivm->builder, ptr, lp_build_const_int64(gallivm, 12), "");
ptr = LLVMBuildIntToPtr(gallivm->builder, ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
if (bounds)
*bounds = NULL;
}
else
else {
ptr = bld->shared_ptr;
if (bounds)
*bounds = NULL;
if (bounds) {
/*
* For shared mem, use essentially the same logic as for SSBOs,
* and clamp against the declared size.
* Note this is probably overkill - unsure of vulkan semantics,
* but d3d would allow for instance to just clamp offsets,
* since OOB writes cause shared mem to become undefined, and OOB
* reads have undefined results, unlike for SSBOs there's no
* need to ignore writes and return 0 for reads.
* Bounds is in units of bit_size (not bytes).
*/
uint32_t shift_val = bit_size_to_shift_size(bit_size);
*bounds = LLVMBuildAShr(gallivm->builder, bld->shared_size,
lp_build_const_int32(gallivm, shift_val), "");
}
}
}
/* Cast it to the pointer type of the access this instruction is doing. */
@ -4580,7 +4597,7 @@ visit_shared_load(struct lp_build_nir_soa_context *bld,
bool offset_is_uniform = !lp_nir_instr_src_divergent(&instr->instr, 0);
emit_load_mem(bld, instr->def.num_components,
instr->def.bit_size, true,
offset_is_uniform, false, true, NULL, offset, result);
offset_is_uniform, false, false, NULL, offset, result);
}
static void
@ -4593,7 +4610,7 @@ visit_shared_store(struct lp_build_nir_soa_context *bld,
int writemask = nir_intrinsic_write_mask(instr);
int nc = nir_src_num_components(instr->src[0]);
int bitsize = nir_src_bit_size(instr->src[0]);
emit_store_mem(bld, writemask, nc, bitsize, false, true, NULL, offset, val);
emit_store_mem(bld, writemask, nc, bitsize, false, false, NULL, offset, val);
}
static void
@ -4608,7 +4625,7 @@ visit_shared_atomic(struct lp_build_nir_soa_context *bld,
if (instr->intrinsic == nir_intrinsic_shared_atomic_swap)
val2 = get_src(bld, &instr->src[2], 0);
emit_atomic_mem(bld, nir_intrinsic_atomic_op(instr), bitsize, false, true,
emit_atomic_mem(bld, nir_intrinsic_atomic_op(instr), bitsize, false, false,
NULL, offset, val, val2, &result[0]);
}
@ -6019,6 +6036,7 @@ void lp_build_nir_soa_func(struct gallivm_state *gallivm,
bld.thread_data_ptr = params->thread_data_ptr;
bld.image = params->image;
bld.shared_ptr = params->shared_ptr;
bld.shared_size = params->shared_size;
bld.payload_ptr = params->payload_ptr;
bld.coro = params->coro;
bld.num_inputs = params->num_inputs;

View file

@ -282,6 +282,7 @@ struct lp_build_tgsi_params {
LLVMValueRef ssbo_sizes_ptr;
const struct lp_build_image_soa *image;
LLVMValueRef shared_ptr;
LLVMValueRef shared_size;
LLVMValueRef payload_ptr;
const struct lp_build_coro_suspend_info *coro;
const struct lp_build_fs_iface *fs_iface;

View file

@ -336,23 +336,8 @@ enum {
LP_JIT_CS_CTX_COUNT
};
#define lp_jit_cs_context_constants(_gallivm, _type, _ptr) \
lp_build_struct_get_ptr2(_gallivm, _type, _ptr, LP_JIT_CS_CTX_CONSTANTS, "constants")
#define lp_jit_cs_context_ssbos(_gallivm, _type, _ptr) \
lp_build_struct_get_ptr2(_gallivm, _type, _ptr, LP_JIT_CS_CTX_SSBOS, "ssbos")
#define lp_jit_cs_context_textures(_gallivm, _type, _ptr) \
lp_build_struct_get_ptr2(_gallivm, _type, _ptr, LP_JIT_CS_CTX_TEXTURES, "textures")
#define lp_jit_cs_context_samplers(_gallivm, _type, _ptr) \
lp_build_struct_get_ptr2(_gallivm, _type, _ptr, LP_JIT_CS_CTX_SAMPLERS, "samplers")
#define lp_jit_cs_context_images(_gallivm, _type, _ptr) \
lp_build_struct_get_ptr2(_gallivm, _type, _ptr, LP_JIT_CS_CTX_IMAGES, "images")
#define lp_jit_cs_context_shared_size(_gallivm, _type, _ptr) \
lp_build_struct_get_ptr2(_gallivm, _type, _ptr, LP_JIT_CS_CTX_SHARED_SIZE, "shared_size")
lp_build_struct_get2(_gallivm, _type, _ptr, LP_JIT_CS_CTX_SHARED_SIZE, "shared_size")
typedef void
(*lp_jit_cs_func)(const struct lp_jit_cs_context *context,

View file

@ -557,6 +557,10 @@ generate_compute(struct llvmpipe_context *lp,
params.resources_ptr);
params.image = image;
params.shared_size = lp_jit_cs_context_shared_size(gallivm,
variant->jit_cs_context_type,
params.context_ptr);
lp_build_nir_soa_func(gallivm, shader->base.ir.nir,
func->impl,
&params,
@ -873,6 +877,10 @@ generate_compute(struct llvmpipe_context *lp,
params.ssbo_ptr = ssbo_ptr;
params.image = image;
params.shared_ptr = shared_ptr;
params.shared_size = lp_jit_cs_context_shared_size(gallivm,
variant->jit_cs_context_type,
context_ptr);
params.payload_ptr = payload_ptr;
params.coro = &coro_info;
params.mesh_iface = &mesh_iface.base;
@ -1793,6 +1801,8 @@ llvmpipe_launch_grid(struct pipe_context *pipe,
job_info.req_local_mem = llvmpipe->cs->req_local_mem + info->variable_shared_mem;
job_info.zero_initialize_shared_memory = llvmpipe->cs->zero_initialize_shared_memory;
job_info.current = &llvmpipe->csctx->cs.current;
/* Not really sure this should be done here? */
job_info.current->jit_context.shared_size = job_info.req_local_mem;
int num_tasks = job_info.grid_size[2] * job_info.grid_size[1] * job_info.grid_size[0];
if (num_tasks) {
@ -2196,6 +2206,7 @@ llvmpipe_draw_mesh_tasks(struct pipe_context *pipe,
job_info.draw_id = dr;
job_info.req_local_mem = lp->tss->req_local_mem + info->variable_shared_mem;
job_info.current = &lp->task_ctx->cs.current;
job_info.current->jit_context.shared_size = job_info.req_local_mem;
if (num_tasks) {
struct lp_cs_tpool_task *task;
@ -2226,6 +2237,7 @@ llvmpipe_draw_mesh_tasks(struct pipe_context *pipe,
job_info.req_local_mem = lp->mhs->req_local_mem + info->variable_shared_mem;
job_info.current = &lp->mesh_ctx->cs.current;
job_info.current->jit_context.shared_size = job_info.req_local_mem;
job_info.payload_stride = 0;
job_info.draw_id = dr;
job_info.io_stride = task_out_size;