diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c index a31adf90160..2a9a5881a1a 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c @@ -218,6 +218,7 @@ struct lp_build_nir_soa_context LLVMValueRef ssbo_ptr; LLVMValueRef shared_ptr; + LLVMValueRef shared_size; LLVMValueRef payload_ptr; LLVMValueRef scratch_ptr; unsigned scratch_size; @@ -1317,11 +1318,27 @@ mem_access_base_pointer(struct lp_build_nir_soa_context *bld, ptr = LLVMBuildPtrToInt(gallivm->builder, ptr, bld->int64_bld.elem_type, ""); ptr = LLVMBuildAdd(gallivm->builder, ptr, lp_build_const_int64(gallivm, 12), ""); ptr = LLVMBuildIntToPtr(gallivm->builder, ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), ""); + if (bounds) + *bounds = NULL; } - else + else { ptr = bld->shared_ptr; - if (bounds) - *bounds = NULL; + if (bounds) { + /* + * For shared mem, use essentially the same logic as for SSBOs, + * and clamp against the declared size. + * Note this is probably overkill - unsure of vulkan semantics, + * but d3d would allow for instance to just clamp offsets, + * since OOB writes cause shared mem to become undefined, and OOB + * reads have undefined results, unlike for SSBOs there's no + * need to ignore writes and return 0 for reads. + * Bounds is in units of bit_size (not bytes). + */ + uint32_t shift_val = bit_size_to_shift_size(bit_size); + *bounds = LLVMBuildAShr(gallivm->builder, bld->shared_size, + lp_build_const_int32(gallivm, shift_val), ""); + } + } } /* Cast it to the pointer type of the access this instruction is doing. */ @@ -4580,7 +4597,7 @@ visit_shared_load(struct lp_build_nir_soa_context *bld, bool offset_is_uniform = !lp_nir_instr_src_divergent(&instr->instr, 0); emit_load_mem(bld, instr->def.num_components, instr->def.bit_size, true, - offset_is_uniform, false, true, NULL, offset, result); + offset_is_uniform, false, false, NULL, offset, result); } static void @@ -4593,7 +4610,7 @@ visit_shared_store(struct lp_build_nir_soa_context *bld, int writemask = nir_intrinsic_write_mask(instr); int nc = nir_src_num_components(instr->src[0]); int bitsize = nir_src_bit_size(instr->src[0]); - emit_store_mem(bld, writemask, nc, bitsize, false, true, NULL, offset, val); + emit_store_mem(bld, writemask, nc, bitsize, false, false, NULL, offset, val); } static void @@ -4608,7 +4625,7 @@ visit_shared_atomic(struct lp_build_nir_soa_context *bld, if (instr->intrinsic == nir_intrinsic_shared_atomic_swap) val2 = get_src(bld, &instr->src[2], 0); - emit_atomic_mem(bld, nir_intrinsic_atomic_op(instr), bitsize, false, true, + emit_atomic_mem(bld, nir_intrinsic_atomic_op(instr), bitsize, false, false, NULL, offset, val, val2, &result[0]); } @@ -6019,6 +6036,7 @@ void lp_build_nir_soa_func(struct gallivm_state *gallivm, bld.thread_data_ptr = params->thread_data_ptr; bld.image = params->image; bld.shared_ptr = params->shared_ptr; + bld.shared_size = params->shared_size; bld.payload_ptr = params->payload_ptr; bld.coro = params->coro; bld.num_inputs = params->num_inputs; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 4f96be859a1..8889747a1d9 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -282,6 +282,7 @@ struct lp_build_tgsi_params { LLVMValueRef ssbo_sizes_ptr; const struct lp_build_image_soa *image; LLVMValueRef shared_ptr; + LLVMValueRef shared_size; LLVMValueRef payload_ptr; const struct lp_build_coro_suspend_info *coro; const struct lp_build_fs_iface *fs_iface; diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h index 24a6143adb4..87cd01cdb8b 100644 --- a/src/gallium/drivers/llvmpipe/lp_jit.h +++ b/src/gallium/drivers/llvmpipe/lp_jit.h @@ -336,23 +336,8 @@ enum { LP_JIT_CS_CTX_COUNT }; -#define lp_jit_cs_context_constants(_gallivm, _type, _ptr) \ - lp_build_struct_get_ptr2(_gallivm, _type, _ptr, LP_JIT_CS_CTX_CONSTANTS, "constants") - -#define lp_jit_cs_context_ssbos(_gallivm, _type, _ptr) \ - lp_build_struct_get_ptr2(_gallivm, _type, _ptr, LP_JIT_CS_CTX_SSBOS, "ssbos") - -#define lp_jit_cs_context_textures(_gallivm, _type, _ptr) \ - lp_build_struct_get_ptr2(_gallivm, _type, _ptr, LP_JIT_CS_CTX_TEXTURES, "textures") - -#define lp_jit_cs_context_samplers(_gallivm, _type, _ptr) \ - lp_build_struct_get_ptr2(_gallivm, _type, _ptr, LP_JIT_CS_CTX_SAMPLERS, "samplers") - -#define lp_jit_cs_context_images(_gallivm, _type, _ptr) \ - lp_build_struct_get_ptr2(_gallivm, _type, _ptr, LP_JIT_CS_CTX_IMAGES, "images") - #define lp_jit_cs_context_shared_size(_gallivm, _type, _ptr) \ - lp_build_struct_get_ptr2(_gallivm, _type, _ptr, LP_JIT_CS_CTX_SHARED_SIZE, "shared_size") + lp_build_struct_get2(_gallivm, _type, _ptr, LP_JIT_CS_CTX_SHARED_SIZE, "shared_size") typedef void (*lp_jit_cs_func)(const struct lp_jit_cs_context *context, diff --git a/src/gallium/drivers/llvmpipe/lp_state_cs.c b/src/gallium/drivers/llvmpipe/lp_state_cs.c index ea00ab9d988..75d1dc38241 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_cs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_cs.c @@ -557,6 +557,10 @@ generate_compute(struct llvmpipe_context *lp, params.resources_ptr); params.image = image; + params.shared_size = lp_jit_cs_context_shared_size(gallivm, + variant->jit_cs_context_type, + params.context_ptr); + lp_build_nir_soa_func(gallivm, shader->base.ir.nir, func->impl, ¶ms, @@ -873,6 +877,10 @@ generate_compute(struct llvmpipe_context *lp, params.ssbo_ptr = ssbo_ptr; params.image = image; params.shared_ptr = shared_ptr; + params.shared_size = lp_jit_cs_context_shared_size(gallivm, + variant->jit_cs_context_type, + context_ptr); + params.payload_ptr = payload_ptr; params.coro = &coro_info; params.mesh_iface = &mesh_iface.base; @@ -1793,6 +1801,8 @@ llvmpipe_launch_grid(struct pipe_context *pipe, job_info.req_local_mem = llvmpipe->cs->req_local_mem + info->variable_shared_mem; job_info.zero_initialize_shared_memory = llvmpipe->cs->zero_initialize_shared_memory; job_info.current = &llvmpipe->csctx->cs.current; + /* Not really sure this should be done here? */ + job_info.current->jit_context.shared_size = job_info.req_local_mem; int num_tasks = job_info.grid_size[2] * job_info.grid_size[1] * job_info.grid_size[0]; if (num_tasks) { @@ -2196,6 +2206,7 @@ llvmpipe_draw_mesh_tasks(struct pipe_context *pipe, job_info.draw_id = dr; job_info.req_local_mem = lp->tss->req_local_mem + info->variable_shared_mem; job_info.current = &lp->task_ctx->cs.current; + job_info.current->jit_context.shared_size = job_info.req_local_mem; if (num_tasks) { struct lp_cs_tpool_task *task; @@ -2226,6 +2237,7 @@ llvmpipe_draw_mesh_tasks(struct pipe_context *pipe, job_info.req_local_mem = lp->mhs->req_local_mem + info->variable_shared_mem; job_info.current = &lp->mesh_ctx->cs.current; + job_info.current->jit_context.shared_size = job_info.req_local_mem; job_info.payload_stride = 0; job_info.draw_id = dr; job_info.io_stride = task_out_size;